Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ requirement_added.txt
mineru.json

test/example
test_pipeline/
reasoning_chains
cache*
ckpt*
Expand Down
20 changes: 15 additions & 5 deletions dataflow/statics/pipelines/api_pipelines/image_vqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def __init__(self, llm_serving: LLMServingABC = None):
self.storage = FileStorage(
first_entry_file_name="./example_data/image_vqa/sample_data.json",
cache_path="./cache_local",
file_name_prefix="qa",
file_name_prefix="qa_api",
cache_type="json",
)

# ---------- 2. Serving ----------
self.vlm_serving = APIVLMServing_openai(
api_url="http://172.96.141.132:3001/v1", # Any API platform compatible with OpenAI format
api_url="https://dashscope.aliyuncs.com/compatible-mode/v1", # Any API platform compatible with OpenAI format
key_name_of_api_key="DF_API_KEY", # Set the API key for the corresponding platform in the environment variable or line 4
model_name="gpt-5-nano-2025-08-07",
model_name="qwen3-vl-8b-instruct",
image_io=None,
send_request_stream=False,
max_workers=10,
Expand All @@ -44,13 +44,23 @@ def __init__(self, llm_serving: LLMServingABC = None):
# ------------------------------------------------------------------ #
def forward(self):
input_image_key = "image"
output_answer_key = "vqa"
output_step1_key = "question"
output_step2_key = "answer"

# Step 1: Generate the question for the image
self.vqa_generator.run(
storage=self.storage.step(),
input_conversation_key="conversation",
input_image_key=input_image_key,
output_answer_key=output_answer_key,
output_answer_key=output_step1_key,
)

# Step 2: Generate the answer for the question
self.vqa_generator.run(
storage=self.storage.step(),
input_prompt_key=output_step1_key,
input_image_key=input_image_key,
output_answer_key=output_step2_key,
)

# ---------------------------- CLI 入口 -------------------------------- #
Expand Down
20 changes: 15 additions & 5 deletions dataflow/statics/pipelines/gpu_pipelines/image2qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,26 @@ def __init__(self, llm_serving: LLMServingABC = None):
system_prompt="You are a image question-answer generator. Your task is to generate a question-answer pair for the given image content.",
)

self.media_key = "image"
self.output_key = "qa"

# ------------------------- Pipeline 单步 ------------------------- #
def forward(self):
input_image_key = "image"
output_step1_key = "question"
output_step2_key = "answer"

# Step 1: Generate the question for the image
self.qa_generator.run(
storage=self.storage.step(),
input_conversation_key="conversation",
input_image_key=self.media_key,
output_answer_key=self.output_key,
input_image_key=input_image_key,
output_answer_key=output_step1_key,
)

# Step 2: Generate the answer for the question
self.qa_generator.run(
storage=self.storage.step(),
input_prompt_key=output_step1_key,
input_image_key=input_image_key,
output_answer_key=output_step2_key,
)

# ------------------------------ CLI ------------------------------ #
Expand Down
Loading