OpenDCAI · haolpku · Apr 13, 2026 · Apr 11, 2026
diff --git a/.gitignore b/.gitignore
@@ -13,6 +13,7 @@ requirement_added.txt
 mineru.json
 
 test/example
+test_pipeline/
 reasoning_chains
 cache*
 ckpt*

diff --git a/dataflow/statics/pipelines/api_pipelines/image_vqa.py b/dataflow/statics/pipelines/api_pipelines/image_vqa.py
@@ -20,15 +20,15 @@ def __init__(self, llm_serving: LLMServingABC = None):
         self.storage = FileStorage(
             first_entry_file_name="./example_data/image_vqa/sample_data.json",
             cache_path="./cache_local",
-            file_name_prefix="qa",
+            file_name_prefix="qa_api",
             cache_type="json",
         )
 
         # ---------- 2. Serving ----------
         self.vlm_serving = APIVLMServing_openai(
-            api_url="http://172.96.141.132:3001/v1", # Any API platform compatible with OpenAI format
+            api_url="https://dashscope.aliyuncs.com/compatible-mode/v1", # Any API platform compatible with OpenAI format
             key_name_of_api_key="DF_API_KEY", # Set the API key for the corresponding platform in the environment variable or line 4
-            model_name="gpt-5-nano-2025-08-07",
+            model_name="qwen3-vl-8b-instruct",
             image_io=None,
             send_request_stream=False,
             max_workers=10,
@@ -44,13 +44,23 @@ def __init__(self, llm_serving: LLMServingABC = None):
     # ------------------------------------------------------------------ #
     def forward(self):
         input_image_key = "image"
-        output_answer_key = "vqa"
+        output_step1_key = "question"  # written by step 1, then passed to step 2 as its prompt key
+        output_step2_key = "answer"  # written by step 2
 
+        # Step 1: run the generator on the "conversation" and image keys; its output is stored under the step-1 key
         self.vqa_generator.run(
             storage=self.storage.step(),
             input_conversation_key="conversation",
             input_image_key=input_image_key,
-            output_answer_key=output_answer_key,
+            output_answer_key=output_step1_key,
+        )
+
+        # Step 2: run the same generator again on the same image key, using the step-1 output as the prompt key
+        self.vqa_generator.run(
+            storage=self.storage.step(),
+            input_prompt_key=output_step1_key,  # NOTE(review): step 1 passes input_conversation_key instead; confirm run() accepts input_prompt_key
+            input_image_key=input_image_key,
+            output_answer_key=output_step2_key,
         )
 
 # ---------------------------- CLI 入口 -------------------------------- #

diff --git a/dataflow/statics/pipelines/gpu_pipelines/image2qa.py b/dataflow/statics/pipelines/gpu_pipelines/image2qa.py
@@ -36,16 +36,26 @@ def __init__(self, llm_serving: LLMServingABC = None):
             system_prompt="You are a image question-answer generator. Your task is to generate a question-answer pair for the given image content.",
         )
 
-        self.media_key = "image"
-        self.output_key = "qa"
-
     # ------------------------- Pipeline 单步 ------------------------- #
     def forward(self):
+        input_image_key = "image"  # local replacement for the removed self.media_key attribute
+        output_step1_key = "question"  # written by step 1, then passed to step 2 as its prompt key
+        output_step2_key = "answer"  # written by step 2
+
+        # Step 1: run the generator on the "conversation" and image keys; its output is stored under the step-1 key
         self.qa_generator.run(
             storage=self.storage.step(),
             input_conversation_key="conversation",
-            input_image_key=self.media_key,
-            output_answer_key=self.output_key,
+            input_image_key=input_image_key,
+            output_answer_key=output_step1_key,
+        )
+
+        # Step 2: run the same generator again on the same image key, using the step-1 output as the prompt key
+        self.qa_generator.run(
+            storage=self.storage.step(),
+            input_prompt_key=output_step1_key,  # NOTE(review): step 1 passes input_conversation_key instead; confirm run() accepts input_prompt_key
+            input_image_key=input_image_key,
+            output_answer_key=output_step2_key,
         )
 
 # ------------------------------ CLI ------------------------------ #