From c35a1621b23c49680085ee44016f023c82bba706 Mon Sep 17 00:00:00 2001
From: lphatnguyen <luong-phat.nguyen@rizlum.ai>
Date: Thu, 14 Aug 2025 08:35:10 +0000
Subject: [PATCH] fix multiple <image> placeholders

---
 easydistill/mmkd/dev-vqa/gen_vqa_bank.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
index 536fbf9..d8c272f 100644
--- a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
+++ b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
@@ -191,7 +191,7 @@ def generate_vqa_conversations(
                 "role": "user",
                 # The content is the list of image dicts, followed by the text dict
                 "content": image_content_list
-                + [{"type": "text", "text": "<image>" + question_text}],
+                + [{"type": "text", "text": "<image>" * len(found_image_paths) + question_text}],
             }
 
             assistant_message = {"role": "assistant", "content": answer_text}
@@ -276,7 +276,7 @@ def generate_multiturn_conversations(
             first_question = random.choice(question_bank[main_field][language])
             conversation.append({
                 "role": "user",
-                "content": image_content_list + [{"type": "text", "text": "<image>" + first_question}],
+                "content": image_content_list + [{"type": "text", "text": "<image>" * len(found_image_paths) + first_question}],
             })
 
             # 3. First Assistant Turn
@@ -363,7 +363,7 @@ def generate_vq_question(
             user_message = {
                 "role": "user",
                 "content": image_content_list
-                + [{"type": "text", "text": "<image>" + question_text}],
+                + [{"type": "text", "text": "<image>" * len(image_paths) + question_text}],
             }
             conversation = [system_message, user_message]
             final_conversations.append(conversation)
@@ -430,7 +430,7 @@ def generate_multiturn_vq_question(
             first_question = random.choice(question_bank[main_field][language])
             conversation.append({
                 "role": "user",
-                "content": image_content_list + [{"type": "text", "text": "<image>" + first_question}],
+                "content": image_content_list + [{"type": "text", "text": "<image>" * len(image_paths) + first_question}],
             })
 
             # 3. Follow-up User Turns (text only)