From c35a1621b23c49680085ee44016f023c82bba706 Mon Sep 17 00:00:00 2001 From: lphatnguyen Date: Thu, 14 Aug 2025 08:35:10 +0000 Subject: [PATCH] fix multiple --- easydistill/mmkd/dev-vqa/gen_vqa_bank.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py index 536fbf9..d8c272f 100644 --- a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py +++ b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py @@ -191,7 +191,7 @@ def generate_vqa_conversations( "role": "user", # The content is the list of image dicts, followed by the text dict "content": image_content_list - + [{"type": "text", "text": "" + question_text}], + + [{"type": "text", "text": "" * len(found_image_paths) + question_text}], } assistant_message = {"role": "assistant", "content": answer_text} @@ -276,7 +276,7 @@ def generate_multiturn_conversations( first_question = random.choice(question_bank[main_field][language]) conversation.append({ "role": "user", - "content": image_content_list + [{"type": "text", "text": "" + first_question}], + "content": image_content_list + [{"type": "text", "text": "" * len(found_image_paths) + first_question}], }) # 3. First Assistant Turn @@ -363,7 +363,7 @@ def generate_vq_question( user_message = { "role": "user", "content": image_content_list - + [{"type": "text", "text": "" + question_text}], + + [{"type": "text", "text": "" * len(image_paths) + question_text}], } conversation = [system_message, user_message] final_conversations.append(conversation) @@ -430,7 +430,7 @@ def generate_multiturn_vq_question( first_question = random.choice(question_bank[main_field][language]) conversation.append({ "role": "user", - "content": image_content_list + [{"type": "text", "text": "" + first_question}], + "content": image_content_list + [{"type": "text", "text": "" * len(image_paths) + first_question}], }) # 3. Follow-up User Turns (text only)