From 814fbfee03d1183d27e638bd16a7347b6d8d94be Mon Sep 17 00:00:00 2001 From: lphatnguyen Date: Thu, 7 Aug 2025 15:45:55 +0000 Subject: [PATCH] modify gen_vqa_bank --- easydistill/mmkd/dev-vqa/gen_vqa_bank.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py index 60fc483..1d02f34 100644 --- a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py +++ b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py @@ -7,7 +7,7 @@ import re def load_json(filepath): """ - Loads a JSON file with robust error handling. + Loads a JSON file. """ try: with open(filepath, 'r', encoding='utf-8') as f: @@ -21,7 +21,7 @@ def load_json(filepath): def read_text_file(filepath): """ - Loads a simple text file. + Loads a prompt from a text file. """ try: with open(filepath, 'r', encoding='utf-8') as f: @@ -32,7 +32,7 @@ def read_text_file(filepath): def format_items_list(items, language): """ - Formats a list of item dictionaries into a human-readable string. + Formats a list of item dictionaries (services) into a human-readable string. """ if not items: return "" @@ -92,7 +92,7 @@ def get_conversational_answer(field, label_data, answer_bank, language): return str(value) if value is not None else "" # --- Conversations Generation for Label Data --- -def generate_field_level_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path): +def generate_vqa_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path): """ Generates multiple conversational VQA pairs for each field in a label file, and handles multi-page documents. 
@@ -173,10 +173,9 @@ def generate_field_level_conversations(labels_path, image_root, system_prompt_pa print(f"Formatted data saved to: {output_path}") # --- Conversations Generation for only Images --- -def generate_image_only_conversations(image_root, system_prompt_path, questions_path, output_path): +def generate_vq_question(image_root, system_prompt_path, questions_path, output_path): """ Generates conversational VQA pairs for each document based on images only (no labels). - Groups all images with the same prefix (including _1_scale, _2_scale, etc.) into the same conversation. Each conversation contains a system and user message for each question in the question bank. """ system_prompt = read_text_file(system_prompt_path) @@ -234,4 +233,4 @@ if __name__ == "__main__": # Run the main generation function # generate_field_level_conversations(LABELS_FILE, IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, ANSWER_BANK_FILE, OUTPUT_FILE) - generate_image_only_conversations(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE) \ No newline at end of file + generate_vq_question(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE) \ No newline at end of file