Rename VQA generation functions and tidy docstrings in gen_vqa_bank.py

2025-08-07 15:45:55 +00:00
parent 0637599c3a
commit bbefb444a9
1 changed file with 6 additions and 7 deletions
--- a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
+++ b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
@@ -7,7 +7,7 @@ import re

 def load_json(filepath):
    """
-    Loads a JSON file with robust error handling.
+    Loads a JSON file.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
@@ -21,7 +21,7 @@ def load_json(filepath):

 def read_text_file(filepath):
    """
-    Loads a simple text file.
+    Loads a prompt from a text file.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
@@ -32,7 +32,7 @@ def read_text_file(filepath):

 def format_items_list(items, language):
    """
-    Formats a list of item dictionaries into a human-readable string.
+    Formats a list of item dictionaries (services) into a human-readable string.
    """
    if not items:
        return ""
@@ -92,7 +92,7 @@ def get_conversational_answer(field, label_data, answer_bank, language):
    return str(value) if value is not None else ""

 # --- Conversations Generation for Label Data ---
-def generate_field_level_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
+def generate_vqa_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
    """
    Generates multiple conversational VQA pairs for each field in a label file,
    and handles multi-page documents.
@@ -173,10 +173,9 @@ def generate_field_level_conversations(labels_path, image_root, system_prompt_pa
    print(f"Formatted data saved to: {output_path}")

 # --- Conversations Generation for only Images ---
-def generate_image_only_conversations(image_root, system_prompt_path, questions_path, output_path):
+def generate_vq_question(image_root, system_prompt_path, questions_path, output_path):
    """
    Generates conversational VQA pairs for each document based on images only (no labels).
-    Groups all images with the same prefix (including _1_scale, _2_scale, etc.) into the same conversation.
    Each conversation contains a system and user message for each question in the question bank.
    """
    system_prompt = read_text_file(system_prompt_path)
@@ -234,4 +233,4 @@ if __name__ == "__main__":
    
    # Run the main generation function
    # generate_field_level_conversations(LABELS_FILE, IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, ANSWER_BANK_FILE, OUTPUT_FILE)
-    generate_image_only_conversations(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)
+    generate_vq_question(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)