Modify gen_vqa_bank: rename conversation-generation functions and update docstrings

This commit is contained in:
2025-08-07 15:45:55 +00:00
parent 0637599c3a
commit bbefb444a9

View File

@@ -7,7 +7,7 @@ import re
def load_json(filepath):
"""
Loads a JSON file with robust error handling.
Loads a JSON file.
"""
try:
with open(filepath, 'r', encoding='utf-8') as f:
@@ -21,7 +21,7 @@ def load_json(filepath):
def read_text_file(filepath):
"""
Loads a simple text file.
Loads a prompt from a text file.
"""
try:
with open(filepath, 'r', encoding='utf-8') as f:
@@ -32,7 +32,7 @@ def read_text_file(filepath):
def format_items_list(items, language):
"""
Formats a list of item dictionaries into a human-readable string.
Formats a list of item dictionaries (services) into a human-readable string.
"""
if not items:
return ""
@@ -92,7 +92,7 @@ def get_conversational_answer(field, label_data, answer_bank, language):
return str(value) if value is not None else ""
# --- Conversations Generation for Label Data ---
def generate_field_level_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
def generate_vqa_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
"""
Generates multiple conversational VQA pairs for each field in a label file,
and handles multi-page documents.
@@ -173,10 +173,9 @@ def generate_field_level_conversations(labels_path, image_root, system_prompt_pa
print(f"Formatted data saved to: {output_path}")
# --- Conversations Generation for only Images ---
def generate_image_only_conversations(image_root, system_prompt_path, questions_path, output_path):
def generate_vq_question(image_root, system_prompt_path, questions_path, output_path):
"""
Generates conversational VQA pairs for each document based on images only (no labels).
Groups all images with the same prefix (including _1_scale, _2_scale, etc.) into the same conversation.
Each conversation contains a system and user message for each question in the question bank.
"""
system_prompt = read_text_file(system_prompt_path)
@@ -234,4 +233,4 @@ if __name__ == "__main__":
# Run the main generation function
# generate_vqa_conversations(LABELS_FILE, IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, ANSWER_BANK_FILE, OUTPUT_FILE)
generate_image_only_conversations(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)
generate_vq_question(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)