modify gen_vqa_bank
This commit is contained in:
@@ -7,7 +7,7 @@ import re
|
||||
|
||||
def load_json(filepath):
|
||||
"""
|
||||
Loads a JSON file with robust error handling.
|
||||
Loads a JSON file.
|
||||
"""
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
@@ -21,7 +21,7 @@ def load_json(filepath):
|
||||
|
||||
def read_text_file(filepath):
|
||||
"""
|
||||
Loads a simple text file.
|
||||
Loads a prompt from a text file.
|
||||
"""
|
||||
try:
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
@@ -32,7 +32,7 @@ def read_text_file(filepath):
|
||||
|
||||
def format_items_list(items, language):
|
||||
"""
|
||||
Formats a list of item dictionaries into a human-readable string.
|
||||
Formats a list of item dictionaries (services) into a human-readable string.
|
||||
"""
|
||||
if not items:
|
||||
return ""
|
||||
@@ -92,7 +92,7 @@ def get_conversational_answer(field, label_data, answer_bank, language):
|
||||
return str(value) if value is not None else ""
|
||||
|
||||
# --- Conversation Generation for Label Data ---
|
||||
def generate_field_level_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
|
||||
def generate_vqa_conversations(labels_path, image_root, system_prompt_path, questions_path, answers_path, output_path):
|
||||
"""
|
||||
Generates multiple conversational VQA pairs for each field in a label file,
|
||||
and handles multi-page documents.
|
||||
@@ -173,10 +173,9 @@ def generate_field_level_conversations(labels_path, image_root, system_prompt_pa
|
||||
print(f"Formatted data saved to: {output_path}")
|
||||
|
||||
# --- Conversation Generation for Images Only ---
|
||||
def generate_image_only_conversations(image_root, system_prompt_path, questions_path, output_path):
|
||||
def generate_vq_question(image_root, system_prompt_path, questions_path, output_path):
|
||||
"""
|
||||
Generates conversational VQA pairs for each document based on images only (no labels).
|
||||
Groups all images with the same prefix (including _1_scale, _2_scale, etc.) into the same conversation.
|
||||
Each conversation contains a system and user message for each question in the question bank.
|
||||
"""
|
||||
system_prompt = read_text_file(system_prompt_path)
|
||||
@@ -234,4 +233,4 @@ if __name__ == "__main__":
|
||||
|
||||
# Run the main generation function
|
||||
# generate_vqa_conversations(LABELS_FILE, IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, ANSWER_BANK_FILE, OUTPUT_FILE)
|
||||
generate_image_only_conversations(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)
|
||||
generate_vq_question(IMAGE_ROOT, SYSTEM_PROMPT_FILE, QUESTION_BANK_FILE, OUTPUT_FILE)
|
Reference in New Issue
Block a user