Add command-line arguments (argparse) to the VQA generation script

This commit is contained in:
2025-08-12 14:32:40 +00:00
committed by lphatnguyen
parent da0cae0b87
commit 96fa4efa49

View File

@@ -4,6 +4,7 @@ import random
from pathlib import Path
import glob
import re
import argparse
def load_json(filepath):
@@ -276,26 +277,36 @@ def generate_vq_question(
# --- Main Execution Block ---
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate VQA conversations from label data.")
parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0", help="Root directory containing images.")
parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0/label_data.json", help="Path to the label data JSON file.")
parser.add_argument("--system_prompt", type=str, default="/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
parser.add_argument("--questions", type=str, default="/home/nguyendc/phong-dev/distill/prompt/question_bank.json", help="Path to the question bank JSON file.")
parser.add_argument("--answers", type=str, default="/home/nguyendc/phong-dev/distill/prompt/answer_bank.json", help="Path to the answer bank JSON file.")
parser.add_argument("--output", type=str, default="/home/nguyendc/phong-dev/distill/vqa_label.json", help="Path to save the output VQA conversations JSON file.")
parser.add_argument("--ratio", type=float, default=0.4, help="Ratio of fields to sample for questions (default: 0.4).")
args = parser.parse_args()
# Define file paths
IMAGE_ROOT = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1"
LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
SYSTEM_PROMPT_FILE = os.path.join(IMAGE_ROOT, "system_prompt.txt")
UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label.json"
QUESTION_RATIO = 0.4
# IMAGE_ROOT = "/home/nguyendc/docai_dataset/factures/distill_data/lentille_distill_part_1_15"
# LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
# UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
# QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
# ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
# OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label_lentille.json"
# QUESTION_RATIO = 0.4
# Run the main generation function
generate_vqa_conversations(
LABELS_FILE,
IMAGE_ROOT,
UNSTRUCTURED_PROMPT_FILE,
QUESTION_BANK_FILE,
ANSWER_BANK_FILE,
OUTPUT_FILE,
QUESTION_RATIO,
)
generate_vqa_conversations(args.labels, args.image_root, args.system_prompt, args.questions, args.answers, args.output, args.ratio)
# generate_vqa_conversations(
# LABELS_FILE,
# IMAGE_ROOT,
# UNSTRUCTURED_PROMPT_FILE,
# QUESTION_BANK_FILE,
# ANSWER_BANK_FILE,
# OUTPUT_FILE,
# QUESTION_RATIO,
# )
# generate_vq_question(
# IMAGE_ROOT,
# UNSTRUCTURED_PROMPT_FILE,