Add argparse command-line arguments for the input/output paths and question ratio

This commit is contained in:
2025-08-12 14:32:40 +00:00
committed by lphatnguyen
parent da0cae0b87
commit 96fa4efa49

View File

@@ -4,6 +4,7 @@ import random
from pathlib import Path from pathlib import Path
import glob import glob
import re import re
import argparse
def load_json(filepath): def load_json(filepath):
@@ -275,27 +276,37 @@ def generate_vq_question(
# --- Main Execution Block --- # --- Main Execution Block ---
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Generate VQA conversations from label data.")
parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0", help="Root directory containing images.")
parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0/label_data.json", help="Path to the label data JSON file.")
parser.add_argument("--system_prompt", type=str, default="/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
parser.add_argument("--questions", type=str, default="/home/nguyendc/phong-dev/distill/prompt/question_bank.json", help="Path to the question bank JSON file.")
parser.add_argument("--answers", type=str, default="/home/nguyendc/phong-dev/distill/prompt/answer_bank.json", help="Path to the answer bank JSON file.")
parser.add_argument("--output", type=str, default="/home/nguyendc/phong-dev/distill/vqa_label.json", help="Path to save the output VQA conversations JSON file.")
parser.add_argument("--ratio", type=float, default=0.4, help="Ratio of fields to sample for questions (default: 0.4).")
args = parser.parse_args()
# Define file paths # Define file paths
IMAGE_ROOT = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1" # IMAGE_ROOT = "/home/nguyendc/docai_dataset/factures/distill_data/lentille_distill_part_1_15"
LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json") # LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
SYSTEM_PROMPT_FILE = os.path.join(IMAGE_ROOT, "system_prompt.txt") # UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt" # QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json" # ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json" # OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label_lentille.json"
OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label.json" # QUESTION_RATIO = 0.4
QUESTION_RATIO = 0.4
# Run the main generation function # Run the main generation function
generate_vqa_conversations( generate_vqa_conversations(args.labels, args.image_root, args.system_prompt, args.questions, args.answers, args.output, args.ratio)
LABELS_FILE, # generate_vqa_conversations(
IMAGE_ROOT, # LABELS_FILE,
UNSTRUCTURED_PROMPT_FILE, # IMAGE_ROOT,
QUESTION_BANK_FILE, # UNSTRUCTURED_PROMPT_FILE,
ANSWER_BANK_FILE, # QUESTION_BANK_FILE,
OUTPUT_FILE, # ANSWER_BANK_FILE,
QUESTION_RATIO, # OUTPUT_FILE,
) # QUESTION_RATIO,
# )
# generate_vq_question( # generate_vq_question(
# IMAGE_ROOT, # IMAGE_ROOT,
# UNSTRUCTURED_PROMPT_FILE, # UNSTRUCTURED_PROMPT_FILE,