Add command-line arguments (argparse) to gen_vqa_bank.py

2025-08-12 14:32:40 +00:00
parent da0cae0b87
commit 96fa4efa49
1 changed file with 28 additions and 17 deletions
--- a/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
+++ b/easydistill/mmkd/dev-vqa/gen_vqa_bank.py
@@ -4,6 +4,7 @@ import random
 from pathlib import Path
 import glob
 import re
+import argparse
 def load_json(filepath):
@@ -275,27 +276,37 @@ def generate_vq_question(
 # --- Main Execution Block ---
 if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate VQA conversations from label data.")
+    parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0", help="Root directory containing images.")
+    parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0/label_data.json", help="Path to the label data JSON file.")
+    parser.add_argument("--system_prompt", type=str, default="/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
+    parser.add_argument("--questions", type=str, default="/home/nguyendc/phong-dev/distill/prompt/question_bank.json", help="Path to the question bank JSON file.")
+    parser.add_argument("--answers", type=str, default="/home/nguyendc/phong-dev/distill/prompt/answer_bank.json", help="Path to the answer bank JSON file.")
+    parser.add_argument("--output", type=str, default="/home/nguyendc/phong-dev/distill/vqa_label.json", help="Path to save the output VQA conversations JSON file.")
+    parser.add_argument("--ratio", type=float, default=0.4, help="Ratio of fields to sample for questions (default: 0.4).")
+    args = parser.parse_args()
    # Define file paths
-    IMAGE_ROOT = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1"
+    # IMAGE_ROOT = "/home/nguyendc/docai_dataset/factures/distill_data/lentille_distill_part_1_15"
-    LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
+    # LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
-    SYSTEM_PROMPT_FILE = os.path.join(IMAGE_ROOT, "system_prompt.txt")
+    # UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
-    UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
+    # QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
-    QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
+    # ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
-    ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
+    # OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label_lentille.json"
-    OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label.json"
+    # QUESTION_RATIO = 0.4
-    QUESTION_RATIO = 0.4
    # Run the main generation function
-    generate_vqa_conversations(
+    generate_vqa_conversations(args.labels, args.image_root, args.system_prompt, args.questions, args.answers, args.output, args.ratio)
-        LABELS_FILE,
+    # generate_vqa_conversations(
-        IMAGE_ROOT,
+    #     LABELS_FILE,
-        UNSTRUCTURED_PROMPT_FILE,
+    #     IMAGE_ROOT,
-        QUESTION_BANK_FILE,
+    #     UNSTRUCTURED_PROMPT_FILE,
-        ANSWER_BANK_FILE,
+    #     QUESTION_BANK_FILE,
-        OUTPUT_FILE,
+    #     ANSWER_BANK_FILE,
-        QUESTION_RATIO,
+    #     OUTPUT_FILE,
-    )
+    #     QUESTION_RATIO,
+    # )
    # generate_vq_question(
    #     IMAGE_ROOT,
    #     UNSTRUCTURED_PROMPT_FILE,