Add argparse command-line arguments to replace the hard-coded paths in the VQA generation script
This commit is contained in:
@@ -4,6 +4,7 @@ import random
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import glob
|
import glob
|
||||||
import re
|
import re
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
def load_json(filepath):
|
def load_json(filepath):
|
||||||
@@ -275,27 +276,37 @@ def generate_vq_question(
|
|||||||
|
|
||||||
# --- Main Execution Block ---
|
# --- Main Execution Block ---
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser(description="Generate VQA conversations from label data.")
|
||||||
|
parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0", help="Root directory containing images.")
|
||||||
|
parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0/label_data.json", help="Path to the label data JSON file.")
|
||||||
|
parser.add_argument("--system_prompt", type=str, default="/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
|
||||||
|
parser.add_argument("--questions", type=str, default="/home/nguyendc/phong-dev/distill/prompt/question_bank.json", help="Path to the question bank JSON file.")
|
||||||
|
parser.add_argument("--answers", type=str, default="/home/nguyendc/phong-dev/distill/prompt/answer_bank.json", help="Path to the answer bank JSON file.")
|
||||||
|
parser.add_argument("--output", type=str, default="/home/nguyendc/phong-dev/distill/vqa_label.json", help="Path to save the output VQA conversations JSON file.")
|
||||||
|
parser.add_argument("--ratio", type=float, default=0.4, help="Ratio of fields to sample for questions (default: 0.4).")
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
# Define file paths
|
# Define file paths
|
||||||
IMAGE_ROOT = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_1"
|
# IMAGE_ROOT = "/home/nguyendc/docai_dataset/factures/distill_data/lentille_distill_part_1_15"
|
||||||
LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
|
# LABELS_FILE = os.path.join(IMAGE_ROOT, "label_data.json")
|
||||||
SYSTEM_PROMPT_FILE = os.path.join(IMAGE_ROOT, "system_prompt.txt")
|
# UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
|
||||||
UNSTRUCTURED_PROMPT_FILE = "/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt"
|
# QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
|
||||||
QUESTION_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/question_bank.json"
|
# ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
|
||||||
ANSWER_BANK_FILE = "/home/nguyendc/phong-dev/distill/prompt/answer_bank.json"
|
# OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label_lentille.json"
|
||||||
OUTPUT_FILE = "/home/nguyendc/phong-dev/distill/vqa_label.json"
|
# QUESTION_RATIO = 0.4
|
||||||
QUESTION_RATIO = 0.4
|
|
||||||
|
|
||||||
# Run the main generation function
|
# Run the main generation function
|
||||||
generate_vqa_conversations(
|
generate_vqa_conversations(args.labels, args.image_root, args.system_prompt, args.questions, args.answers, args.output, args.ratio)
|
||||||
LABELS_FILE,
|
# generate_vqa_conversations(
|
||||||
IMAGE_ROOT,
|
# LABELS_FILE,
|
||||||
UNSTRUCTURED_PROMPT_FILE,
|
# IMAGE_ROOT,
|
||||||
QUESTION_BANK_FILE,
|
# UNSTRUCTURED_PROMPT_FILE,
|
||||||
ANSWER_BANK_FILE,
|
# QUESTION_BANK_FILE,
|
||||||
OUTPUT_FILE,
|
# ANSWER_BANK_FILE,
|
||||||
QUESTION_RATIO,
|
# OUTPUT_FILE,
|
||||||
)
|
# QUESTION_RATIO,
|
||||||
|
# )
|
||||||
# generate_vq_question(
|
# generate_vq_question(
|
||||||
# IMAGE_ROOT,
|
# IMAGE_ROOT,
|
||||||
# UNSTRUCTURED_PROMPT_FILE,
|
# UNSTRUCTURED_PROMPT_FILE,
|
||||||
|
Reference in New Issue
Block a user