update training
This commit is contained in:
@@ -194,8 +194,8 @@ def generate_vqa_conversations(
|
||||
+ [{"type": "text", "text": "<image>" * len(found_image_paths) + question_text}],
|
||||
}
|
||||
|
||||
assistant_message = {"role": "assistant", "content": answer_text}
|
||||
|
||||
assistant_message = {"role": "assistant_gt", "content": answer_text} #[{"type": "text", "text": answer_text}]
|
||||
|
||||
conversation = [system_message, user_message, assistant_message]
|
||||
final_conversations.append(conversation)
|
||||
|
||||
@@ -283,7 +283,7 @@ def generate_multiturn_conversations(
|
||||
first_answer = get_conversational_answer(
|
||||
main_field, label_data, answer_bank, language
|
||||
)
|
||||
conversation.append({"role": "assistant", "content": first_answer})
|
||||
conversation.append({"role": "assistant_gt", "content": first_answer})
|
||||
|
||||
# 4. Follow-up Turns for related fields
|
||||
for follow_up_field in related_fields:
|
||||
@@ -299,7 +299,7 @@ def generate_multiturn_conversations(
|
||||
follow_up_answer = get_conversational_answer(
|
||||
follow_up_field, label_data, answer_bank, language
|
||||
)
|
||||
conversation.append({"role": "assistant", "content": follow_up_answer})
|
||||
conversation.append({"role": "assistant_gt", "content": follow_up_answer})
|
||||
|
||||
final_conversations.append(conversation)
|
||||
|
||||
@@ -454,12 +454,12 @@ def generate_multiturn_vq_question(
|
||||
if __name__ == "__main__":
|
||||
|
||||
parser = argparse.ArgumentParser(description="Generate VQA conversations from label data.")
|
||||
parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0", help="Root directory containing images.")
|
||||
parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/docai_mgp_facture_v2_0/label_data.json", help="Path to the label data JSON file.")
|
||||
parser.add_argument("--system_prompt", type=str, default="/home/nguyendc/phong-dev/distillation/easydistill/mmkd/dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
|
||||
parser.add_argument("--questions", type=str, default="/home/nguyendc/phong-dev/distill/prompt/question_bank.json", help="Path to the question bank JSON file.")
|
||||
parser.add_argument("--answers", type=str, default="/home/nguyendc/phong-dev/distill/prompt/answer_bank.json", help="Path to the answer bank JSON file.")
|
||||
parser.add_argument("--output", type=str, default="/home/nguyendc/phong-dev/distillation/data/vqa_label.json", help="Path to save the output VQA conversations JSON file.")
|
||||
parser.add_argument("--image_root", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/trial_2/psycho_distill_300", help="Root directory containing images.")
|
||||
parser.add_argument("--labels", type=str, default="/home/nguyendc/docai_dataset/factures/distill_data/trial_2/docai_mgp_facture_v2_0_400/label_data.json", help="Path to the label data JSON file.")
|
||||
parser.add_argument("--system_prompt", type=str, default="./dev-vqa/qa_bank/unstructured_prompt.txt", help="Path to the system prompt text file.")
|
||||
parser.add_argument("--questions", type=str, default="./dev-vqa/qa_bank/question_bank.json", help="Path to the question bank JSON file.")
|
||||
parser.add_argument("--answers", type=str, default="./dev-vqa/qa_bank/answer_bank.json", help="Path to the answer bank JSON file.")
|
||||
parser.add_argument("--output", type=str, default="./data/psycho_distill_300_vq_1_turn.json", help="Path to save the output VQA conversations JSON file.")
|
||||
parser.add_argument("--ratio", type=float, default=0.4, help="Ratio of fields to sample for questions (default: 0.4).")
|
||||
args = parser.parse_args()
|
||||
|
||||
|
Reference in New Issue
Block a user