diff --git a/configs/mmkd_white_box.json b/configs/mmkd_white_box.json
new file mode 100644
index 0000000..9475a85
--- /dev/null
+++ b/configs/mmkd_white_box.json
@@ -0,0 +1,42 @@
+{
+  "job_type": "mmkd_white_box",
+  "dataset": {
+    "instruction_path": "/mnt/workspace/yyh/easydistill/test_data/mllm_demo_reformat.json",
+    "labeled_path": "/mnt/data/yyh/easydistill/test_data/mllm_demo_distill.json",
+    "logits_path": "./logits.json",
+    "seed": 42
+  },
+  "inference": {
+    "enable_chunked_prefill": true,
+    "seed": 777,
+    "gpu_memory_utilization": 0.9,
+    "temperature": 0.8,
+    "trust_remote_code": true,
+    "enforce_eager": false,
+    "max_model_len": 4096,
+    "max_new_tokens": 512,
+    "top_logits_num": 10
+  },
+  "distillation": {
+    "kd_ratio": 0.1,
+    "max_seq_length": 512,
+    "distillation_type": "forward_kld"
+  },
+  "models": {
+    "teacher": "/mnt/data/yyh/models/Qwen2.5-VL-3B-Instruct",
+    "student": "/mnt/data/yyh/models/Qwen2.5-VL-3B-Instruct"
+  },
+  "training": {
+    "output_dir": "./result/",
+    "num_train_epochs": 30,
+    "per_device_train_batch_size": 1,
+    "gradient_accumulation_steps": 8,
+    "max_length": 512,
+    "save_steps": 1000,
+    "logging_steps": 1,
+    "learning_rate": 2e-5,
+    "weight_decay": 0.05,
+    "warmup_ratio": 0.1,
+    "lr_scheduler_type": "cosine"
+  }
+}