Files
distillation/dev_config/mmkd_white_box.json

42 lines
1.1 KiB
JSON
Raw Permalink Normal View History

{
  "job_type": "mmkd_white_box",
  "dataset": {
    "instruction_path": "/home/nguyendc/phat-dev/easydistill/easydistill/mmkd/vqa.json",
    "labeled_path": "./mllm_demo_distill.json",
    "logits_path": "./logits.json",
    "seed": 42
  },
  "inference": {
    "enable_chunked_prefill": true,
    "seed": 777,
    "gpu_memory_utilization": 0.99,
    "temperature": 0.8,
    "trust_remote_code": true,
    "enforce_eager": false,
    "max_model_len": 16000,
    "max_new_tokens": 1024,
    "top_logits_num": 20
  },
  "distillation": {
    "kd_ratio": 0.1,
    "max_seq_length": 512,
    "distillation_type": "forward_kld"
  },
  "models": {
    "teacher": "Qwen/Qwen2.5-VL-32B-Instruct",
    "student": "Qwen/Qwen2.5-VL-3B-Instruct"
  },
  "training": {
    "output_dir": "./result/",
    "num_train_epochs": 30,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "max_length": 512,
    "save_steps": 1000,
    "logging_steps": 1,
    "learning_rate": 2e-5,
    "weight_decay": 0.05,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine"
  }
}