{ "job_type": "mmkd_white_box", "dataset": { "instruction_path": "/home/nguyendc/phat-dev/easydistill/easydistill/mmkd/vqa.json", "labeled_path": "./mllm_demo_distill.json", "logits_path": "./logits.json", "seed": 42 }, "inference": { "enable_chunked_prefill": true, "seed": 777, "gpu_memory_utilization": 0.99, "temperature": 0.8, "trust_remote_code": true, "enforce_eager": false, "max_model_len": 16000, "max_new_tokens": 1024, "top_logits_num": 20 }, "distillation": { "kd_ratio": 0.1, "max_seq_length": 512, "distillation_type": "forward_kld" }, "models": { "teacher": "Qwen/Qwen2.5-VL-32B-Instruct", "student": "Qwen/Qwen2.5-VL-3B-Instruct" }, "training": { "output_dir": "./result/", "num_train_epochs": 30, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8, "max_length": 512, "save_steps": 1000, "logging_steps": 1, "learning_rate": 2e-5, "weight_decay": 0.05, "warmup_ratio": 0.1, "lr_scheduler_type": "cosine" } }