{ "job_type": "mmkd_white_box", "dataset": { "instruction_path": "/home/nguyendc/phat-dev/easydistill/easydistill/mmkd/vqa.json", "labeled_path": "./mllm_demo_distill.json", "logits_path": "./logits.json", "seed": 42 }, "inference": { "enable_chunked_prefill": true, "seed": 777, "gpu_memory_utilization": 0.99, "temperature": 0.8, "trust_remote_code": true, "enforce_eager": false, "max_model_len": 16000, "max_new_tokens": 1024, "top_logits_num": 20 }, "distillation": { "kd_ratio": 0.1, "max_seq_length": 512, "distillation_type": "forward_kld" }, "models": { "teacher": "Qwen/Qwen2.5-VL-32B-Instruct", "student": "Qwen/Qwen2.5-VL-3B-Instruct" }, "training": { "output_dir": "./result/", "num_train_epochs": 30, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8, "max_length": 512, "save_steps": 1000, "logging_steps": 1, "learning_rate": 2e-5, "weight_decay": 0.05, "warmup_ratio": 0.1, "lr_scheduler_type": "cosine" } }