distillation/configs/mmkd_white_box.json

{
    "job_type": "mmkd_white_box",
    "dataset": {
      "instruction_path": "data/mllm_demo_reformat.json",
      "labeled_path": "data/mllm_demo_distill.json",
      "logits_path": "./logits.json",
      "seed": 42
    },
    "inference": {
      "enable_chunked_prefill": true,
      "seed": 777,
      "gpu_memory_utilization": 0.9,
      "temperature": 0.8,
      "trust_remote_code": true,
      "enforce_eager": false,
      "max_model_len": 4096,
      "max_new_tokens": 512,
      "top_logits_num": 10
    },
    "distillation": {
      "kd_ratio": 0.1,
      "max_seq_length": 512,
      "distillation_type": "forward_kld"
    },
    "models": {
      "teacher": "Qwen/Qwen2.5-VL-72B-Instruct",
      "student": "Qwen/Qwen2.5-VL-3B-Instruct"
    },
    "training": {
      "output_dir": "./result/",
      "num_train_epochs": 30,
      "per_device_train_batch_size": 1,
      "gradient_accumulation_steps": 8,
      "max_length": 512,
      "save_steps": 1000,
      "logging_steps": 1,
      "learning_rate": 2e-5,
      "weight_decay": 0.05,
      "warmup_ratio": 0.1,
      "lr_scheduler_type": "cosine"
    }
  }
Add files via upload 2025-07-24 11:41:15 +08:00			`{`
			`"job_type": "mmkd_white_box",`
			`"dataset": {`
Update mmkd_white_box.json 2025-07-24 11:42:31 +08:00			`"instruction_path": "data/mllm_demo_reformat.json",`
			`"labeled_path": "data/mllm_demo_distill.json",`
Add files via upload 2025-07-24 11:41:15 +08:00			`"logits_path": "./logits.json",`
			`"seed": 42`
			`},`
			`"inference":{`
			`"enable_chunked_prefill": true,`
			`"seed": 777,`
			`"gpu_memory_utilization": 0.9,`
			`"temperature": 0.8,`
			`"trust_remote_code": true,`
			`"enforce_eager": false,`
			`"max_model_len": 4096,`
			`"max_new_tokens": 512,`
			`"top_logits_num": 10`
			`},`
			`"distillation": {`
			`"kd_ratio": 0.1,`
			`"max_seq_length": 512,`
			`"distillation_type": "forward_kld"`
			`},`
			`"models": {`
Update mmkd_white_box.json 2025-07-24 11:42:31 +08:00			`"teacher": "Qwen/Qwen2.5-VL-72B-Instruct",`
			`"student": "Qwen/Qwen2.5-VL-3B-Instruct"`
Add files via upload 2025-07-24 11:41:15 +08:00			`},`
			`"training": {`
			`"output_dir": "./result/",`
			`"num_train_epochs": 30,`
			`"per_device_train_batch_size": 1,`
			`"gradient_accumulation_steps": 8,`
			`"max_length":512,`
			`"save_steps": 1000,`
			`"logging_steps": 1,`
			`"learning_rate": 2e-5,`
			`"weight_decay": 0.05,`
			`"warmup_ratio": 0.1,`
			`"lr_scheduler_type": "cosine"`
			`}`
Update mmkd_white_box.json 2025-07-24 11:42:31 +08:00			`}`