Files
distillation/configs/rl_grpo.json

25 lines
570 B
JSON
Raw Permalink Normal View History

2025-05-27 18:55:46 +08:00
{
"job_type": "rl_grpo",
"dataset": {
"instruction_path": "sample.json",
"template" : "chat_template_kd.jinja",
"train_ratio": 0.7,
"seed": 42
},
"models": {
"reward": "reward/",
"student": "Qwen/Qwen2.5-0.5B-Instruct"
},
"training": {
"output_dir": "./result/",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"num_train_epochs": 3,
"save_steps": 100,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}