{
  "job_type": "rl_ppo",
  "dataset": {
    "instruction_path": "sample.json",
    "template": "chat_template_kd.jinja",
    "train_ratio": 0.7,
    "seed": 42
  },
  "models": {
    "reward": "reward/",
    "student": "Qwen/Qwen2.5-0.5B-Instruct"
  },
  "training": {
    "output_dir": "./result/",
    "total_episodes": 1000,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8,
    "save_steps": 100,
    "logging_steps": 1,
    "learning_rate": 2e-5,
    "weight_decay": 0.05,
    "warmup_ratio": 0.1,
    "lr_scheduler_type": "cosine",
    "missing_eos_penalty": 1.0,
    "stop_token": "eos",
    "response_length": 512
  }
}