init commit
This commit is contained in:
25
configs/rl_grpo.json
Normal file
25
configs/rl_grpo.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"job_type": "rl_grpo",
|
||||
"dataset": {
|
||||
"instruction_path": "sample.json",
|
||||
"template" : "chat_template_kd.jinja",
|
||||
"train_ratio": 0.7,
|
||||
"seed": 42
|
||||
},
|
||||
"models": {
|
||||
"reward": "reward/",
|
||||
"student": "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 100,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user