distillation/configs/rl_reward_local.json
{
    "job_type": "rl_reward_local",
    "dataset": {
        "instruction_path": "train.json",
        "labeled_path": "train_labeled.json",
        "template": "chat_template_kd.jinja"
    },
    "inference": {
        "positive_system_prompt": "You are a helpful assistant to generate high-quality responses.",
        "negative_system_prompt": "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
        "enable_chunked_prefill": true,
        "seed": 777,
        "gpu_memory_utilization": 0.9,
        "temperature": 0.8,
        "trust_remote_code": true,
        "enforce_eager": false,
        "max_model_len": 4096,
        "max_new_tokens": 512
    },
    "models": {
        "teacher": "model/Qwen/Qwen2.5-3B-Instruct/",
        "student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
    },
    "training": {
        "output_dir": "./result/",
        "max_length": 1024,
        "num_train_epochs": 3,
        "per_device_train_batch_size": 1,
        "gradient_accumulation_steps": 8,
        "save_steps": 1000,
        "logging_steps": 1,
        "learning_rate": 2e-5,
        "weight_decay": 0.05,
        "warmup_ratio": 0.1,
        "lr_scheduler_type": "cosine"
    }
}
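The keys under "inference" mirror vLLM engine and sampling arguments, while "models" points at the teacher and student checkpoints. Below is a minimal sketch of how such a config might be consumed, assuming vLLM is used for generation; the helper names (load_config, build_engine_and_sampling) are hypothetical and not taken from the repository's actual code.

```python
import json

from vllm import LLM, SamplingParams


def load_config(path: str) -> dict:
    """Read the JSON config, e.g. rl_reward_local.json (hypothetical helper)."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def build_engine_and_sampling(cfg: dict) -> tuple[LLM, SamplingParams]:
    """Illustrative mapping of the config onto vLLM objects.

    Engine-level keys from "inference" go to LLM(...), decoding keys to
    SamplingParams. This is an assumption about how the fields are used,
    not the repository's actual loader.
    """
    inf = cfg["inference"]
    llm = LLM(
        model=cfg["models"]["teacher"],  # teacher model generates the responses
        trust_remote_code=inf["trust_remote_code"],
        gpu_memory_utilization=inf["gpu_memory_utilization"],
        max_model_len=inf["max_model_len"],
        enforce_eager=inf["enforce_eager"],
        enable_chunked_prefill=inf["enable_chunked_prefill"],
        seed=inf["seed"],
    )
    sampling = SamplingParams(
        temperature=inf["temperature"],
        max_tokens=inf["max_new_tokens"],
        seed=inf["seed"],
    )
    return llm, sampling


if __name__ == "__main__":
    cfg = load_config("distillation/configs/rl_reward_local.json")
    llm, sampling = build_engine_and_sampling(cfg)
    # The positive/negative system prompts would then be paired with each
    # instruction from dataset.instruction_path to produce high- and
    # low-quality responses for reward-model training.
    print(sampling)
```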