```json
{
    "job_type": "rl_reward_api",
    "dataset": {
        "instruction_path": "train.json",
        "labeled_path": "train_labeled.json",
        "template": "chat_template_kd.jinja"
    },
    "inference": {
        "base_url": "http://1157703270994901.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/quickstart_deploy_20250427_6wt1/v1/",
        "api_key": "NjQ3OGE2ZGNiOWM4YjZkZTY5NDM4YWEyZjUyNGI3ZjRjNTAyMjM0Mw==",
        "stream": true,
        "positive_system_prompt": "You are a helpful assistant to generate high-quality responses.",
        "negative_system_prompt": "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
        "max_new_tokens": 512
    },
    "models": {
        "student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
    },
    "training": {
        "output_dir": "./result/",
        "max_length": 1024,
        "num_train_epochs": 3,
        "per_device_train_batch_size": 1,
        "gradient_accumulation_steps": 8,
        "save_steps": 1000,
        "logging_steps": 1,
        "learning_rate": 2e-5,
        "weight_decay": 0.05,
        "warmup_ratio": 0.1,
        "lr_scheduler_type": "cosine"
    }
}
```
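As a rough illustration of how the `inference` block above could be consumed, the sketch below assumes the PAI-EAS endpoint in `base_url` is OpenAI-compatible and uses the `openai` Python client to draw one "positive" and one "negative" completion per instruction with the two system prompts from the config. The config path `config.json`, the `generate` helper, and the `model="default"` placeholder are illustrative assumptions, not part of the official tooling.

```python
# Minimal sketch: generate a chosen/rejected response pair for reward-model
# training, driven by the "inference" section of the config shown above.
import json

from openai import OpenAI

# Hypothetical path to the JSON config shown above.
with open("config.json") as f:
    cfg = json.load(f)

inf = cfg["inference"]
client = OpenAI(base_url=inf["base_url"], api_key=inf["api_key"])


def generate(system_prompt: str, instruction: str) -> str:
    """Query the teacher endpoint with the given system prompt."""
    resp = client.chat.completions.create(
        model="default",  # placeholder; the deployed EAS service decides the actual model
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": instruction},
        ],
        max_tokens=inf["max_new_tokens"],
        stream=False,  # streaming omitted here for brevity
    )
    return resp.choices[0].message.content


instruction = "Explain what a reward model is."
chosen = generate(inf["positive_system_prompt"], instruction)    # high-quality sample
rejected = generate(inf["negative_system_prompt"], instruction)  # deliberately low-quality sample
print({"prompt": instruction, "chosen": chosen, "rejected": rejected})
```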