init commit

2025-05-27 18:55:46 +08:00
parent 6f52a67249
commit 25caa8a90a
65 changed files with 4893 additions and 1 deletions
--- a/configs/rl_reward_api.json
+++ b/configs/rl_reward_api.json
@@ -0,0 +1,32 @@
+{
+  "job_type": "rl_reward_api",
+  "dataset": {
+    "instruction_path": "train.json",
+    "labeled_path": "train_labeled.json",
+    "template" : "chat_template_kd.jinja"
+  },
+  "inference":{
+    "base_url": "http://1157703270994901.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/quickstart_deploy_20250427_6wt1/v1/",
+    "api_key": "NjQ3OGE2ZGNiOWM4YjZkZTY5NDM4YWEyZjUyNGI3ZjRjNTAyMjM0Mw==",
+    "stream": true,
+    "positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
+    "negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Plese remember to generate low-quality responses.",
+    "max_new_tokens": 512
+  },
+  "models": {
+    "student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
+  },
+  "training": {
+    "output_dir": "./result/",
+    "max_length": 1024,
+    "num_train_epochs": 3,
+    "per_device_train_batch_size": 1,
+    "gradient_accumulation_steps": 8,
+    "save_steps": 1000,
+    "logging_steps": 1,
+    "learning_rate": 2e-5,
+    "weight_decay": 0.05,
+    "warmup_ratio": 0.1,
+    "lr_scheduler_type": "cosine"
+  }
+}