init commit
This commit is contained in:
19
configs/accelerate_config/muti_gpu.yaml
Normal file
19
configs/accelerate_config/muti_gpu.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
compute_environment: LOCAL_MACHINE
|
||||
debug: false
|
||||
deepspeed_config:
|
||||
gradient_clipping: 1.0
|
||||
offload_optimizer_device: cpu
|
||||
offload_param_device: cpu
|
||||
zero_stage: 2
|
||||
distributed_type: DEEPSPEED
|
||||
gpu_ids: all
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
num_machines: 1
|
||||
num_processes: 8
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
8
configs/chat_template/chat_template_kd.jinja
Normal file
8
configs/chat_template/chat_template_kd.jinja
Normal file
@@ -0,0 +1,8 @@
|
||||
{{'<|im_start|>system\nYou are a helpful assistant.<|im_end|>'}}
|
||||
{{'<|im_start|>user\n' + message['content'] + '<|im_end|>'-}}
|
||||
{% if add_generation_prompt %}
|
||||
{{'<|im_start|>assistant'-}}
|
||||
{% endif %}
|
||||
{% if add_output %}
|
||||
{{'<|im_start|>assistant\n' + message['output'] + '<|im_end|>'-}}
|
||||
{% endif %}
|
14
configs/cot_generation_api.json
Normal file
14
configs/cot_generation_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_generation_api",
|
||||
"dataset": {
|
||||
"input_path": "./cot_question.json",
|
||||
"output_path": "./cot_question_with_answer.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\n\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_generation_batch.json
Normal file
22
configs/cot_generation_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_generation_batch",
|
||||
"dataset": {
|
||||
"input_path": "./cot_question.json",
|
||||
"output_path": "./cot_question_with_answer.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\n\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/cot_long2short_api.json
Normal file
14
configs/cot_long2short_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_long2short_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_simplified.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_long2short_batch.json
Normal file
22
configs/cot_long2short_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_long2short_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_simplified.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/cot_short2long_api.json
Normal file
14
configs/cot_short2long_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_short2long_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_extended.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_short2long_batch.json
Normal file
22
configs/cot_short2long_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_short2long_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
16
configs/instruction_expansion_api.json
Normal file
16
configs/instruction_expansion_api.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"job_type": "instruction_expansion_api",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"num_in_context_samples": 3,
|
||||
"num_output_samples": 10
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
24
configs/instruction_expansion_batch.json
Normal file
24
configs/instruction_expansion_batch.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"job_type": "instruction_expansion_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"num_in_context_samples": 3,
|
||||
"num_output_samples": 10
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/instruction_refinement_api.json
Normal file
14
configs/instruction_refinement_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "instruction_refinement_api",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_refined.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user’s request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
22
configs/instruction_refinement_batch.json
Normal file
22
configs/instruction_refinement_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "instruction_refinement_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_refined.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference": {
|
||||
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user’s request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/instruction_response_extraction_api.json
Normal file
14
configs/instruction_response_extraction_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "instruction_response_extraction_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_extracted.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/instruction_response_extraction_batch.json
Normal file
22
configs/instruction_response_extraction_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "instruction_response_extraction_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
32
configs/kd_black_box_api.json
Normal file
32
configs/kd_black_box_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "kd_black_box_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"max_length":512,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
36
configs/kd_black_box_local.json
Normal file
36
configs/kd_black_box_local.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"job_type": "kd_black_box_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"max_length":512,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
42
configs/kd_white_box.json
Normal file
42
configs/kd_white_box.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"job_type": "kd_white_box",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"logits_path": "./logits.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512,
|
||||
"top_logits_num": 10
|
||||
},
|
||||
"distillation": {
|
||||
"kd_ratio": 0.5,
|
||||
"max_seq_length": 512,
|
||||
"distillation_type": "forward_kld"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
32
configs/rank_dpo_api.json
Normal file
32
configs/rank_dpo_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "rank_dpo_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"beta": 0.1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
37
configs/rank_dpo_local.json
Normal file
37
configs/rank_dpo_local.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"job_type": "rank_dpo_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"beta": 0.1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
25
configs/rl_grpo.json
Normal file
25
configs/rl_grpo.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"job_type": "rl_grpo",
|
||||
"dataset": {
|
||||
"instruction_path": "sample.json",
|
||||
"template" : "chat_template_kd.jinja",
|
||||
"train_ratio": 0.7,
|
||||
"seed": 42
|
||||
},
|
||||
"models": {
|
||||
"reward": "reward/",
|
||||
"student": "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 100,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
28
configs/rl_ppo.json
Normal file
28
configs/rl_ppo.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"job_type": "rl_ppo",
|
||||
"dataset": {
|
||||
"instruction_path": "sample.json",
|
||||
"template" : "chat_template_kd.jinja",
|
||||
"train_ratio": 0.7,
|
||||
"seed": 42
|
||||
},
|
||||
"models": {
|
||||
"reward": "reward/",
|
||||
"student": "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"total_episodes": 1000,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 100,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine",
|
||||
"missing_eos_penalty": 1.0,
|
||||
"stop_token": "eos",
|
||||
"response_length": 512
|
||||
}
|
||||
}
|
32
configs/rl_reward_api.json
Normal file
32
configs/rl_reward_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "rl_reward_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template_kd.jinja"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
|
||||
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"max_length": 1024,
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
37
configs/rl_reward_local.json
Normal file
37
configs/rl_reward_local.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"job_type": "rl_reward_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template_kd.jinja"
|
||||
},
|
||||
"inference":{
|
||||
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
|
||||
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "model/Qwen/Qwen2.5-3B-Instruct/",
|
||||
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"max_length": 1024,
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user