init commit
This commit is contained in:
19
configs/accelerate_config/muti_gpu.yaml
Normal file
19
configs/accelerate_config/muti_gpu.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
compute_environment: LOCAL_MACHINE
|
||||
debug: false
|
||||
deepspeed_config:
|
||||
gradient_clipping: 1.0
|
||||
offload_optimizer_device: cpu
|
||||
offload_param_device: cpu
|
||||
zero_stage: 2
|
||||
distributed_type: DEEPSPEED
|
||||
gpu_ids: all
|
||||
machine_rank: 0
|
||||
main_training_function: main
|
||||
num_machines: 1
|
||||
num_processes: 8
|
||||
rdzv_backend: static
|
||||
same_network: true
|
||||
tpu_env: []
|
||||
tpu_use_cluster: false
|
||||
tpu_use_sudo: false
|
||||
use_cpu: false
|
8
configs/chat_template/chat_template_kd.jinja
Normal file
8
configs/chat_template/chat_template_kd.jinja
Normal file
@@ -0,0 +1,8 @@
|
||||
{{'<|im_start|>system\nYou are a helpful assistant.<|im_end|>'}}
|
||||
{{'<|im_start|>user\n' + message['content'] + '<|im_end|>'-}}
|
||||
{% if add_generation_prompt %}
|
||||
{{'<|im_start|>assistant'-}}
|
||||
{% endif %}
|
||||
{% if add_output %}
|
||||
{{'<|im_start|>assistant\n' + message['output'] + '<|im_end|>'-}}
|
||||
{% endif %}
|
14
configs/cot_generation_api.json
Normal file
14
configs/cot_generation_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_generation_api",
|
||||
"dataset": {
|
||||
"input_path": "./cot_question.json",
|
||||
"output_path": "./cot_question_with_answer.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\n\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_generation_batch.json
Normal file
22
configs/cot_generation_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_generation_batch",
|
||||
"dataset": {
|
||||
"input_path": "./cot_question.json",
|
||||
"output_path": "./cot_question_with_answer.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\n\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/cot_long2short_api.json
Normal file
14
configs/cot_long2short_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_long2short_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_simplified.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_long2short_batch.json
Normal file
22
configs/cot_long2short_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_long2short_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_simplified.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/cot_short2long_api.json
Normal file
14
configs/cot_short2long_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "cot_short2long_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_extended.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/cot_short2long_batch.json
Normal file
22
configs/cot_short2long_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "cot_short2long_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
16
configs/instruction_expansion_api.json
Normal file
16
configs/instruction_expansion_api.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"job_type": "instruction_expansion_api",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"num_in_context_samples": 3,
|
||||
"num_output_samples": 10
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
24
configs/instruction_expansion_batch.json
Normal file
24
configs/instruction_expansion_batch.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"job_type": "instruction_expansion_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"num_in_context_samples": 3,
|
||||
"num_output_samples": 10
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/instruction_refinement_api.json
Normal file
14
configs/instruction_refinement_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "instruction_refinement_api",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_refined.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user’s request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
22
configs/instruction_refinement_batch.json
Normal file
22
configs/instruction_refinement_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "instruction_refinement_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_refined.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference": {
|
||||
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user’s request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
14
configs/instruction_response_extraction_api.json
Normal file
14
configs/instruction_response_extraction_api.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"job_type": "instruction_response_extraction_api",
|
||||
"dataset": {
|
||||
"input_path": "./raw.json",
|
||||
"output_path": "./raw_extracted.json"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
|
||||
"max_new_tokens": 1024
|
||||
}
|
||||
}
|
22
configs/instruction_response_extraction_batch.json
Normal file
22
configs/instruction_response_extraction_batch.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"job_type": "instruction_response_extraction_batch",
|
||||
"dataset": {
|
||||
"input_path": "./train.json",
|
||||
"output_path": "./train_extended.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
|
||||
},
|
||||
"inference":{
|
||||
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
}
|
||||
}
|
32
configs/kd_black_box_api.json
Normal file
32
configs/kd_black_box_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "kd_black_box_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"max_length":512,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
36
configs/kd_black_box_local.json
Normal file
36
configs/kd_black_box_local.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"job_type": "kd_black_box_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"max_length":512,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
42
configs/kd_white_box.json
Normal file
42
configs/kd_white_box.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"job_type": "kd_white_box",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"logits_path": "./logits.json",
|
||||
"template" : "./chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512,
|
||||
"top_logits_num": 10
|
||||
},
|
||||
"distillation": {
|
||||
"kd_ratio": 0.5,
|
||||
"max_seq_length": 512,
|
||||
"distillation_type": "forward_kld"
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
32
configs/rank_dpo_api.json
Normal file
32
configs/rank_dpo_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "rank_dpo_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"beta": 0.1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
37
configs/rank_dpo_local.json
Normal file
37
configs/rank_dpo_local.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"job_type": "rank_dpo_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template/chat_template_kd.jinja",
|
||||
"seed": 42
|
||||
},
|
||||
"inference":{
|
||||
"system_prompt" : "You are a helpful assistant.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
|
||||
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"beta": 0.1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
25
configs/rl_grpo.json
Normal file
25
configs/rl_grpo.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"job_type": "rl_grpo",
|
||||
"dataset": {
|
||||
"instruction_path": "sample.json",
|
||||
"template" : "chat_template_kd.jinja",
|
||||
"train_ratio": 0.7,
|
||||
"seed": 42
|
||||
},
|
||||
"models": {
|
||||
"reward": "reward/",
|
||||
"student": "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 100,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
28
configs/rl_ppo.json
Normal file
28
configs/rl_ppo.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"job_type": "rl_ppo",
|
||||
"dataset": {
|
||||
"instruction_path": "sample.json",
|
||||
"template" : "chat_template_kd.jinja",
|
||||
"train_ratio": 0.7,
|
||||
"seed": 42
|
||||
},
|
||||
"models": {
|
||||
"reward": "reward/",
|
||||
"student": "Qwen/Qwen2.5-0.5B-Instruct"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"total_episodes": 1000,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 100,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine",
|
||||
"missing_eos_penalty": 1.0,
|
||||
"stop_token": "eos",
|
||||
"response_length": 512
|
||||
}
|
||||
}
|
32
configs/rl_reward_api.json
Normal file
32
configs/rl_reward_api.json
Normal file
@@ -0,0 +1,32 @@
|
||||
{
|
||||
"job_type": "rl_reward_api",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template_kd.jinja"
|
||||
},
|
||||
"inference":{
|
||||
"base_url": "ENDPOINT",
|
||||
"api_key": "TOKEN",
|
||||
"stream": true,
|
||||
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
|
||||
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"max_length": 1024,
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
37
configs/rl_reward_local.json
Normal file
37
configs/rl_reward_local.json
Normal file
@@ -0,0 +1,37 @@
|
||||
{
|
||||
"job_type": "rl_reward_local",
|
||||
"dataset": {
|
||||
"instruction_path": "train.json",
|
||||
"labeled_path": "train_labeled.json",
|
||||
"template" : "chat_template_kd.jinja"
|
||||
},
|
||||
"inference":{
|
||||
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
|
||||
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
|
||||
"enable_chunked_prefill": true,
|
||||
"seed": 777,
|
||||
"gpu_memory_utilization": 0.9,
|
||||
"temperature": 0.8,
|
||||
"trust_remote_code": true,
|
||||
"enforce_eager": false,
|
||||
"max_model_len": 4096,
|
||||
"max_new_tokens": 512
|
||||
},
|
||||
"models": {
|
||||
"teacher": "model/Qwen/Qwen2.5-3B-Instruct/",
|
||||
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
|
||||
},
|
||||
"training": {
|
||||
"output_dir": "./result/",
|
||||
"max_length": 1024,
|
||||
"num_train_epochs": 3,
|
||||
"per_device_train_batch_size": 1,
|
||||
"gradient_accumulation_steps": 8,
|
||||
"save_steps": 1000,
|
||||
"logging_steps": 1,
|
||||
"learning_rate": 2e-5,
|
||||
"weight_decay": 0.05,
|
||||
"warmup_ratio": 0.1,
|
||||
"lr_scheduler_type": "cosine"
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user