init commit

This commit is contained in:
熊兮
2025-05-27 18:55:46 +08:00
parent 6f52a67249
commit 25caa8a90a
65 changed files with 4893 additions and 1 deletions

View File

@@ -0,0 +1,19 @@
compute_environment: LOCAL_MACHINE
debug: false
deepspeed_config:
gradient_clipping: 1.0
offload_optimizer_device: cpu
offload_param_device: cpu
zero_stage: 2
distributed_type: DEEPSPEED
gpu_ids: all
machine_rank: 0
main_training_function: main
num_machines: 1
num_processes: 8
rdzv_backend: static
same_network: true
tpu_env: []
tpu_use_cluster: false
tpu_use_sudo: false
use_cpu: false

View File

@@ -0,0 +1,8 @@
{{'<|im_start|>system\nYou are a helpful assistant.<|im_end|>'}}
{{'<|im_start|>user\n' + message['content'] + '<|im_end|>'-}}
{% if add_generation_prompt %}
{{'<|im_start|>assistant'-}}
{% endif %}
{% if add_output %}
{{'<|im_start|>assistant\n' + message['output'] + '<|im_end|>'-}}
{% endif %}

View File

@@ -0,0 +1,14 @@
{
"job_type": "cot_generation_api",
"dataset": {
"input_path": "./cot_question.json",
"output_path": "./cot_question_with_answer.json"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\\n\\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
"max_new_tokens": 1024
}
}

View File

@@ -0,0 +1,22 @@
{
"job_type": "cot_generation_batch",
"dataset": {
"input_path": "./cot_question.json",
"output_path": "./cot_question_with_answer.json",
"template" : "./chat_template/chat_template_kd.jinja"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference":{
"prompt" : "Your role as an assistant involves thoroughly exploring questions through a systematic long thinking process before providing the final precise and accurate solutions. This requires engaging in a comprehensive cycle of analysis, summarizing, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. Please structure your response into two main sections: Thought and Solution. In the Thought section, detail your reasoning process using the specified format: <|begin_of_thought|> {thought with steps separated with '\\n\\n'} <|end_of_thought|> Each step should include detailed considerations such as analyzing questions, summarizing relevant findings, brainstorming new ideas, verifying the accuracy of the current steps, refining any errors, and revisiting previous steps. In the Solution section, based on various attempts, explorations, and reflections from the Thought section, systematically present the final solution that you deem correct. The solution should remain a logical, accurate, concise expression style and detail necessary step needed to reach the conclusion, formatted as follows: <|begin_of_solution|> {final formatted, precise, and clear solution} <|end_of_solution|> Now, try to solve the following question through the above guidelines:",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,14 @@
{
"job_type": "cot_long2short_api",
"dataset": {
"input_path": "./raw.json",
"output_path": "./raw_simplified.json"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
"max_new_tokens": 1024
}
}

View File

@@ -0,0 +1,22 @@
{
"job_type": "cot_long2short_batch",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_simplified.json",
"template" : "./chat_template/chat_template_kd.jinja"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference":{
"prompt" : "You are a helpful assistant who is highly skilled at simplifying reasoning processes. Given a problem, its answer and its reasoning process, your task is to simplify the reasoning process so that a small language model (e.g., a 7B model) can reliably follow the steps to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the simplified reasoning process with no additional explanation or commentary.",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,14 @@
{
"job_type": "cot_short2long_api",
"dataset": {
"input_path": "./raw.json",
"output_path": "./raw_extended.json"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
"max_new_tokens": 1024
}
}

View File

@@ -0,0 +1,22 @@
{
"job_type": "cot_short2long_batch",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_extended.json",
"template" : "./chat_template/chat_template_kd.jinja"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference":{
"prompt" : "You are a helpful assistant who is highly skilled at extending reasoning processes. Given a problem, its answer and its reasoning process, your task is to extend the reasoning process by adding necessary details and intermediate steps, so that a small language model (e.g., a 7B model) can follow the extended reasoning process to solve the problem. If the original reasoning process is divided into multiple steps separated by two newline characters (\\n\\n), your output must preserve this formatting. You must output ONLY the extended reasoning process with no additional explanation or commentary.",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,16 @@
{
"job_type": "instruction_expansion_api",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_extended.json",
"num_in_context_samples": 3,
"num_output_samples": 10
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,24 @@
{
"job_type": "instruction_expansion_batch",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_extended.json",
"template" : "./chat_template/chat_template_kd.jinja",
"num_in_context_samples": 3,
"num_output_samples": 10
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference":{
"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,14 @@
{
"job_type": "instruction_refinement_api",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_refined.json"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user's request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,22 @@
{
"job_type": "instruction_refinement_batch",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_refined.json",
"template" : "./chat_template/chat_template_kd.jinja"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference": {
"prompt" : "Assume you are a prompt re-writing expert. Given an instruction as input, you should generate a new instruction semantically similar to the input to support the training of large language models. Transform the input raw prompt into a detailed prompt that comprehensively captures the user's request. Make sure to maintain the original intent while significantly enhancing clarity and depth. You should place your answer enclosed within <answer></answer> tags. The input prompt is as follows:",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,14 @@
{
"job_type": "instruction_response_extraction_api",
"dataset": {
"input_path": "./raw.json",
"output_path": "./raw_extracted.json"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
"max_new_tokens": 1024
}
}

View File

@@ -0,0 +1,22 @@
{
"job_type": "instruction_response_extraction_batch",
"dataset": {
"input_path": "./train.json",
"output_path": "./train_extended.json",
"template" : "./chat_template/chat_template_kd.jinja"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
},
"inference":{
"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
}
}

View File

@@ -0,0 +1,32 @@
{
"job_type": "kd_black_box_api",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "./chat_template/chat_template_kd.jinja",
"seed": 42
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"system_prompt" : "You are a helpful assistant.",
"max_new_tokens": 512
},
"models": {
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"max_length":512,
"save_steps": 1000,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

View File

@@ -0,0 +1,36 @@
{
"job_type": "kd_black_box_local",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "./chat_template/chat_template_kd.jinja",
"seed": 42
},
"inference":{
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"max_length":512,
"save_steps": 1000,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

42
configs/kd_white_box.json Normal file
View File

@@ -0,0 +1,42 @@
{
"job_type": "kd_white_box",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"logits_path": "./logits.json",
"template" : "./chat_template/chat_template_kd.jinja",
"seed": 42
},
"inference":{
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512,
"top_logits_num": 10
},
"distillation": {
"kd_ratio": 0.5,
"max_seq_length": 512,
"distillation_type": "forward_kld"
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 1000,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

32
configs/rank_dpo_api.json Normal file
View File

@@ -0,0 +1,32 @@
{
"job_type": "rank_dpo_api",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "chat_template/chat_template_kd.jinja",
"seed": 42
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"system_prompt" : "You are a helpful assistant.",
"max_new_tokens": 512
},
"models": {
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 1000,
"logging_steps": 1,
"beta": 0.1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

View File

@@ -0,0 +1,37 @@
{
"job_type": "rank_dpo_local",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "chat_template/chat_template_kd.jinja",
"seed": 42
},
"inference":{
"system_prompt" : "You are a helpful assistant.",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
},
"models": {
"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/",
"student": "student/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 1000,
"logging_steps": 1,
"beta": 0.1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

25
configs/rl_grpo.json Normal file
View File

@@ -0,0 +1,25 @@
{
"job_type": "rl_grpo",
"dataset": {
"instruction_path": "sample.json",
"template" : "chat_template_kd.jinja",
"train_ratio": 0.7,
"seed": 42
},
"models": {
"reward": "reward/",
"student": "Qwen/Qwen2.5-0.5B-Instruct"
},
"training": {
"output_dir": "./result/",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"num_train_epochs": 3,
"save_steps": 100,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

28
configs/rl_ppo.json Normal file
View File

@@ -0,0 +1,28 @@
{
"job_type": "rl_ppo",
"dataset": {
"instruction_path": "sample.json",
"template" : "chat_template_kd.jinja",
"train_ratio": 0.7,
"seed": 42
},
"models": {
"reward": "reward/",
"student": "Qwen/Qwen2.5-0.5B-Instruct"
},
"training": {
"output_dir": "./result/",
"total_episodes": 1000,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 100,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine",
"missing_eos_penalty": 1.0,
"stop_token": "eos",
"response_length": 512
}
}

View File

@@ -0,0 +1,32 @@
{
"job_type": "rl_reward_api",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "chat_template_kd.jinja"
},
"inference":{
"base_url": "ENDPOINT",
"api_key": "TOKEN",
"stream": true,
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
"max_new_tokens": 512
},
"models": {
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"max_length": 1024,
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 1000,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}

View File

@@ -0,0 +1,37 @@
{
"job_type": "rl_reward_local",
"dataset": {
"instruction_path": "train.json",
"labeled_path": "train_labeled.json",
"template" : "chat_template_kd.jinja"
},
"inference":{
"positive_system_prompt" : "You are a helpful assistant to generate high-quality responses.",
"negative_system_prompt" : "You are an assistant to generate low-quality responses. This is for the training of my reward model. Please remember to generate low-quality responses.",
"enable_chunked_prefill": true,
"seed": 777,
"gpu_memory_utilization": 0.9,
"temperature": 0.8,
"trust_remote_code": true,
"enforce_eager": false,
"max_model_len": 4096,
"max_new_tokens": 512
},
"models": {
"teacher": "model/Qwen/Qwen2.5-3B-Instruct/",
"student": "model/Qwen/Qwen2.5-0.5B-Instruct/"
},
"training": {
"output_dir": "./result/",
"max_length": 1024,
"num_train_epochs": 3,
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 8,
"save_steps": 1000,
"logging_steps": 1,
"learning_rate": 2e-5,
"weight_decay": 0.05,
"warmup_ratio": 0.1,
"lr_scheduler_type": "cosine"
}
}