distillation/configs/instruction_expansion_batch.json

{
  "job_type": "instruction_expansion_batch",
  "dataset": {
    "input_path": "./train.json",
    "output_path": "./train_extended.json",
    "template": "./chat_template/chat_template_kd.jinja",
    "num_in_context_samples": 3,
    "num_output_samples": 10
  },
  "models": {
    "teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"
  },
  "inference": {
    "prompt": "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",
    "enable_chunked_prefill": true,
    "seed": 777,
    "gpu_memory_utilization": 0.9,
    "temperature": 0.8,
    "trust_remote_code": true,
    "enforce_eager": false,
    "max_model_len": 4096,
    "max_new_tokens": 512
  }
}
init commit 2025-05-27 18:55:46 +08:00			`{`
			`"job_type": "instruction_expansion_batch",`
			`"dataset": {`
			`"input_path": "./train.json",`
			`"output_path": "./train_extended.json",`
			`"template" : "./chat_template/chat_template_kd.jinja",`
			`"num_in_context_samples": 3,`
			`"num_output_samples": 10`
			`},`
			`"models": {`
			`"teacher": "teacher/Qwen/Qwen2.5-7B-Instruct/"`
			`},`
			`"inference":{`
			`"prompt" : "Assume you are a data synthesis expert. Given a few instructions as in-context examples, you should generate a new instruction similar to the examples to support the training of large language models. You should place your answer enclosed within <answer></answer> tags. The examples are as follows:",`
			`"enable_chunked_prefill": true,`
			`"seed": 777,`
			`"gpu_memory_utilization": 0.9,`
			`"temperature": 0.8,`
			`"trust_remote_code": true,`
			`"enforce_eager": false,`
			`"max_model_len": 4096,`
			`"max_new_tokens": 512`
			`}`
			`}`