distillation/configs/instruction_response_extraction_api.json

{
  "job_type": "instruction_response_extraction_api",
  "dataset": {
    "input_path": "./raw.json",
    "output_path": "./raw_extracted.json"
  },
  "inference": {
    "base_url": "ENDPOINT",
    "api_key": "TOKEN",
    "stream": true,
    "prompt": "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
    "max_new_tokens": 1024
  }
}
init commit 2025-05-27 18:55:46 +08:00			`{`
			`"job_type": "instruction_response_extraction_api",`
			`"dataset": {`
			`"input_path": "./raw.json",`
			`"output_path": "./raw_extracted.json"`
			`},`
			`"inference":{`
			`"base_url": "ENDPOINT",`
			`"api_key": "TOKEN",`
			`"stream": true,`
			"prompt" : "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
			`"max_new_tokens": 1024`
			`}`
			`}`