init commit

2025-05-27 18:55:46 +08:00
parent 6f52a67249
commit 25caa8a90a
65 changed files with 4893 additions and 1 deletions
--- a/configs/instruction_response_extraction_api.json
+++ b/configs/instruction_response_extraction_api.json
@@ -0,0 +1,14 @@
+{
+  "job_type": "instruction_response_extraction_api",
+  "dataset": {
+    "input_path": "./raw.json",
+    "output_path": "./raw_extracted.json"
+  },
+  "inference": {
+    "base_url": "ENDPOINT",
+    "api_key": "TOKEN",
+    "stream": true,
+    "prompt": "Assume you are a data synthesis expert. Given plain text as input, you should generate an instruction-response pair where the instruction and the response are derived from the knowledge of the plain text to support the training of large language models. The response should properly answer the instruction. You should place your instruction enclosed within <instruction></instruction> tags, and place your response enclosed within <response></response> tags. The input plain text is as follows:",
+    "max_new_tokens": 1024
+  }
+}