From a4b27d083507497eb0595f12a61d1e983cf992da Mon Sep 17 00:00:00 2001
From: Andrewyan123 <49424389+Andrewyan123@users.noreply.github.com>
Date: Wed, 25 Jun 2025 17:14:21 +0800
Subject: [PATCH] Add files via upload

---
 .../distillqwen2.5-thoughtX/filter.py         | 37 +++++++++++++++++++
 1 file changed, 37 insertions(+)
 create mode 100644 recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py

diff --git a/recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py b/recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py
new file mode 100644
index 0000000..189f3e4
--- /dev/null
+++ b/recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py
@@ -0,0 +1,37 @@
+import json
+
+def filter_dataset(input_file, output_file, rv_condition, cd_condition):
+    """Filter scored records from a JSON file; save and return the survivors.
+
+    A record of the JSON array in ``input_file`` is kept when its
+    ``logical_correctness`` is 1 and ``rv_condition`` / ``cd_condition`` accept
+    its ``reasoning_verbosity`` / ``cognitive_difficulty`` (0 when absent).
+    Only ``instruction`` and ``output`` are written to ``output_file``.
+    """
+    with open(input_file, 'r', encoding='utf-8') as f:
+        data = json.load(f)
+    filtered_data = [
+        {'instruction': item['instruction'], 'output': item['output']}
+        for item in data
+        if item.get('logical_correctness', 0) == 1
+        and rv_condition(item.get('reasoning_verbosity', 0))
+        and cd_condition(item.get('cognitive_difficulty', 0))
+    ]
+    with open(output_file, 'w', encoding='utf-8') as f:
+        json.dump(filtered_data, f, ensure_ascii=False, indent=2)
+    print(f"筛选完成，共找到{len(filtered_data)}条符合条件的记录，已保存到{output_file}")
+    return filtered_data
+
+if __name__ == "__main__":
+    def verbosity_in_range(score):
+        """Accept reasoning-verbosity scores from 3 to 5 inclusive."""
+        return 3 <= score <= 5
+
+    def difficulty_is_four(score):
+        """Accept only a cognitive-difficulty score of exactly 4."""
+        return score == 4
+
+    filter_dataset(
+        input_file='input.json', output_file='filtered_output.json',
+        rv_condition=verbosity_in_range, cd_condition=difficulty_is_four,
+    )
\ No newline at end of file