Add files via upload
This commit is contained in:
37
recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py
Normal file
37
recipes/distilqwen_series/distillqwen2.5-thoughtX/filter.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import json
|
||||||
|
|
||||||
|
def _score(item, key):
    """Return ``item[key]``, treating a missing or null value as 0."""
    value = item.get(key)
    # A JSON ``null`` would otherwise reach the predicates as None and make
    # numeric comparisons such as ``score >= 3`` raise TypeError.
    return 0 if value is None else value


def filter_dataset(input_file, output_file, rv_condition, cd_condition):
    """Filter a scored JSON dataset down to instruction/output pairs.

    The input file must contain a JSON array of objects. A record is kept
    when all of the following hold:

    * its ``logical_correctness`` equals 1,
    * ``rv_condition`` accepts its ``reasoning_verbosity`` score,
    * ``cd_condition`` accepts its ``cognitive_difficulty`` score.

    Missing or null scores are passed to the predicates as 0. Kept records
    are reduced to their ``instruction`` and ``output`` fields and written
    to ``output_file`` as pretty-printed UTF-8 JSON.

    Args:
        input_file: Path of the JSON file to read.
        output_file: Path of the JSON file to write (overwritten).
        rv_condition: Callable taking the reasoning-verbosity score and
            returning a truthy value when the record should be kept.
        cd_condition: Callable taking the cognitive-difficulty score and
            returning a truthy value when the record should be kept.

    Returns:
        The list of kept ``{'instruction': ..., 'output': ...}`` dicts, in
        input order (the same data that was written to ``output_file``).

    Raises:
        KeyError: If a record passes the filter but lacks ``instruction``
            or ``output``.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Conditions are evaluated in the same short-circuit order as before:
    # correctness first, then verbosity, then difficulty.
    filtered_data = [
        {'instruction': item['instruction'], 'output': item['output']}
        for item in data
        if item.get('logical_correctness', 0) == 1
        and rv_condition(_score(item, 'reasoning_verbosity'))
        and cd_condition(_score(item, 'cognitive_difficulty'))
    ]

    with open(output_file, 'w', encoding='utf-8') as f:
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        json.dump(filtered_data, f, ensure_ascii=False, indent=2)

    # User-facing summary (Chinese): "Filtering complete, found N matching
    # records, saved to <output_file>".
    print(f"筛选完成,共找到{len(filtered_data)}条符合条件的记录,已保存到{output_file}")
    return filtered_data
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Example run: keep records whose reasoning-verbosity score is between
    # 3 and 5 (inclusive) and whose cognitive-difficulty score is exactly 4.

    def verbosity_in_range(score):
        """Accept reasoning-verbosity scores from 3 to 5 inclusive."""
        return 3 <= score <= 5

    def difficulty_is_four(score):
        """Accept only a cognitive-difficulty score of 4."""
        return score == 4

    filter_dataset(
        input_file='input.json',
        output_file='filtered_output.json',
        rv_condition=verbosity_in_range,
        cd_condition=difficulty_is_four,
    )
|
Reference in New Issue
Block a user