add
This commit is contained in:
58
models/LLaVA/build/lib/llava/data/alpaca-converter.py
Normal file
58
models/LLaVA/build/lib/llava/data/alpaca-converter.py
Normal file
@@ -0,0 +1,58 @@
|
||||
import argparse
|
||||
import json
|
||||
import pathlib
|
||||
|
||||
# Prompt from stanford alpaca's training script
|
||||
PROMPT_DICT = {
|
||||
"prompt_input": (
|
||||
"Below is an instruction that describes a task, paired with an input that provides further context. "
|
||||
"Write a response that appropriately completes the request.\n\n"
|
||||
"### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
|
||||
),
|
||||
"prompt_no_input": (
|
||||
"Below is an instruction that describes a task. "
|
||||
"Write a response that appropriately completes the request.\n\n"
|
||||
"### Instruction:\n{instruction}\n\n### Response:"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def main(args):
|
||||
data_path = pathlib.Path(args.data_path)
|
||||
with data_path.open() as f:
|
||||
data = json.load(f)
|
||||
|
||||
prompt_input, prompt_no_input = PROMPT_DICT["prompt_input"], PROMPT_DICT["prompt_no_input"]
|
||||
sources = [
|
||||
prompt_input.format_map(example) if example.get("input", "") != "" else prompt_no_input.format_map(example)
|
||||
for example in data
|
||||
]
|
||||
targets = [example['output'] for example in data]
|
||||
|
||||
new_data = []
|
||||
cnt = 1
|
||||
for s, t in zip(sources, targets):
|
||||
new_data.append({
|
||||
'id': str(cnt),
|
||||
'conversations': [
|
||||
{
|
||||
'from': 'human',
|
||||
'value': s,
|
||||
},
|
||||
{
|
||||
'from': 'gpt',
|
||||
'value': t,
|
||||
}
|
||||
]
|
||||
})
|
||||
cnt += 1
|
||||
|
||||
json.dump(new_data, open(args.output_path, 'w'), indent=2)
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--data_path', type=str, default='alpaca-data.json')
|
||||
parser.add_argument('--output_path', type=str, default='alpaca-data-conversation.json')
|
||||
args = parser.parse_args()
|
||||
main(args)
|
||||
|
Reference in New Issue
Block a user