53 lines
1.8 KiB
Python
53 lines
1.8 KiB
Python
|
import json
|
||
|
from tqdm import tqdm
|
||
|
from PIL import Image
|
||
|
|
||
|
def get_total_pixels(image_paths):
    """Return the combined pixel count (width * height) of the given images.

    Each path is opened with ``Image.open``. A path that fails for any
    reason is reported on stdout and contributes nothing to the total, so a
    single unreadable file never aborts the whole computation.

    Args:
        image_paths: iterable of image file paths.

    Returns:
        The sum of ``width * height`` over every image that was processed
        successfully; ``0`` when there are none.
    """
    areas = []
    for image_file in image_paths:
        try:
            with Image.open(image_file) as picture:
                cols, rows = picture.size
                areas.append(cols * rows)
        except Exception as err:
            # Best effort: log the failure and move on to the next image.
            print(f"Error processing {image_file}: {err}")
    return sum(areas)
|
||
|
|
||
|
# Label export to convert. Each record is expected to provide a
# "conversations" list (entries carrying a "value" string) and an "images"
# list of paths that are appended to `home_dir`.
json_label_path = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/docai_mgp_facture_v2_1.json"

# Decode explicitly as UTF-8 so the result does not depend on the machine's
# locale default encoding.
with open(json_label_path, encoding="utf-8") as file:
    json_data = json.load(file)

# Prefix prepended verbatim to every image path; it must keep its trailing "/".
home_dir = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/"

# Build one chat-style message list per record.
vqa = []
for item in tqdm(json_data):
    turns = item["conversations"]

    # The first turn is always emitted as the system message.
    conversations = [{"role": "system", "content": turns[0]["value"]}]

    image_paths = [home_dir + image_path for image_path in item["images"]]
    # Optional size filter, currently disabled:
    # if get_total_pixels(image_paths) > 3200000:
    #     continue
    image_contents = [{"type": "image", "image": image_path} for image_path in image_paths]

    # After the system turn, odd positions are user turns and even positions
    # are assistant turns.
    for i, turn in enumerate(turns[1:], start=1):
        if i % 2 == 1:
            # Every user turn carries all of the record's images, then its text.
            message = {
                "role": "user",
                "content": image_contents + [{"type": "text", "text": turn["value"]}],
            }
        else:
            message = {"role": "assistant", "content": turn["value"]}
        conversations.append(message)

    vqa.append(conversations)

# Save the converted conversations. json.dump escapes non-ASCII characters by
# default, so the file content is the same as before; the encoding is only
# stated explicitly.
with open("vqa.json", "w", encoding="utf-8") as file:
    json.dump(vqa, file, indent=4)
|