"""Convert a labelled conversation dataset into a chat-style VQA JSON file.

The script reads the records stored at ``json_label_path``, rewrites each
record into a list of ``{"role": ..., "content": ...}`` messages and writes
the resulting list of conversations to ``vqa.json`` in the current working
directory.
"""
import json

from tqdm import tqdm
from PIL import Image


def get_total_pixels(image_paths):
    """Return the summed pixel count (width * height) of the given images.

    Args:
        image_paths: Iterable of paths accepted by ``PIL.Image.open``.

    Returns:
        The total number of pixels over every image that could be read.
        Images that fail to open are reported on stdout and contribute
        nothing to the total.
    """
    total_pixels = 0
    for path in image_paths:
        try:
            with Image.open(path) as img:
                width, height = img.size
        except Exception as e:
            # Deliberately best-effort: one unreadable image must not abort
            # the whole run, so the error is reported and the image skipped.
            print(f"Error processing {path}: {e}")
        else:
            total_pixels += width * height
    return total_pixels


def build_conversation(item, media_dir):
    """Turn one dataset record into a list of chat messages.

    The first entry of ``item["conversations"]`` becomes the system message.
    The remaining entries alternate by position: odd positions become user
    messages, even positions become assistant messages. Every user message
    carries all of the record's images followed by the text of that turn.

    Args:
        item: Mapping with a ``"conversations"`` list (each entry exposing a
            ``"value"``) and an ``"images"`` list of path strings.
        media_dir: String prefix prepended to every entry of
            ``item["images"]``.

    Returns:
        A list of message dicts with ``"role"`` and ``"content"`` keys.

    Raises:
        KeyError: If a required key is missing from the record.
        IndexError: If ``item["conversations"]`` is empty.
    """
    turns = item["conversations"]
    conversation = [{"role": "system", "content": turns[0]["value"]}]

    # Plain concatenation on purpose: the emitted "image" strings must stay
    # exactly ``media_dir + relative_path``.
    image_paths = [media_dir + image_path for image_path in item["images"]]
    image_contents = [
        {"type": "image", "image": image_path} for image_path in image_paths
    ]

    # User content is shown in each odd position, assistant in each even one.
    for position, turn in enumerate(turns[1:], start=1):
        if position % 2 == 1:
            message = {
                "role": "user",
                "content": image_contents
                + [{"type": "text", "text": turn["value"]}],
            }
        else:
            message = {"role": "assistant", "content": turn["value"]}
        conversation.append(message)
    return conversation


def build_vqa(records, media_dir, max_total_pixels=None):
    """Build the list of conversations for every record in ``records``.

    Args:
        records: Iterable of dataset records (see ``build_conversation``).
        media_dir: String prefix prepended to every image path.
        max_total_pixels: Optional pixel budget. When given, records whose
            images sum to more than this many pixels are skipped. ``None``
            (the default) disables the filter. The filter that used to be
            commented out in this script used a budget of 3200000.

    Returns:
        A list with one conversation (list of message dicts) per kept record.
    """
    conversations = []
    for item in tqdm(records):
        if max_total_pixels is not None:
            image_paths = [
                media_dir + image_path for image_path in item["images"]
            ]
            if get_total_pixels(image_paths) > max_total_pixels:
                continue
        conversations.append(build_conversation(item, media_dir))
    return conversations


json_label_path = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/docai_mgp_facture_v2_1.json"
home_dir = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/"

# Decode explicitly as UTF-8 rather than relying on the platform locale.
with open(json_label_path, encoding="utf-8") as file:
    json_data = json.load(file)

# create VQA using json_data
vqa = build_vqa(json_data, home_dir)

# save vqa to json file
with open("vqa.json", "w", encoding="utf-8") as file:
    json.dump(vqa, file, indent=4)