distillation/easydistill/mmkd/create_vqa.py

import json
from tqdm import tqdm
from PIL import Image

def get_total_pixels(image_paths):
    """Return the combined pixel count (width * height) of the given images.

    Args:
        image_paths: Iterable of image file paths to open.

    Returns:
        The sum of ``width * height`` over every image that was processed
        without error. An image that raises while being processed is reported
        on stdout and contributes nothing to the sum.
    """
    pixel_sum = 0
    for image_file in image_paths:
        try:
            with Image.open(image_file) as picture:
                cols, rows = picture.size
                pixel_sum = pixel_sum + cols * rows
        except Exception as err:
            # Best effort: report the failure and keep going with the rest.
            print(f"Error processing {image_file}: {err}")
    return pixel_sum

# Label file: a JSON list of samples. Each sample is read through
# item["conversations"] (a list of {"value": ...} turns) and item["images"]
# (a list of image paths that are appended to home_dir).
json_label_path = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/docai_mgp_facture_v2_1.json"

# Decode explicitly as UTF-8 instead of relying on the platform's locale
# default, so non-ASCII text in the labels is read identically everywhere.
with open(json_label_path, encoding="utf-8") as file:
    json_data = json.load(file)

home_dir = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/"

# Optional per-sample pixel budget. None (the default) keeps every sample.
# Set it to an integer (3200000 was the value in the formerly commented-out
# filter) to skip samples whose images exceed that many pixels in total.
max_total_pixels = None

# Build one chat-style conversation per sample.
vqa = []
for item in tqdm(json_data):
    turns = item["conversations"]

    # Turn 0 is used as the system prompt.
    conversations = [
        {
            "role": "system",
            "content": turns[0]["value"],
        }
    ]

    image_paths = [home_dir + image_path for image_path in item["images"]]
    if max_total_pixels is not None and get_total_pixels(image_paths) > max_total_pixels:
        continue
    image_contents = [{"type": "image", "image": image_path} for image_path in image_paths]

    # After the system turn, odd positions are user turns and even positions
    # are assistant turns. Every user turn carries the sample's images
    # followed by the turn's text.
    for i, turn in enumerate(turns[1:], start=1):
        if i % 2 == 1:
            conversations.append(
                {
                    "role": "user",
                    "content": image_contents + [{"type": "text", "text": turn["value"]}],
                }
            )
        else:
            conversations.append(
                {
                    "role": "assistant",
                    "content": turn["value"],
                }
            )
    vqa.append(conversations)

# Save the conversations to vqa.json in the current working directory.
with open("vqa.json", "w", encoding="utf-8") as file:
    json.dump(vqa, file, indent=4)
[Init] Init easy distill for Knowledge distillation 2025-08-07 08:38:26 +00:00
			`import json`
			`from tqdm import tqdm`
			`from PIL import Image`

			`def get_total_pixels(image_paths):`
			`total_pixels = 0`
			`for path in image_paths:`
			`try:`
			`with Image.open(path) as img:`
			`width, height = img.size`
			`total_pixels += width * height`
			`except Exception as e:`
			`print(f"Error processing {path}: {e}")`
			`return total_pixels`

			`json_label_path = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/docai_mgp_facture_v2_1.json"`

			`with open(json_label_path) as file:`
			`json_data = json.load(file)`

			`home_dir = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/"`
			`# create VQA using json_data`
			`vqa = []`
			`for item in tqdm(json_data):`
			`conversations = []`
			`system_object = {`
			`"role": "system",`
			`"content": item["conversations"][0]["value"]`
			`}`
			`conversations.append(system_object)`
			`image_paths = [home_dir + image_path for image_path in item["images"]]`
			`# if get_total_pixels(image_paths) > 3200000:`
			`# continue`
			`image_contents = [{"type": "image", "image": image_path} for image_path in image_paths]`
			`# user content is shown in each odd position`
			`for i in range(1, len(item["conversations"])):`
			`if i%2 == 1:`
			`user_object = {`
			`"role": "user",`
			`"content": image_contents + [{"type": "text", "text": item["conversations"][i]["value"]}]`
			`}`
			`conversations.append(user_object)`
			`else:`
			`assistant_object = {`
			`"role": "assistant",`
			`"content": item["conversations"][i]["value"]`
			`}`
			`conversations.append(assistant_object)`
			`vqa.append(conversations)`

			`# save vqa to json file`
			`with open("vqa.json", "w") as file:`
			`json.dump(vqa, file,indent=4)`