[Init] Init easy distill for Knowledge distillation

2025-08-07 08:38:26 +00:00
parent 2f21aaae17
commit 0637599c3a
19 changed files with 170614 additions and 3 deletions
--- a/easydistill/mmkd/create_vqa.py
+++ b/easydistill/mmkd/create_vqa.py
@@ -0,0 +1,53 @@
+import json
+from tqdm import tqdm
+from PIL import Image
+
+def get_total_pixels(image_paths):
+    """Return the summed width*height of the images in image_paths; failures are printed and skipped."""
+    pixel_sum = 0
+    for path in image_paths:
+        try:
+            with Image.open(path) as picture:
+                pixel_sum += picture.size[0] * picture.size[1]
+        except Exception as e:
+            print(f"Error processing {path}: {e}")
+    return pixel_sum
+
+# Label file with one record per sample; each record is read for its
+# "conversations" list and its "images" list (paths that get home_dir prepended).
+json_label_path = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/docai_mgp_facture_v2_1.json"
+
+# JSON is UTF-8 by specification; do not depend on the locale's default encoding.
+with open(json_label_path, encoding="utf-8") as file:
+    json_data = json.load(file)
+
+# Prefix prepended verbatim to every image path (note the trailing slash).
+home_dir = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/"
+
+# Build one role-tagged message list per record.
+vqa = []
+for item in tqdm(json_data):
+    turns = item["conversations"]
+    # Turn 0 is used as the system prompt.
+    conversations = [{"role": "system", "content": turns[0]["value"]}]
+    image_paths = [home_dir + image_path for image_path in item["images"]]
+    # Optional size filter, currently disabled:
+    # if get_total_pixels(image_paths) > 3200000:
+    #     continue
+    image_contents = [{"type": "image", "image": image_path} for image_path in image_paths]
+    # After the system turn, odd positions are user turns and even positions are assistant turns.
+    for i, turn in enumerate(turns[1:], start=1):
+        if i % 2 == 1:
+            # Every user turn carries all of the record's images followed by its text.
+            message = {
+                "role": "user",
+                "content": image_contents + [{"type": "text", "text": turn["value"]}],
+            }
+        else:
+            message = {"role": "assistant", "content": turn["value"]}
+        conversations.append(message)
+    vqa.append(conversations)
+
+# Write all conversations to vqa.json in the current working directory.
+with open("vqa.json", "w", encoding="utf-8") as file:
+    json.dump(vqa, file, indent=4)