fix: form with multi-part python, np format for model

2025-08-17 14:12:26 +00:00
parent 147c52de71
commit e09128f94c
3 changed files with 26 additions and 10 deletions
--- a/app.py
+++ b/app.py
@@ -5,7 +5,7 @@ from pathlib import Path
 from typing import Literal, Optional
 import base64

-import cv2
+import numpy as np
 import supervision as sv
 import uvicorn
 import yaml
@@ -24,7 +24,7 @@ PROCESSED_FOLDER = Path(os.environ.get("TEMP_IMG_FOLDER", "temp_dir"))
 PROCESSED_FOLDER.mkdir(parents=True, exist_ok=True)
 BASE_URL = "http://127.0.0.1:8000"

-API_PARTNER_KEY = os.environ.get("API_PARTNER_KEY", "")
+API_PARTNER_KEY = os.environ.get("API_PARTNER_KEY", "dev-AfghDgr3fgf74vc")  # NOTE(review): hard-coded fallback key committed to source; presumably dev-only - confirm it can never be accepted in production
 API_KEY_NAME = "x-api-key"
 api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

@@ -38,10 +38,14 @@ app.add_middleware(
    allow_headers=["*"],
 )

-pdf_converter: PdfConverter = PdfConverter()
+pdf_converter: PdfConverter = PdfConverter(dpi=150)
 grounding_model = Model(
-    model_config_path=os.environ.get("GROUNDING_DINO_CONFIG"),
-    model_checkpoint_path=os.environ.get("GROUNDING_DINO_CHECKPOINT"),
+    model_config_path=os.environ.get(
+        "GROUNDING_DINO_CONFIG", "groundingdino/config/GroundingDINO_SwinT_OGC.py"
+    ),
+    model_checkpoint_path=os.environ.get(
+        "GROUNDING_DINO_CHECKPOINT", "gdino_checkpoints/groundingdino_swint_ogc.pth"
+    ),
    device="cuda:0",
 )
 BOX_THRESHOLD = 0.4
@@ -112,8 +116,8 @@ async def api_key_middleware(request: Request, call_next):

@app.post("/crop_ooi")
 async def crop_object_of_interest(
-    document_file: Optional[UploadFile] = File(
-        None, description="The document to process."
+    document_file: UploadFile = File(
+        ..., description="The document to process."
    ),
    concept_list: Optional[list[str]] = Form(
        ["ID document"], description="List of concepts to detect e.g. dog, cat, rain"
@@ -144,7 +148,7 @@ async def crop_object_of_interest(
        detection_img_list = []
        for image in images:
            detections, labels = grounding_model.predict_with_caption(
-                image=images,
+                image=np.asarray(image),
                caption=caption,
                box_threshold=box_threshold,
                text_threshold=text_threshold,
@@ -158,7 +162,7 @@ async def crop_object_of_interest(
            ]
            for i, bbox in enumerate(detections.xyxy):
                x_min, y_min, x_max, y_max = tuple(bbox)
-                patch = image[int(y_min) : int(y_max), int(x_min) : int(x_max)]
+                patch = image.crop((int(x_min), int(y_min), int(x_max), int(y_max)))  # Image.crop box order is (left, upper, right, lower), unlike the [y, x] numpy slice it replaces
                image_json = output_img(processed_img=patch, output=output)
                image_list.append(image_json)

@@ -179,7 +183,7 @@ async def crop_object_of_interest(

    except Exception as e:
        print(f"{e}")
-        return JSONResponse(status_code=500, content=str(e))
+        return JSONResponse(status_code=500, content=str(e))  # unexpected failure is 500 Internal Server Error; 501 means "Not Implemented"


 if __name__ == "__main__":