fix: form with multi-part python, np format for model

This commit is contained in:
kiennt
2025-08-17 14:12:26 +00:00
parent 147c52de71
commit e09128f94c
3 changed files with 26 additions and 10 deletions

24
app.py
View File

@@ -5,7 +5,7 @@ from pathlib import Path
from typing import Literal, Optional
import base64
import cv2
import numpy as np
import supervision as sv
import uvicorn
import yaml
@@ -24,7 +24,7 @@ PROCESSED_FOLDER = Path(os.environ.get("TEMP_IMG_FOLDER", "temp_dir"))
PROCESSED_FOLDER.mkdir(parents=True, exist_ok=True)
BASE_URL = "http://127.0.0.1:8000"
API_PARTNER_KEY = os.environ.get("API_PARTNER_KEY", "")
# NOTE(review): this fallback key is committed to source control, so any
# deployment that forgets to set API_PARTNER_KEY will use a publicly known
# key. Confirm this default is intended for local development only.
API_PARTNER_KEY = os.environ.get("API_PARTNER_KEY", "dev-AfghDgr3fgf74vc")
API_KEY_NAME = "x-api-key"
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)
@@ -38,10 +38,14 @@ app.add_middleware(
allow_headers=["*"],
)
pdf_converter: PdfConverter = PdfConverter()
pdf_converter: PdfConverter = PdfConverter(dpi=150)
grounding_model = Model(
model_config_path=os.environ.get("GROUNDING_DINO_CONFIG"),
model_checkpoint_path=os.environ.get("GROUNDING_DINO_CHECKPOINT"),
model_config_path=os.environ.get(
"GROUNDING_DINO_CONFIG", "groundingdino/config/GroundingDINO_SwinT_OGC.py"
),
model_checkpoint_path=os.environ.get(
"GROUNDING_DINO_CHECKPOINT", "gdino_checkpoints/groundingdino_swint_ogc.pth"
),
device="cuda:0",
)
BOX_THRESHOLD = 0.4
@@ -112,8 +116,8 @@ async def api_key_middleware(request: Request, call_next):
@app.post("/crop_ooi")
async def crop_object_of_interest(
document_file: Optional[UploadFile] = File(
None, description="The document to process."
document_file: UploadFile = File(
..., description="The document to process."
),
concept_list: Optional[list[str]] = Form(
["ID document"], description="List of concepts to detect e.g. dog, cat, rain"
@@ -144,7 +148,7 @@ async def crop_object_of_interest(
detection_img_list = []
for image in images:
detections, labels = grounding_model.predict_with_caption(
image=images,
# NOTE(review): presumably `image` is an RGB PIL image, so this array is RGB.
# If the model wrapper expects BGR input (as OpenCV-loaded arrays are),
# the channels are swapped here — TODO confirm against the model's docs.
image=np.asarray(image),
caption=caption,
box_threshold=box_threshold,
text_threshold=text_threshold,
@@ -158,7 +162,7 @@ async def crop_object_of_interest(
]
for i, bbox in enumerate(detections.xyxy):
x_min, y_min, x_max, y_max = tuple(bbox)
patch = image[int(y_min) : int(y_max), int(x_min) : int(x_max)]
# Image.crop expects the box as (left, upper, right, lower), i.e. x before y;
# detections.xyxy already yields coordinates in that x_min, y_min, x_max, y_max order.
patch = image.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
image_json = output_img(processed_img=patch, output=output)
image_list.append(image_json)
@@ -179,7 +183,7 @@ async def crop_object_of_interest(
except Exception as e:
print(f"{e}")
return JSONResponse(status_code=500, content=str(e))
return JSONResponse(status_code=501, content=str(e))
if __name__ == "__main__":