diff --git a/src/model/text_detector/PaddleOCR/.gitignore b/src/model/text_detector/PaddleOCR/.gitignore new file mode 100644 index 0000000..9af7017 --- /dev/null +++ b/src/model/text_detector/PaddleOCR/.gitignore @@ -0,0 +1,32 @@ +# Re-include all image files and JSON within this folder (and subfolders) + +# PNG +!*.png +!**/*.png + +# JPG/JPEG +!*.jpg +!**/*.jpg +!*.jpeg +!**/*.jpeg + +# BMP/GIF/TIFF/WEBP +!*.bmp +!**/*.bmp +!*.gif +!**/*.gif +!*.tif +!**/*.tif +!*.tiff +!**/*.tiff +!*.webp +!**/*.webp + +# JSON +!*.json +!**/*.json + +# Ensure this file itself is tracked +!.gitignore + + diff --git a/src/model/text_detector/PaddleOCR/CNI.png b/src/model/text_detector/PaddleOCR/CNI.png new file mode 100644 index 0000000..b135d54 Binary files /dev/null and b/src/model/text_detector/PaddleOCR/CNI.png differ diff --git a/src/model/text_detector/PaddleOCR/CNI_r.json b/src/model/text_detector/PaddleOCR/CNI_r.json new file mode 100644 index 0000000..f0add32 --- /dev/null +++ b/src/model/text_detector/PaddleOCR/CNI_r.json @@ -0,0 +1,1544 @@ +{ + "image": "C:\\Users\\nguye\\Desktop\\IDcardsGenerator\\src\\model\\text_detector\\im1.png", + "language": "fr", + "num_items": 20, + "items": [ + { + "text": "RÉPUBLIQUE FRANÇASE", + "score": 0.9987003803253174, + "box": [ + [ + 298, + 42 + ], + [ + 1111, + 53 + ], + [ + 1110, + 121 + ], + [ + 297, + 109 + ] + ] + }, + { + "text": "FR", + "score": 0.9999539852142334, + "box": [ + [ + 1232, + 80 + ], + [ + 1292, + 80 + ], + [ + 1292, + 118 + ], + [ + 1232, + 118 + ] + ] + }, + { + "text": "CARTE NATIONALE D'IDENTITÉ / IDENTITY CARD", + "score": 0.9937702417373657, + "box": [ + [ + 300, + 114 + ], + [ + 1109, + 118 + ], + [ + 1109, + 154 + ], + [ + 300, + 151 + ] + ] + }, + { + "text": "NOM/Sumame", + "score": 0.9895237684249878, + "box": [ + [ + 560, + 164 + ], + [ + 716, + 163 + ], + [ + 716, + 198 + ], + [ + 560, + 199 + ] + ] + }, + { + "text": "KARA", + "score": 0.9997718334197998, + "box": [ + [ + 563, + 194 + ], + [ + 705, 
+ 194 + ], + [ + 705, + 241 + ], + [ + 563, + 241 + ] + ] + }, + { + "text": "Prénoms / Given names", + "score": 0.9502800703048706, + "box": [ + [ + 561, + 283 + ], + [ + 784, + 285 + ], + [ + 784, + 317 + ], + [ + 561, + 315 + ] + ] + }, + { + "text": "Josiane,Marie", + "score": 0.9998210668563843, + "box": [ + [ + 564, + 317 + ], + [ + 858, + 317 + ], + [ + 858, + 357 + ], + [ + 564, + 357 + ] + ] + }, + { + "text": "SEXE/Sex NATIONALITÉ/Nationalty", + "score": 0.9432203769683838, + "box": [ + [ + 562, + 402 + ], + [ + 1000, + 400 + ], + [ + 1001, + 434 + ], + [ + 562, + 436 + ] + ] + }, + { + "text": "DATE DE NAISS. / Date of birth", + "score": 0.9768196940422058, + "box": [ + [ + 1037, + 399 + ], + [ + 1392, + 396 + ], + [ + 1392, + 431 + ], + [ + 1038, + 434 + ] + ] + }, + { + "text": "F", + "score": 0.8774709105491638, + "box": [ + [ + 561, + 435 + ], + [ + 714, + 435 + ], + [ + 714, + 477 + ], + [ + 561, + 477 + ] + ] + }, + { + "text": "FRA", + "score": 0.9999353289604187, + "box": [ + [ + 696, + 437 + ], + [ + 795, + 437 + ], + [ + 795, + 474 + ], + [ + 696, + 474 + ] + ] + }, + { + "text": "17 10 1949", + "score": 0.9964561462402344, + "box": [ + [ + 1040, + 437 + ], + [ + 1272, + 434 + ], + [ + 1272, + 476 + ], + [ + 1041, + 479 + ] + ] + }, + { + "text": "LIEU DE NAISSANCE / Place of birth", + "score": 0.960074245929718, + "box": [ + [ + 564, + 478 + ], + [ + 962, + 478 + ], + [ + 962, + 509 + ], + [ + 564, + 509 + ] + ] + }, + { + "text": "VANDGUVRE-LÈS-NANCY", + "score": 0.9907931089401245, + "box": [ + [ + 566, + 507 + ], + [ + 1114, + 503 + ], + [ + 1115, + 546 + ], + [ + 566, + 551 + ] + ] + }, + { + "text": "N° DU DOCUMENT / DocumBRH No", + "score": 0.8581311106681824, + "box": [ + [ + 561, + 628 + ], + [ + 951, + 624 + ], + [ + 952, + 658 + ], + [ + 561, + 662 + ] + ] + }, + { + "text": "DATE D'EXPIR/ Expiry date", + "score": 0.9548121094703674, + "box": [ + [ + 1045, + 626 + ], + [ + 1361, + 622 + ], + [ + 1361, + 660 + ], + [ + 1046, + 664 + ] 
+ ] + }, + { + "text": "PM6LBDVJ2", + "score": 0.9999687671661377, + "box": [ + [ + 555, + 664 + ], + [ + 978, + 662 + ], + [ + 979, + 717 + ], + [ + 555, + 719 + ] + ] + }, + { + "text": "02 11 2033", + "score": 0.9822114109992981, + "box": [ + [ + 1046, + 662 + ], + [ + 1288, + 660 + ], + [ + 1288, + 705 + ], + [ + 1047, + 707 + ] + ] + }, + { + "text": "CD", + "score": 0.2982218861579895, + "box": [ + [ + 663, + 722 + ], + [ + 865, + 712 + ], + [ + 869, + 804 + ], + [ + 667, + 814 + ] + ] + }, + { + "text": "046413", + "score": 0.9999799728393555, + "box": [ + [ + 1044, + 736 + ], + [ + 1329, + 734 + ], + [ + 1329, + 795 + ], + [ + 1045, + 798 + ] + ] + } + ], + "poly_source": "auto", + "det_polygons": [ + [ + [ + 298, + 42 + ], + [ + 1111, + 53 + ], + [ + 1110, + 121 + ], + [ + 297, + 109 + ] + ], + [ + [ + 1232, + 80 + ], + [ + 1292, + 80 + ], + [ + 1292, + 118 + ], + [ + 1232, + 118 + ] + ], + [ + [ + 300, + 114 + ], + [ + 1109, + 118 + ], + [ + 1109, + 154 + ], + [ + 300, + 151 + ] + ], + [ + [ + 560, + 164 + ], + [ + 716, + 163 + ], + [ + 716, + 198 + ], + [ + 560, + 199 + ] + ], + [ + [ + 563, + 194 + ], + [ + 705, + 194 + ], + [ + 705, + 241 + ], + [ + 563, + 241 + ] + ], + [ + [ + 561, + 283 + ], + [ + 784, + 285 + ], + [ + 784, + 317 + ], + [ + 561, + 315 + ] + ], + [ + [ + 564, + 317 + ], + [ + 858, + 317 + ], + [ + 858, + 357 + ], + [ + 564, + 357 + ] + ], + [ + [ + 562, + 402 + ], + [ + 1000, + 400 + ], + [ + 1001, + 434 + ], + [ + 562, + 436 + ] + ], + [ + [ + 1037, + 399 + ], + [ + 1392, + 396 + ], + [ + 1392, + 431 + ], + [ + 1038, + 434 + ] + ], + [ + [ + 561, + 435 + ], + [ + 714, + 435 + ], + [ + 714, + 477 + ], + [ + 561, + 477 + ] + ], + [ + [ + 696, + 437 + ], + [ + 795, + 437 + ], + [ + 795, + 474 + ], + [ + 696, + 474 + ] + ], + [ + [ + 1040, + 437 + ], + [ + 1272, + 434 + ], + [ + 1272, + 476 + ], + [ + 1041, + 479 + ] + ], + [ + [ + 564, + 478 + ], + [ + 962, + 478 + ], + [ + 962, + 509 + ], + [ + 564, + 509 + ] + ], + [ + [ + 566, + 507 
+ ], + [ + 1114, + 503 + ], + [ + 1115, + 546 + ], + [ + 566, + 551 + ] + ], + [ + [ + 561, + 628 + ], + [ + 951, + 624 + ], + [ + 952, + 658 + ], + [ + 561, + 662 + ] + ], + [ + [ + 1045, + 626 + ], + [ + 1361, + 622 + ], + [ + 1361, + 660 + ], + [ + 1046, + 664 + ] + ], + [ + [ + 555, + 664 + ], + [ + 978, + 662 + ], + [ + 979, + 717 + ], + [ + 555, + 719 + ] + ], + [ + [ + 1046, + 662 + ], + [ + 1288, + 660 + ], + [ + 1288, + 705 + ], + [ + 1047, + 707 + ] + ], + [ + [ + 663, + 722 + ], + [ + 865, + 712 + ], + [ + 869, + 804 + ], + [ + 667, + 814 + ] + ], + [ + [ + 1044, + 736 + ], + [ + 1329, + 734 + ], + [ + 1329, + 795 + ], + [ + 1045, + 798 + ] + ] + ], + "rec_polygons": [ + [ + [ + 298, + 42 + ], + [ + 1111, + 53 + ], + [ + 1110, + 121 + ], + [ + 297, + 109 + ] + ], + [ + [ + 1232, + 80 + ], + [ + 1292, + 80 + ], + [ + 1292, + 118 + ], + [ + 1232, + 118 + ] + ], + [ + [ + 300, + 114 + ], + [ + 1109, + 118 + ], + [ + 1109, + 154 + ], + [ + 300, + 151 + ] + ], + [ + [ + 560, + 164 + ], + [ + 716, + 163 + ], + [ + 716, + 198 + ], + [ + 560, + 199 + ] + ], + [ + [ + 563, + 194 + ], + [ + 705, + 194 + ], + [ + 705, + 241 + ], + [ + 563, + 241 + ] + ], + [ + [ + 561, + 283 + ], + [ + 784, + 285 + ], + [ + 784, + 317 + ], + [ + 561, + 315 + ] + ], + [ + [ + 564, + 317 + ], + [ + 858, + 317 + ], + [ + 858, + 357 + ], + [ + 564, + 357 + ] + ], + [ + [ + 562, + 402 + ], + [ + 1000, + 400 + ], + [ + 1001, + 434 + ], + [ + 562, + 436 + ] + ], + [ + [ + 1037, + 399 + ], + [ + 1392, + 396 + ], + [ + 1392, + 431 + ], + [ + 1038, + 434 + ] + ], + [ + [ + 561, + 435 + ], + [ + 714, + 435 + ], + [ + 714, + 477 + ], + [ + 561, + 477 + ] + ], + [ + [ + 696, + 437 + ], + [ + 795, + 437 + ], + [ + 795, + 474 + ], + [ + 696, + 474 + ] + ], + [ + [ + 1040, + 437 + ], + [ + 1272, + 434 + ], + [ + 1272, + 476 + ], + [ + 1041, + 479 + ] + ], + [ + [ + 564, + 478 + ], + [ + 962, + 478 + ], + [ + 962, + 509 + ], + [ + 564, + 509 + ] + ], + [ + [ + 566, + 507 + ], + [ + 1114, + 503 + ], 
+ [ + 1115, + 546 + ], + [ + 566, + 551 + ] + ], + [ + [ + 561, + 628 + ], + [ + 951, + 624 + ], + [ + 952, + 658 + ], + [ + 561, + 662 + ] + ], + [ + [ + 1045, + 626 + ], + [ + 1361, + 622 + ], + [ + 1361, + 660 + ], + [ + 1046, + 664 + ] + ], + [ + [ + 555, + 664 + ], + [ + 978, + 662 + ], + [ + 979, + 717 + ], + [ + 555, + 719 + ] + ], + [ + [ + 1046, + 662 + ], + [ + 1288, + 660 + ], + [ + 1288, + 705 + ], + [ + 1047, + 707 + ] + ], + [ + [ + 663, + 722 + ], + [ + 865, + 712 + ], + [ + 869, + 804 + ], + [ + 667, + 814 + ] + ], + [ + [ + 1044, + 736 + ], + [ + 1329, + 734 + ], + [ + 1329, + 795 + ], + [ + 1045, + 798 + ] + ] + ], + "enhance": false, + "recognition_model": "auto", + "detection_model": "auto", + "ocr_version": "auto", + "box_color": "#FF0000", + "fill_color": "#FF000033", + "box_width": 3, + "scale_ratio": null, + "offset_x": 0, + "offset_y": 0, + "vis_polygons": [ + [ + [ + 298, + 42 + ], + [ + 1111, + 53 + ], + [ + 1110, + 121 + ], + [ + 297, + 109 + ] + ], + [ + [ + 1232, + 80 + ], + [ + 1292, + 80 + ], + [ + 1292, + 118 + ], + [ + 1232, + 118 + ] + ], + [ + [ + 300, + 114 + ], + [ + 1109, + 118 + ], + [ + 1109, + 154 + ], + [ + 300, + 151 + ] + ], + [ + [ + 560, + 164 + ], + [ + 716, + 163 + ], + [ + 716, + 198 + ], + [ + 560, + 199 + ] + ], + [ + [ + 563, + 194 + ], + [ + 705, + 194 + ], + [ + 705, + 241 + ], + [ + 563, + 241 + ] + ], + [ + [ + 561, + 283 + ], + [ + 784, + 285 + ], + [ + 784, + 317 + ], + [ + 561, + 315 + ] + ], + [ + [ + 564, + 317 + ], + [ + 858, + 317 + ], + [ + 858, + 357 + ], + [ + 564, + 357 + ] + ], + [ + [ + 562, + 402 + ], + [ + 1000, + 400 + ], + [ + 1001, + 434 + ], + [ + 562, + 436 + ] + ], + [ + [ + 1037, + 399 + ], + [ + 1392, + 396 + ], + [ + 1392, + 431 + ], + [ + 1038, + 434 + ] + ], + [ + [ + 561, + 435 + ], + [ + 714, + 435 + ], + [ + 714, + 477 + ], + [ + 561, + 477 + ] + ], + [ + [ + 696, + 437 + ], + [ + 795, + 437 + ], + [ + 795, + 474 + ], + [ + 696, + 474 + ] + ], + [ + [ + 1040, + 437 + ], + [ + 
1272, + 434 + ], + [ + 1272, + 476 + ], + [ + 1041, + 479 + ] + ], + [ + [ + 564, + 478 + ], + [ + 962, + 478 + ], + [ + 962, + 509 + ], + [ + 564, + 509 + ] + ], + [ + [ + 566, + 507 + ], + [ + 1114, + 503 + ], + [ + 1115, + 546 + ], + [ + 566, + 551 + ] + ], + [ + [ + 561, + 628 + ], + [ + 951, + 624 + ], + [ + 952, + 658 + ], + [ + 561, + 662 + ] + ], + [ + [ + 1045, + 626 + ], + [ + 1361, + 622 + ], + [ + 1361, + 660 + ], + [ + 1046, + 664 + ] + ], + [ + [ + 555, + 664 + ], + [ + 978, + 662 + ], + [ + 979, + 717 + ], + [ + 555, + 719 + ] + ], + [ + [ + 1046, + 662 + ], + [ + 1288, + 660 + ], + [ + 1288, + 705 + ], + [ + 1047, + 707 + ] + ], + [ + [ + 663, + 722 + ], + [ + 865, + 712 + ], + [ + 869, + 804 + ], + [ + 667, + 814 + ] + ], + [ + [ + 1044, + 736 + ], + [ + 1329, + 734 + ], + [ + 1329, + 795 + ], + [ + 1045, + 798 + ] + ] + ] +} \ No newline at end of file diff --git a/src/model/text_detector/PaddleOCR/CNI_r.png b/src/model/text_detector/PaddleOCR/CNI_r.png new file mode 100644 index 0000000..d8c2924 Binary files /dev/null and b/src/model/text_detector/PaddleOCR/CNI_r.png differ diff --git a/src/model/text_detector/PaddleOCR/im1.png b/src/model/text_detector/PaddleOCR/im1.png new file mode 100644 index 0000000..e7b6d27 Binary files /dev/null and b/src/model/text_detector/PaddleOCR/im1.png differ diff --git a/src/model/text_detector/PaddleOCR/main.py b/src/model/text_detector/PaddleOCR/main.py new file mode 100644 index 0000000..7dc2aae --- /dev/null +++ b/src/model/text_detector/PaddleOCR/main.py @@ -0,0 +1,643 @@ +import argparse +import json +import os +import sys +from typing import List, Tuple, Any, Optional, Dict + +from PIL import Image, ImageDraw, ImageFont, ImageEnhance, ImageOps +import numpy as np +import cv2 +from paddleocr import PaddleOCR +try: # Some paddleocr builds do not expose draw_ocr at top-level + from paddleocr import draw_ocr as paddle_draw_ocr # type: ignore +except Exception: # pragma: no cover - environment dependent + 
def resolve_font_path() -> str:
    """Return the path of an installed font that can render Latin accents.

    Candidates are probed in order: common Windows fonts located under
    ``%WINDIR%\\Fonts``, then one well-known macOS path and one Linux path.

    Returns:
        The first candidate that exists as a regular file, or ``""`` when
        none of them is installed.
    """
    # The fallback must use single backslashes: in a raw string a doubled
    # backslash stays doubled and would produce the literal "C:\\Windows".
    windir = os.environ.get("WINDIR", r"C:\Windows")
    windows_fonts = ("arialuni.ttf", "arial.ttf", "tahoma.ttf", "seguiemj.ttf")

    candidate_fonts: List[str] = [
        os.path.join(windir, "Fonts", font_name) for font_name in windows_fonts
    ]
    # Common fallbacks on other platforms.
    candidate_fonts += [
        "/System/Library/Fonts/Supplemental/Arial Unicode.ttf",  # macOS
        "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",  # Linux
    ]

    for font in candidate_fonts:
        # isfile (not exists) so a directory with a matching name is rejected.
        if os.path.isfile(font):
            return font
    # Callers may still draw without a font, but non-ASCII text may not
    # render correctly.
    return ""
def build_argparser(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Define the command-line interface and return the parsed arguments.

    Despite its name, this function returns the parsed ``Namespace`` rather
    than the ``ArgumentParser`` itself.

    Args:
        argv: Argument list to parse. ``None`` (the default) parses
            ``sys.argv[1:]``, which is what the original zero-argument call
            did; passing a list makes the function usable from tests or
            other code.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser(
        description="Simple PaddleOCR runner for single image"
    )

    # --- Input / output -------------------------------------------------
    parser.add_argument(
        "--image",
        default="im1.png",
        help="Path to input image (default: im1.png)",
    )
    parser.add_argument(
        "--lang",
        default="fr",
        help="Language code for PaddleOCR (try: fr, en, vi, etc.). Default: fr",
    )
    parser.add_argument(
        "--out-json",
        default="ocr_result.json",
        help="Path to save raw OCR results as JSON",
    )
    parser.add_argument(
        "--out-image",
        default="ocr_vis.png",
        help="Path to save visualization image with boxes and text",
    )

    # --- Visualization --------------------------------------------------
    parser.add_argument(
        "--poly-source",
        choices=["dt", "rec", "auto"],
        default="auto",
        help="Which polygons to visualize: detection(dt), recognition(rec) or auto (prefer dt)",
    )
    parser.add_argument(
        "--box-color",
        default="#FF0000",
        help="Outline color for polygons (hex like #RRGGBB or 'R,G,B').",
    )
    parser.add_argument(
        "--fill-color",
        default="#FF000033",
        help="Fill color with alpha for polygons (e.g., #FF000033).",
    )
    parser.add_argument(
        "--box-width",
        type=int,
        default=3,
        help="Outline width for polygons.",
    )
    parser.add_argument(
        "--scale-ratio",
        type=float,
        default=None,
        help="Scale polygons around centroid (e.g., 0.96 to shrink, 1.05 to expand).",
    )
    parser.add_argument(
        "--shrink-ratio",
        type=float,
        default=None,
        help="Deprecated: use --scale-ratio. If given, will be used when --scale-ratio is not set.",
    )
    parser.add_argument(
        "--offset-x",
        type=int,
        default=0,
        help="Shift polygons horizontally (pixels). Positive moves right.",
    )
    parser.add_argument(
        "--offset-y",
        type=int,
        default=0,
        help="Shift polygons vertically (pixels). Positive moves down.",
    )
    parser.add_argument(
        "--scale-x",
        type=float,
        default=None,
        help="Non-uniform scale along X around centroid (e.g., 1.1). Overrides --scale-ratio on X if set.",
    )
    parser.add_argument(
        "--scale-y",
        type=float,
        default=None,
        help="Non-uniform scale along Y around centroid (e.g., 1.05). Overrides --scale-ratio on Y if set.",
    )
    parser.add_argument(
        "--pad-x",
        type=int,
        default=0,
        help="Pixels to pad left/right after scaling.",
    )
    parser.add_argument(
        "--pad-y",
        type=int,
        default=0,
        help="Pixels to pad top/bottom after scaling.",
    )
    parser.add_argument(
        "--rect",
        choices=["poly", "axis", "rotated"],
        default="poly",
        help="Visualization mode: polygon, axis-aligned rectangle, or rotated min-area rectangle.",
    )

    # --- Pre-processing and model selection -----------------------------
    parser.add_argument(
        "--enhance",
        action="store_true",
        help="Apply light image enhancement (contrast/sharpen) before OCR.",
    )
    parser.add_argument(
        "--rec-model",
        default="",
        help=(
            "Recognition model name to force (e.g. latin_PP-OCRv5_server_rec, "
            "latin_PP-OCRv5_mobile_rec, en_PP-OCRv4_rec, en_PP-OCRv3_rec)."
        ),
    )
    parser.add_argument(
        "--det-model",
        default="",
        help=(
            "Detection model name to force (e.g. PP-OCRv5_server_det, "
            "PP-OCRv5_mobile_det)."
        ),
    )
    parser.add_argument(
        "--ocr-version",
        default="",
        help="Optional OCR version hint (e.g., PP-OCRv5).",
    )

    # --- Thresholds and limits ------------------------------------------
    parser.add_argument(
        "--rec-score-thresh",
        type=float,
        default=0.0,
        help="Minimum recognition score to keep a text (default 0.0 to keep all)",
    )
    parser.add_argument(
        "--det-db-thresh",
        type=float,
        default=None,
        help="DB detector binary threshold (e.g., 0.2).",
    )
    parser.add_argument(
        "--det-db-box-thresh",
        type=float,
        default=None,
        help="DB detector box threshold (e.g., 0.3-0.6). Lower to get more boxes.",
    )
    parser.add_argument(
        "--det-db-unclip",
        type=float,
        default=None,
        help="DB detector unclip ratio (e.g., 1.5).",
    )
    parser.add_argument(
        "--det-limit-side",
        type=int,
        default=None,
        help="text_det_limit_side_len. Smaller may speed up and merge boxes.",
    )
    return parser.parse_args(argv)
def initialize_ocr(preferred_lang: str, args: argparse.Namespace) -> Tuple[PaddleOCR, str]:
    """Create a ``PaddleOCR`` instance, falling back across languages.

    Languages are tried in the order ``preferred_lang``, ``"fr"``, ``"en"``
    (duplicates removed). For each language the constructor is first called
    with the user's model overrides (``--rec-model``, ``--det-model``,
    ``--ocr-version``) when any were given, then with default models.

    Args:
        preferred_lang: Language code to try first.
        args: Parsed CLI options; reads ``rec_model``, ``det_model``,
            ``ocr_version``, ``rec_score_thresh``, ``det_db_thresh``,
            ``det_db_box_thresh``, ``det_limit_side`` and, when present,
            ``det_db_unclip``.

    Returns:
        ``(ocr, lang)`` where ``lang`` is the language code that succeeded.

    Raises:
        RuntimeError: If every attempt failed; the message lists each error.
    """

    def common_kwargs(lang_code: str) -> Dict[str, Any]:
        """Build the constructor kwargs shared by every attempt."""
        kwargs: Dict[str, Any] = {
            "lang": lang_code,
            "use_textline_orientation": True,
        }
        # Keyword names follow the PaddleOCR >= 3.x pipeline API
        # (text_det_thresh / text_det_box_thresh / text_det_unclip_ratio).
        optional_values = (
            ("text_rec_score_thresh", args.rec_score_thresh, float),
            ("text_det_thresh", args.det_db_thresh, float),
            ("text_det_box_thresh", args.det_db_box_thresh, float),
            # getattr: tolerate Namespaces built without --det-db-unclip.
            ("text_det_unclip_ratio", getattr(args, "det_db_unclip", None), float),
            ("text_det_limit_side_len", args.det_limit_side, int),
        )
        for key, value, convert in optional_values:
            if value is not None:
                kwargs[key] = convert(value)
        return kwargs

    model_overrides: Dict[str, Any] = {}
    if args.rec_model:
        model_overrides["text_recognition_model_name"] = args.rec_model
    if args.det_model:
        model_overrides["text_detection_model_name"] = args.det_model
    if args.ocr_version:
        model_overrides["ocr_version"] = args.ocr_version

    # Keep order but deduplicate.
    candidate_langs: List[str] = list(dict.fromkeys([preferred_lang, "fr", "en"]))
    tried_errors: List[str] = []

    for code in candidate_langs:
        base = common_kwargs(code)
        attempts: List[Tuple[str, Dict[str, Any]]] = []
        if model_overrides:
            attempts.append(("with models", {**base, **model_overrides}))
        # Without overrides the "with models" call would be identical to the
        # default one, so it is attempted only once.
        attempts.append(("default", base))

        for label, init_kwargs in attempts:
            try:
                return PaddleOCR(**init_kwargs), code
            except Exception as exc:  # pragma: no cover - environment dependent
                # Broad on purpose: model download, unsupported language and
                # bad-kwarg failures all surface as different exception types.
                tried_errors.append(f"{code} ({label}): {exc}")

    raise RuntimeError(
        "Failed to initialize PaddleOCR. Tried languages: "
        + ", ".join(candidate_langs)
        + "\nErrors: "
        + " | ".join(tried_errors)
    )
def _first_present(mapping: Dict[str, Any], *keys: str) -> Any:
    """Return the first non-empty value stored under ``keys``, else ``None``.

    Unlike chaining ``mapping.get(a) or mapping.get(b)``, this never evaluates
    the truthiness of a value, so multi-element numpy arrays do not raise.
    """
    for key in keys:
        value = mapping.get(key)
        if value is None:
            continue
        try:
            if len(value) == 0:
                continue
        except TypeError:
            pass  # scalars (e.g. a float score) have no length; accept as-is
        return value
    return None


def _to_int_points(poly_any: Any) -> List[List[int]]:
    """Convert one polygon (list/tuple/ndarray of points) to ``[[x, y], ...]``.

    Returns ``[]`` when the input is ``None`` or cannot be interpreted.
    """
    if poly_any is None:
        return []
    if hasattr(poly_any, "tolist"):  # numpy array path
        poly_any = poly_any.tolist()
    try:
        return [[int(p[0]), int(p[1])] for p in poly_any]
    except (TypeError, ValueError, IndexError, KeyError):
        return []


def _to_score(value: Any) -> Optional[float]:
    """Return ``value`` as a float, or ``None`` if missing or not numeric."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _make_item(text: Any, score: Any, box: Any) -> Dict[str, Any]:
    """Build one normalized, JSON-serializable OCR item."""
    return {
        "text": str(text),
        "score": _to_score(score),
        "box": _to_int_points(box) if box is not None else None,
    }


def _result_dict(res: Any) -> Optional[Dict[str, Any]]:
    """Return the result mapping, unwrapping a list whose first item is a dict."""
    if isinstance(res, list) and res and isinstance(res[0], dict):
        return res[0]
    if isinstance(res, dict):
        return res
    return None


def _polygons_for(res: Any, *keys: str) -> List[List[Tuple[int, int]]]:
    """Return the polygons stored under the first present key as point tuples."""
    obj = _result_dict(res)
    polys = _first_present(obj, *keys) if obj is not None else None
    if polys is None:
        return []
    return [[(x, y) for x, y in _to_int_points(poly)] for poly in polys]


def _extract_polygons(res: Any, poly_source: str) -> List[List[Tuple[int, int]]]:
    """Pick the polygons to visualize according to ``--poly-source``.

    ``dt`` uses detection polygons, ``rec`` recognition polygons, ``auto``
    prefers detection then recognition; generic ``polygons``/``boxes`` keys
    are the last resort in every mode.
    """
    polys: List[List[Tuple[int, int]]] = []
    if poly_source in ("auto", "dt"):
        polys = _polygons_for(res, "dt_polys", "det_polygons")
    if not polys and poly_source in ("auto", "rec"):
        polys = _polygons_for(res, "rec_polys")
    if not polys:
        polys = _polygons_for(res, "polygons", "boxes")
    return polys


def _parse_batched(obj: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Parse a dict holding parallel arrays (texts / scores / polygons)."""
    texts = _first_present(obj, "rec_texts", "rec_text", "texts")
    if texts is None:
        return []
    if isinstance(texts, str):
        texts = [texts]

    def as_list(value: Any) -> List[Any]:
        if value is None:
            return []
        try:
            return list(value)
        except TypeError:
            return []

    scores = as_list(_first_present(obj, "rec_scores", "scores"))
    # Recognition polygons come first because they are index-aligned with
    # the recognized texts.
    polys = as_list(
        _first_present(obj, "rec_polys", "dt_polys", "det_polygons", "boxes", "polygons")
    )
    # Missing scores/boxes become None instead of truncating the text list.
    return [
        _make_item(
            text,
            scores[i] if i < len(scores) else None,
            polys[i] if i < len(polys) else None,
        )
        for i, text in enumerate(texts)
    ]


def _parse_detection_dict(obj: Dict[str, Any]) -> Dict[str, Any]:
    """Parse one per-detection dict (``{"text": ..., "score": ..., "box": ...}``)."""
    box = _first_present(obj, "box", "poly", "bbox", "det_polygons")
    text = _first_present(obj, "text", "rec_text")
    # Explicit None check so a legitimate score of 0.0 is kept.
    score = obj.get("score")
    if score is None:
        score = obj.get("rec_score")
    return _make_item("" if text is None else text, score, box)


def _parse_legacy_line(line: Any) -> Optional[Dict[str, Any]]:
    """Parse ``[poly, (text, score)]`` or ``[poly, text, score]``; None if malformed."""
    if not isinstance(line, (list, tuple)) or len(line) < 2:
        return None
    box, payload = line[0], line[1]
    score: Any = None
    if isinstance(payload, (list, tuple)) and len(payload) >= 1:
        text = str(payload[0])
        if len(payload) >= 2:
            score = payload[1]
    else:
        text = str(payload)
        if len(line) >= 3:
            score = line[2]
    return _make_item(text, score, box)


def _parse_lines(res: Any) -> List[Dict[str, Any]]:
    """Normalize OCR output across PaddleOCR versions into a list of items.

    Handled shapes: a dict of parallel arrays; a one-element list wrapping
    such a dict; a list of per-detection dicts; the legacy nested list
    ``[[ [poly, (text, score)], ... ]]``; and a flat list of
    ``[poly, text, score?]`` entries.
    """
    if isinstance(res, dict):
        return _parse_batched(res)
    if not isinstance(res, list) or not res:
        return []

    first = res[0]
    if isinstance(first, dict):
        if len(res) == 1 and ("rec_texts" in first or "texts" in first):
            return _parse_batched(first)
        return [_parse_detection_dict(obj) for obj in res]

    if isinstance(first, (list, tuple)):
        # A flat entry has a string in second position; checking that first
        # avoids misreading [poly, text] (poly being a list) as nested lines.
        is_flat = len(first) >= 2 and isinstance(first[1], str)
        is_nested = (
            not is_flat
            and isinstance(first, list)
            and len(first) > 0
            and isinstance(first[0], list)
        )
        lines = first if is_nested else res
        parsed_lines = (_parse_legacy_line(line) for line in lines)
        return [item for item in parsed_lines if item is not None]
    return []


def _maybe_enhance_image(path: str) -> str:
    """Write a lightly enhanced copy of ``path`` and return the copy's path.

    The copy is saved next to the input as ``<stem>_enh.png``. On failure a
    warning is printed and the original path is returned.
    """
    try:
        with Image.open(path) as src:
            img = src.convert("RGB")
        # Slight auto-contrast and sharpening.
        img = ImageOps.autocontrast(img, cutoff=1)
        img = ImageEnhance.Sharpness(img).enhance(1.4)
        img = ImageEnhance.Contrast(img).enhance(1.1)
        enhanced_path = os.path.splitext(path)[0] + "_enh.png"
        img.save(enhanced_path)
        return enhanced_path
    except (OSError, ValueError) as exc:
        print(f"[WARN] Enhancement failed, using original image: {exc}")
        return path


def _parse_color(
    color_str: str, default: Tuple[int, int, int, int] = (255, 0, 0, 255)
) -> Tuple[int, int, int, int]:
    """Parse ``#RRGGBB``, ``#RRGGBBAA``, ``R,G,B`` or ``R,G,B,A`` into RGBA.

    Returns ``default`` for any other input; alpha defaults to 255.
    """
    try:
        if color_str.startswith("#"):
            hexv = color_str.lstrip("#")
            if len(hexv) not in (6, 8):
                return default
            channels = [int(hexv[i:i + 2], 16) for i in range(0, len(hexv), 2)]
        else:
            channels = [int(x) for x in color_str.split(",")]
            if len(channels) not in (3, 4):
                return default
    except ValueError:
        return default
    if len(channels) == 3:
        channels.append(255)
    return (channels[0], channels[1], channels[2], channels[3])


def _transform_polygon(
    poly: List[Tuple[int, int]],
    scale: Optional[float],
    dx: int,
    dy: int,
    scale_x: Optional[float] = None,
    scale_y: Optional[float] = None,
    pad_x: int = 0,
    pad_y: int = 0,
) -> List[Tuple[int, int]]:
    """Scale ``poly`` around its centroid, shift it by (dx, dy), then pad it.

    ``scale_x``/``scale_y`` override ``scale`` on their axis; ``None`` means
    no scaling. Padding pushes each point away from the centroid along both
    axes.
    """
    if not poly:
        return poly
    cx = sum(p[0] for p in poly) / len(poly)
    cy = sum(p[1] for p in poly) / len(poly)
    sx = scale_x if scale_x is not None else scale
    sy = scale_y if scale_y is not None else scale

    out: List[Tuple[int, int]] = []
    for x, y in poly:
        if sx is not None:
            x = cx + sx * (x - cx)
        if sy is not None:
            y = cy + sy * (y - cy)
        out.append((int(round(x + dx)), int(round(y + dy))))

    if pad_x or pad_y:
        # The points were already shifted, so compare against the shifted
        # centroid; otherwise a large offset pads every point the same way.
        shifted_cx, shifted_cy = cx + dx, cy + dy
        out = [
            (
                x - pad_x if x < shifted_cx else x + pad_x,
                y - pad_y if y < shifted_cy else y + pad_y,
            )
            for x, y in out
        ]
    return out


def _axis_aligned_box(pts: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Return the four corners of the axis-aligned bounding box of ``pts``."""
    xs = [p[0] for p in pts]
    ys = [p[1] for p in pts]
    return [(min(xs), min(ys)), (max(xs), min(ys)), (max(xs), max(ys)), (min(xs), max(ys))]


def _rotated_box(pts: List[Tuple[int, int]]) -> List[Tuple[int, int]]:
    """Return the four corners of the minimum-area rotated rectangle of ``pts``."""
    contour = np.array(pts, dtype=np.int32).reshape(-1, 1, 2)
    corners = cv2.boxPoints(cv2.minAreaRect(contour))
    # np.int0 was removed in NumPy 2.0; astype truncates the same way.
    return [(int(x), int(y)) for x, y in corners.astype(np.int32).tolist()]


def _write_json(path: str, payload: Dict[str, Any]) -> None:
    """Serialize ``payload`` to ``path`` as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as fh:
        json.dump(payload, fh, ensure_ascii=False, indent=2)


def main() -> None:
    """Run OCR on one image, then save a JSON report and a polygon overlay.

    Exits with status 1 when the input image is missing and 2 when PaddleOCR
    cannot be initialized. Besides ``--out-json`` and ``--out-image``, a
    ``raw_result.txt`` debug dump is written to the current directory.
    """
    args = build_argparser()

    if not os.path.exists(args.image):
        print(f"[ERROR] Image not found: {args.image}")
        sys.exit(1)

    try:
        ocr, used_lang = initialize_ocr(args.lang, args)
    except Exception as init_exc:
        print(f"[ERROR] {init_exc}")
        sys.exit(2)

    chosen_rec = args.rec_model if args.rec_model else "auto"
    chosen_det = args.det_model if args.det_model else "auto"

    # Enhancement has to happen before OCR to have any effect. The enhanced
    # copy has the same size, so polygons still map onto the original image.
    ocr_input = _maybe_enhance_image(args.image) if args.enhance else args.image

    print(
        f"[INFO] Running OCR on '{args.image}' with lang='{used_lang}', "
        f"rec='{chosen_rec}', det='{chosen_det}' ..."
    )
    # Prefer new API for PaddleOCR >=3.x, fallback to legacy .ocr
    try:
        result: Any = ocr.predict(ocr_input)  # type: ignore[assignment]
    except Exception as predict_exc:
        print(f"[WARN] predict() failed ({predict_exc}); falling back to ocr()")
        result = ocr.ocr(ocr_input)  # type: ignore[assignment]

    parsed = _parse_lines(result)
    det_polys = _extract_polygons(result, args.poly_source)
    # Additionally collect both dt and rec polygons for JSON output.
    all_dt_polys = _polygons_for(result, "dt_polys", "det_polygons")
    all_rec_polys = _polygons_for(result, "rec_polys")

    # Print quick summary to console
    print("\n[TEXT]\n" + "\n".join([p["text"] for p in parsed]))

    report: Dict[str, Any] = {
        "image": os.path.abspath(args.image),
        "language": used_lang,
        "num_items": len(parsed),
        "items": parsed,
        "poly_source": args.poly_source,
        "det_polygons": [[list(pt) for pt in poly] for poly in all_dt_polys],
        "rec_polygons": [[list(pt) for pt in poly] for poly in all_rec_polys],
        "enhance": bool(args.enhance),
        "recognition_model": chosen_rec,
        "detection_model": chosen_det,
        "ocr_version": args.ocr_version or "auto",
        "box_color": args.box_color,
        "fill_color": args.fill_color,
        "box_width": int(args.box_width),
        "scale_ratio": float(args.scale_ratio) if args.scale_ratio is not None else None,
        "offset_x": int(args.offset_x),
        "offset_y": int(args.offset_y),
    }
    # Saved before drawing so the OCR output survives a visualization failure.
    _write_json(args.out_json, report)
    print(f"[INFO] Saved JSON: {args.out_json}")

    # Also store raw result for debugging purposes (best effort).
    try:
        with open("raw_result.txt", "w", encoding="utf-8") as rf:
            rf.write(repr(result))
    except OSError as raw_exc:
        print(f"[WARN] Could not write raw_result.txt: {raw_exc}")

    # Draw and save visualization (only polygons)
    with Image.open(args.image) as src:
        canvas = src.convert("RGBA")
    overlay = Image.new("RGBA", canvas.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(overlay)

    # Prefer detection polygons from pipeline; fallback to parsed boxes
    polygons: List[List[Tuple[int, int]]] = det_polys
    if not polygons:
        polygons = [
            [(pt[0], pt[1]) for pt in item["box"]]
            for item in parsed
            if item.get("box")
        ]

    outline_rgba = _parse_color(args.box_color)
    fill_rgba = _parse_color(args.fill_color, default=(255, 0, 0, 51))
    # Backward-compat: if scale-ratio not provided, use shrink-ratio (<1.0)
    scale = args.scale_ratio if args.scale_ratio is not None else args.shrink_ratio

    vis_polys: List[List[Tuple[int, int]]] = []
    for poly in polygons:
        if len(poly) < 3:
            continue  # not enough points to form a drawable shape
        shaped = _transform_polygon(
            poly,
            scale,
            args.offset_x,
            args.offset_y,
            args.scale_x,
            args.scale_y,
            args.pad_x,
            args.pad_y,
        )
        vis_polys.append(shaped)
        if args.rect == "axis":
            outline_pts = _axis_aligned_box(shaped)
        elif args.rect == "rotated":
            outline_pts = _rotated_box(shaped)
        else:
            outline_pts = shaped
        draw.polygon(outline_pts, outline=outline_rgba, fill=fill_rgba)
        # polygon() draws a 1px outline; line() applies the requested width.
        draw.line(outline_pts + [outline_pts[0]], fill=outline_rgba, width=args.box_width)

    out = Image.alpha_composite(canvas, overlay).convert("RGB")
    out.save(args.out_image)
    print(f"[INFO] Saved visualization: {args.out_image}")

    # Append the visualization polygons to JSON file for exact reproducibility.
    # The in-memory report is rewritten instead of re-reading the file.
    report["vis_polygons"] = [[list(pt) for pt in poly] for poly in vis_polys]
    _write_json(args.out_json, report)


if __name__ == "__main__":
    main()
b/src/model/text_detector/PaddleOCR/ocr_result.json @@ -0,0 +1,5040 @@ +{ + "image": "C:\\Users\\nguye\\Desktop\\IDcardsGenerator\\src\\model\\text_detector\\im1.png", + "language": "fr", + "num_items": 66, + "items": [ + { + "text": "Relevé QXBAN", + "score": 0.9995512366294861, + "box": [ + [ + 1844, + 56 + ], + [ + 2556, + 62 + ], + [ + 2555, + 174 + ], + [ + 1842, + 168 + ] + ] + }, + { + "text": "Mutuel", + "score": 0.9998524188995361, + "box": [ + [ + 649, + 168 + ], + [ + 1078, + 153 + ], + [ + 1083, + 284 + ], + [ + 653, + 298 + ] + ] + }, + { + "text": "Crédit", + "score": 0.9998147487640381, + "box": [ + [ + 128, + 187 + ], + [ + 524, + 172 + ], + [ + 529, + 302 + ], + [ + 133, + 317 + ] + ] + }, + { + "text": "QXBAN correspondant à votre IBAN rappelé ci-dessous", + "score": 0.9950084090232849, + "box": [ + [ + 635, + 306 + ], + [ + 2427, + 262 + ], + [ + 2430, + 356 + ], + [ + 638, + 400 + ] + ] + }, + { + "text": "213410110000690 031chq/G", + "score": 0.9989956021308899, + "box": [ + [ + 91, + 389 + ], + [ + 132, + 393 + ], + [ + 101, + 742 + ], + [ + 60, + 738 + ] + ] + }, + { + "text": "Domiciliation", + "score": 0.9994469881057739, + "box": [ + [ + 2473, + 383 + ], + [ + 2775, + 394 + ], + [ + 2773, + 454 + ], + [ + 2471, + 443 + ] + ] + }, + { + "text": "CCM HOENHEIM", + "score": 0.9996560215950012, + "box": [ + [ + 2172, + 447 + ], + [ + 2589, + 455 + ], + [ + 2588, + 513 + ], + [ + 2171, + 505 + ] + ] + }, + { + "text": "QX65 - CMCI - FR2A- XXXD - LB3B - TR42 - ZO14 - VPJF - NT", + "score": 0.9701882004737854, + "box": [ + [ + 261, + 479 + ], + [ + 1920, + 444 + ], + [ + 1921, + 510 + ], + [ + 262, + 545 + ] + ] + }, + { + "text": "Identifiant international de compte bancaire", + "score": 0.9938004016876221, + "box": [ + [ + 287, + 575 + ], + [ + 1663, + 559 + ], + [ + 1664, + 662 + ], + [ + 288, + 679 + ] + ] + }, + { + "text": "IBAN (International Bank Account number)", + "score": 0.9934032559394836, + "box": [ + [ + 512, + 719 + ], + [ + 1411, 
+ 710 + ], + [ + 1411, + 771 + ], + [ + 512, + 780 + ] + ] + }, + { + "text": "BIC (Bank Identification Code)", + "score": 0.9850165247917175, + "box": [ + [ + 2384, + 722 + ], + [ + 3038, + 738 + ], + [ + 3036, + 797 + ], + [ + 2382, + 780 + ] + ] + }, + { + "text": "FR76", + "score": 0.999854564666748, + "box": [ + [ + 311, + 799 + ], + [ + 514, + 803 + ], + [ + 513, + 855 + ], + [ + 310, + 851 + ] + ] + }, + { + "text": "10XX XXXX XXXX", + "score": 0.9545566439628601, + "box": [ + [ + 487, + 801 + ], + [ + 989, + 809 + ], + [ + 988, + 861 + ], + [ + 486, + 853 + ] + ] + }, + { + "text": "XXXX 7824", + "score": 0.9839823246002197, + "box": [ + [ + 1023, + 802 + ], + [ + 1350, + 802 + ], + [ + 1350, + 857 + ], + [ + 1023, + 857 + ] + ] + }, + { + "text": "0XX", + "score": 0.8636813163757324, + "box": [ + [ + 1431, + 804 + ], + [ + 1539, + 801 + ], + [ + 1540, + 856 + ], + [ + 1433, + 859 + ] + ] + }, + { + "text": "CMCIFR2A", + "score": 0.9998859763145447, + "box": [ + [ + 2480, + 822 + ], + [ + 2755, + 827 + ], + [ + 2754, + 881 + ], + [ + 2479, + 875 + ] + ] + }, + { + "text": "Domiciliation", + "score": 0.9998284578323364, + "box": [ + [ + 423, + 882 + ], + [ + 747, + 892 + ], + [ + 746, + 943 + ], + [ + 422, + 934 + ] + ] + }, + { + "text": "Titulaire du compte (Account Owner)", + "score": 0.9954113364219666, + "box": [ + [ + 2278, + 937 + ], + [ + 3194, + 954 + ], + [ + 3193, + 1015 + ], + [ + 2277, + 998 + ] + ] + }, + { + "text": "CCM HOENHEIM", + "score": 0.9983739256858826, + "box": [ + [ + 420, + 962 + ], + [ + 799, + 967 + ], + [ + 799, + 1017 + ], + [ + 419, + 1012 + ] + ] + }, + { + "text": "MME JOSIANE KARA", + "score": 0.999321460723877, + "box": [ + [ + 2285, + 1007 + ], + [ + 2772, + 1013 + ], + [ + 2772, + 1063 + ], + [ + 2284, + 1057 + ] + ] + }, + { + "text": "14 RUE DE LA REPUBLIQUE", + "score": 0.9998688101768494, + "box": [ + [ + 421, + 1028 + ], + [ + 1053, + 1024 + ], + [ + 1053, + 1078 + ], + [ + 421, + 1081 + ] + ] + }, + { + "text": "42 
RUE DE LA WANTZENAU", + "score": 0.9911791086196899, + "box": [ + [ + 2281, + 1067 + ], + [ + 2936, + 1075 + ], + [ + 2936, + 1125 + ], + [ + 2280, + 1116 + ] + ] + }, + { + "text": "67800 HOENHEIM", + "score": 0.9638767838478088, + "box": [ + [ + 419, + 1090 + ], + [ + 820, + 1094 + ], + [ + 819, + 1144 + ], + [ + 418, + 1140 + ] + ] + }, + { + "text": "67116 REICHSTETT", + "score": 0.9998940825462341, + "box": [ + [ + 2279, + 1130 + ], + [ + 2742, + 1136 + ], + [ + 2741, + 1186 + ], + [ + 2278, + 1180 + ] + ] + }, + { + "text": "TEL 03 90 41 65 29", + "score": 0.9649747014045715, + "box": [ + [ + 419, + 1150 + ], + [ + 837, + 1158 + ], + [ + 836, + 1210 + ], + [ + 418, + 1202 + ] + ] + }, + { + "text": "Remettez ce relevé à tout organisme ayant besoin de connaître vos références", + "score": 0.987461268901825, + "box": [ + [ + 219, + 1264 + ], + [ + 2016, + 1270 + ], + [ + 2015, + 1348 + ], + [ + 219, + 1342 + ] + ] + }, + { + "text": "bancaires pour la domiciliation de vos demandes de présentation de factures", + "score": 0.9758935570716858, + "box": [ + [ + 221, + 1327 + ], + [ + 2006, + 1329 + ], + [ + 2006, + 1402 + ], + [ + 221, + 1400 + ] + ] + }, + { + "text": "à payer par SEPAMAIL sur votre Espace Personnel de Banque à Distance.", + "score": 0.9884541630744934, + "box": [ + [ + 219, + 1384 + ], + [ + 2014, + 1392 + ], + [ + 2014, + 1463 + ], + [ + 219, + 1455 + ] + ] + }, + { + "text": "PARTIE RESERVEE AU DESTINATAIRE", + "score": 0.9995322823524475, + "box": [ + [ + 2274, + 1397 + ], + [ + 3182, + 1403 + ], + [ + 3182, + 1453 + ], + [ + 2273, + 1447 + ] + ] + }, + { + "text": "Votre QXBAN ne peut être transmis pour d'autres usages.", + "score": 0.9979712963104248, + "box": [ + [ + 221, + 1442 + ], + [ + 1533, + 1453 + ], + [ + 1532, + 1524 + ], + [ + 220, + 1512 + ] + ] + }, + { + "text": "DU RELEVE", + "score": 0.9993599653244019, + "box": [ + [ + 2597, + 1454 + ], + [ + 2883, + 1454 + ], + [ + 2883, + 1504 + ], + [ + 2597, + 1504 + ] + ] + }, + { + 
"text": "RÉPUBLIQUE FRANÇAMSE", + "score": 0.9445427656173706, + "box": [ + [ + 730, + 2150 + ], + [ + 1572, + 2157 + ], + [ + 1571, + 2227 + ], + [ + 730, + 2221 + ] + ] + }, + { + "text": "FR", + "score": 0.9999163746833801, + "box": [ + [ + 1694, + 2191 + ], + [ + 1763, + 2195 + ], + [ + 1761, + 2244 + ], + [ + 1691, + 2240 + ] + ] + }, + { + "text": "CARTE NATIONALE D'IDENTITÉ / IDENTITY CARD", + "score": 0.9931046366691589, + "box": [ + [ + 731, + 2222 + ], + [ + 1569, + 2227 + ], + [ + 1569, + 2272 + ], + [ + 731, + 2267 + ] + ] + }, + { + "text": "NOM/Sumame", + "score": 0.9803663492202759, + "box": [ + [ + 1000, + 2276 + ], + [ + 1163, + 2279 + ], + [ + 1162, + 2316 + ], + [ + 1000, + 2314 + ] + ] + }, + { + "text": "KARA", + "score": 0.9997424483299255, + "box": [ + [ + 1002, + 2308 + ], + [ + 1152, + 2308 + ], + [ + 1152, + 2361 + ], + [ + 1002, + 2361 + ] + ] + }, + { + "text": "A0034", + "score": 0.29388436675071716, + "box": [ + [ + 373, + 2359 + ], + [ + 404, + 2359 + ], + [ + 404, + 2498 + ], + [ + 373, + 2498 + ] + ] + }, + { + "text": "Prénoms/ Given names", + "score": 0.9784952998161316, + "box": [ + [ + 998, + 2404 + ], + [ + 1232, + 2406 + ], + [ + 1232, + 2444 + ], + [ + 997, + 2441 + ] + ] + }, + { + "text": "Josiane,Marie", + "score": 0.9998264908790588, + "box": [ + [ + 1002, + 2442 + ], + [ + 1305, + 2442 + ], + [ + 1305, + 2488 + ], + [ + 1002, + 2488 + ] + ] + }, + { + "text": "SEXE /Sex NATIONALITÉ/ Nationalty DATE DE NAISS. 
/ Date of birth", + "score": 0.9410700798034668, + "box": [ + [ + 995, + 2525 + ], + [ + 1854, + 2536 + ], + [ + 1853, + 2581 + ], + [ + 995, + 2571 + ] + ] + }, + { + "text": "F", + "score": 0.9961280226707458, + "box": [ + [ + 1000, + 2571 + ], + [ + 1034, + 2571 + ], + [ + 1034, + 2607 + ], + [ + 1000, + 2607 + ] + ] + }, + { + "text": "FRA", + "score": 0.9998824000358582, + "box": [ + [ + 1146, + 2569 + ], + [ + 1241, + 2569 + ], + [ + 1241, + 2611 + ], + [ + 1146, + 2611 + ] + ] + }, + { + "text": "17 10 1949", + "score": 0.9922542572021484, + "box": [ + [ + 1491, + 2570 + ], + [ + 1729, + 2577 + ], + [ + 1727, + 2625 + ], + [ + 1490, + 2618 + ] + ] + }, + { + "text": "LIEU DE NAISSANCE / Place of birth", + "score": 0.9634525179862976, + "box": [ + [ + 995, + 2610 + ], + [ + 1406, + 2612 + ], + [ + 1406, + 2649 + ], + [ + 995, + 2646 + ] + ] + }, + { + "text": "VANDGUVRE-LÈS-NANCY", + "score": 0.9882041811943054, + "box": [ + [ + 999, + 2640 + ], + [ + 1563, + 2645 + ], + [ + 1563, + 2695 + ], + [ + 998, + 2690 + ] + ] + }, + { + "text": "N° DU DOCUMENT / Document No", + "score": 0.915422797203064, + "box": [ + [ + 993, + 2765 + ], + [ + 1394, + 2769 + ], + [ + 1394, + 2806 + ], + [ + 992, + 2802 + ] + ] + }, + { + "text": "DATE D'EXPIR./ Expiry date", + "score": 0.9698978066444397, + "box": [ + [ + 1486, + 2770 + ], + [ + 1802, + 2775 + ], + [ + 1801, + 2816 + ], + [ + 1485, + 2810 + ] + ] + }, + { + "text": "PM6LBDVJ2", + "score": 0.9999509453773499, + "box": [ + [ + 987, + 2803 + ], + [ + 1422, + 2808 + ], + [ + 1421, + 2872 + ], + [ + 986, + 2866 + ] + ] + }, + { + "text": "02 11 2033", + "score": 0.9991224408149719, + "box": [ + [ + 1487, + 2808 + ], + [ + 1726, + 2811 + ], + [ + 1726, + 2862 + ], + [ + 1486, + 2859 + ] + ] + }, + { + "text": "D", + "score": 0.4960181415081024, + "box": [ + [ + 1097, + 2865 + ], + [ + 1331, + 2865 + ], + [ + 1331, + 2975 + ], + [ + 1097, + 2975 + ] + ] + }, + { + "text": "046413", + "score": 0.9999680519104004, + "box": [ 
+ [ + 1487, + 2886 + ], + [ + 1770, + 2891 + ], + [ + 1769, + 2960 + ], + [ + 1486, + 2954 + ] + ] + }, + { + "text": "TALLE/ Height", + "score": 0.9302806854248047, + "box": [ + [ + 695, + 3500 + ], + [ + 857, + 3502 + ], + [ + 856, + 3540 + ], + [ + 694, + 3537 + ] + ] + }, + { + "text": "DATE DE DELIVRANCE / Date ofissue", + "score": 0.9335067272186279, + "box": [ + [ + 926, + 3506 + ], + [ + 1307, + 3513 + ], + [ + 1306, + 3548 + ], + [ + 925, + 3540 + ] + ] + }, + { + "text": "1,69 m", + "score": 0.9858195185661316, + "box": [ + [ + 693, + 3534 + ], + [ + 862, + 3534 + ], + [ + 862, + 3585 + ], + [ + 693, + 3585 + ] + ] + }, + { + "text": "03 11 2023", + "score": 0.9933412671089172, + "box": [ + [ + 927, + 3537 + ], + [ + 1170, + 3541 + ], + [ + 1170, + 3584 + ], + [ + 927, + 3580 + ] + ] + }, + { + "text": "02 11 2033", + "score": 0.9226387739181519, + "box": [ + [ + 1520, + 3572 + ], + [ + 1723, + 3569 + ], + [ + 1724, + 3613 + ], + [ + 1521, + 3616 + ] + ] + }, + { + "text": "ADRESSEI Adaress", + "score": 0.9250975847244263, + "box": [ + [ + 692, + 3587 + ], + [ + 922, + 3590 + ], + [ + 922, + 3627 + ], + [ + 692, + 3624 + ] + ] + }, + { + "text": "23297M007941", + "score": 0.999860942363739, + "box": [ + [ + 360, + 3618 + ], + [ + 417, + 3621 + ], + [ + 396, + 3987 + ], + [ + 338, + 3983 + ] + ] + }, + { + "text": "42 RUE DE LA WANTZENAU", + "score": 0.9630455374717712, + "box": [ + [ + 694, + 3620 + ], + [ + 1153, + 3628 + ], + [ + 1153, + 3664 + ], + [ + 693, + 3657 + ] + ] + }, + { + "text": "67116 REICHSTETT", + "score": 0.975064754486084, + "box": [ + [ + 692, + 3658 + ], + [ + 998, + 3662 + ], + [ + 997, + 3698 + ], + [ + 692, + 3695 + ] + ] + }, + { + "text": "FRANCE", + "score": 0.9987226128578186, + "box": [ + [ + 690, + 3691 + ], + [ + 834, + 3695 + ], + [ + 832, + 3734 + ], + [ + 689, + 3730 + ] + ] + }, + { + "text": "PM6L0AUI3", + "score": 0.5835664868354797, + "box": [ + [ + 1797, + 3705 + ], + [ + 1827, + 3707 + ], + [ + 1822, + 3835 + ], + 
[ + 1791, + 3834 + ] + ] + }, + { + "text": "RÉPUBLIQUE FRANÇAISE", + "score": 0.9994015693664551, + "box": [ + [ + 562, + 3975 + ], + [ + 1091, + 3975 + ], + [ + 1091, + 4016 + ], + [ + 562, + 4016 + ] + ] + }, + { + "text": "IDFRAPM6LBDVJ26<<<<<<<<<<<<<<<", + "score": 0.9714798331260681, + "box": [ + [ + 323, + 4085 + ], + [ + 1758, + 4071 + ], + [ + 1759, + 4145 + ], + [ + 324, + 4159 + ] + ] + }, + { + "text": "4910176F3311020FRA<<<<<<<<<<<8", + "score": 0.9966246485710144, + "box": [ + [ + 318, + 4159 + ], + [ + 1752, + 4145 + ], + [ + 1753, + 4219 + ], + [ + 319, + 4233 + ] + ] + }, + { + "text": "KARA< Path: + image_path = Path(image_path) + if not image_path.is_file(): + raise FileNotFoundError(f"Image not found: {image_path}") + + output_dir_path = Path(output_dir) + output_dir_path.mkdir(parents=True, exist_ok=True) + + # Load image for visualization + bgr = cv2.imread(str(image_path)) + if bgr is None: + raise RuntimeError(f"Failed to read image with OpenCV: {image_path}") + image = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB) + + # Build predictor + predictor = ocr_predictor(det_arch=det_arch, reco_arch=reco_arch, pretrained=True) + + # Inference + doc = DocumentFile.from_images(str(image_path)) + result = predictor(doc) + + # Export structured result to JSON + export = result.export() + json_path = output_dir_path / f"{image_path.stem}_doctr.json" + with open(json_path, "w", encoding="utf-8") as f: + json.dump(export, f, ensure_ascii=False, indent=2) + + # Visualization + page = result.pages[0] + page_dict = export["pages"][0] + fig = visualize_page(page_dict, image=image) + vis_path = output_dir_path / f"{image_path.stem}_doctr_vis.png" + fig.savefig(vis_path, dpi=200, bbox_inches="tight") + plt.close(fig) + + # Write a simple text file with detected lines (if any) + lines = [] + for block in page.blocks: + for line in block.lines: + text = " ".join([word.value for word in line.words]) + if text: + lines.append(text) + if lines: + txt_path = output_dir_path / 
f"{image_path.stem}_doctr.txt" + txt_path.write_text("\n".join(lines), encoding="utf-8") + + return vis_path + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description="Run docTR OCR on an image") + parser.add_argument("--image", required=True, help="Path to input image") + parser.add_argument( + "--output-dir", + default="docTR_outputs", + help="Directory to store outputs (JSON/visualization)", + ) + parser.add_argument("--det-arch", default="db_resnet50", help="Detection architecture") + parser.add_argument("--reco-arch", default="crnn_vgg16_bn", help="Recognition architecture") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + vis_path = run_doctr( + image_path=args.image, + output_dir=args.output_dir, + det_arch=args.det_arch, + reco_arch=args.reco_arch, + ) + print(f"Saved visualization to: {vis_path}") + +