commit 24060e4ce7233a97d636857f1cd14128c91d4aab Author: Nguyễn Phước Thành <93478665+Zeres-Engel@users.noreply.github.com> Date: Tue Aug 5 19:09:55 2025 +0700 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..70d391f --- /dev/null +++ b/.gitignore @@ -0,0 +1,17 @@ +*.png +*.json +*.jpg +*.zip +*.rar +*.pdf +*.docx +*.doc +*.xls +*.xlsx +*.ppt +*.pptx +*.txt +*.csv +*.json +*.pt +*.ipynb diff --git a/README_ID_Card_Processing.md b/README_ID_Card_Processing.md new file mode 100644 index 0000000..3e922a4 --- /dev/null +++ b/README_ID_Card_Processing.md @@ -0,0 +1,183 @@ +# ID Card Processing with YOLO Detection + +Hệ thống xử lý ID cards sử dụng YOLO để detect và crop, kết hợp với các phương pháp tiền xử lý để clean background và enhance chất lượng ảnh. + +## Tính năng chính + +- **YOLO Detection**: Detect và crop ID cards từ ảnh gốc +- **Background Removal**: 3 phương pháp loại bỏ background (GrabCut, Threshold, Contour) +- **Image Enhancement**: Cải thiện chất lượng ảnh cho OCR +- **Batch Processing**: Xử lý hàng loạt ảnh +- **Flexible Pipeline**: Có thể chạy từng bước riêng biệt + +## Cài đặt + +1. Cài đặt dependencies: +```bash +pip install -r requirements.txt +``` + +2. Cấu trúc thư mục: +``` +OCR/ +├── src/ +│ ├── model/ +│ │ ├── __init__.py +│ │ ├── yolo_detector.py +│ │ └── id_card_processor.py +│ └── ... +├── data/ +│ ├── IDcards/ # Thư mục chứa ảnh ID cards gốc +│ └── processed_id_cards/ # Thư mục output +├── id_card_processor_main.py +└── requirements.txt +``` + +## Sử dụng + +### 1. Full Pipeline (Detect + Preprocess) + +```bash +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --confidence 0.5 \ + --bg-removal grabcut \ + --target-size 800x600 \ + --save-annotated +``` + +### 2. Chỉ Detect và Crop + +```bash +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --detect-only \ + --save-annotated +``` + +### 3. 
Chỉ Preprocess (bỏ qua detection) + +```bash +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --preprocess-only \ + --bg-removal threshold \ + --target-size 800x600 +``` + +## Các tham số + +### Detection Parameters +- `--model-path`: Đường dẫn đến custom YOLO model (.pt file) +- `--confidence`: Ngưỡng confidence cho detection (default: 0.5) + +### Preprocessing Parameters +- `--bg-removal`: Phương pháp loại bỏ background + - `grabcut`: Sử dụng GrabCut algorithm (recommended) + - `threshold`: Sử dụng thresholding + - `contour`: Sử dụng contour detection + - `none`: Không loại bỏ background +- `--target-size`: Kích thước chuẩn hóa (width x height) + +### Output Options +- `--save-annotated`: Lưu ảnh với bounding boxes +- `--detect-only`: Chỉ chạy detection +- `--preprocess-only`: Chỉ chạy preprocessing + +## Output Structure + +``` +data/processed_id_cards/ +├── cropped/ # Ảnh đã được crop từ YOLO +│ ├── image1_card_1.jpg +│ ├── image1_card_2.jpg +│ └── ... +├── processed/ # Ảnh đã được preprocess +│ ├── image1_card_1_processed.jpg +│ ├── image1_card_2_processed.jpg +│ └── ... +└── annotated/ # Ảnh với bounding boxes (nếu có) + ├── image1_annotated.jpg + └── ... 
+``` + +## Ví dụ sử dụng + +### Ví dụ 1: Xử lý toàn bộ dataset +```bash +# Xử lý tất cả ảnh trong thư mục IDcards +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --confidence 0.6 \ + --bg-removal grabcut \ + --target-size 1024x768 \ + --save-annotated +``` + +### Ví dụ 2: Test với một vài ảnh +```bash +# Tạo thư mục test với một vài ảnh +mkdir -p data/test_images +# Copy một vài ảnh vào test_images + +# Chạy detection +python id_card_processor_main.py \ + --input-dir "data/test_images" \ + --output-dir "data/test_output" \ + --detect-only \ + --save-annotated +``` + +### Ví dụ 3: Sử dụng custom model +```bash +# Nếu bạn có custom YOLO model đã train +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --model-path "models/custom_id_card_model.pt" \ + --confidence 0.7 +``` + +## Lưu ý + +1. **YOLO Model**: Mặc định sử dụng YOLOv8n pre-trained. Nếu có custom model tốt hơn, hãy sử dụng `--model-path` + +2. **Background Removal**: + - `grabcut`: Tốt nhất cho ID cards có background phức tạp + - `threshold`: Nhanh, phù hợp với background đơn giản + - `contour`: Phù hợp với ID cards có viền rõ ràng + +3. **Performance**: + - Sử dụng GPU nếu có thể để tăng tốc độ detection + - Có thể điều chỉnh `--confidence` để cân bằng giữa precision và recall + +4. **Memory**: Với dataset lớn, có thể cần tăng memory hoặc xử lý theo batch nhỏ hơn + +## Troubleshooting + +### Lỗi thường gặp + +1. **No detections found**: + - Giảm `--confidence` xuống 0.3-0.4 + - Kiểm tra chất lượng ảnh input + +2. **Memory error**: + - Giảm batch size hoặc xử lý từng ảnh một + - Sử dụng CPU thay vì GPU + +3. 
**Poor background removal**: + - Thử các phương pháp khác nhau: `grabcut`, `threshold`, `contour` + - Điều chỉnh parameters trong code + +### Debug mode + +```bash +python id_card_processor_main.py \ + --input-dir "data/IDcards" \ + --output-dir "data/processed_id_cards" \ + --log-level DEBUG +``` \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml new file mode 100644 index 0000000..96b942f --- /dev/null +++ b/config/config.yaml @@ -0,0 +1,48 @@ +# Data Augmentation Configuration +# Main configuration file for image data augmentation + +# Paths configuration +paths: + input_dir: "data/Archive" + output_dir: "out" + log_file: "logs/data_augmentation.log" + +# Data augmentation parameters - ONLY ROTATION +augmentation: + # Geometric transformations - ONLY ROTATION + rotation: + enabled: true + angles: [30, 60, 120, 150, 180, 210, 240, 300, 330] # Specific rotation angles + probability: 1.0 # Always apply rotation + +# Processing configuration +processing: + target_size: [224, 224] # [width, height] + batch_size: 32 + num_augmentations: 3 # number of augmented versions per image + save_format: "jpg" + quality: 95 + +# Supported image formats +supported_formats: + - ".jpg" + - ".jpeg" + - ".png" + - ".bmp" + - ".tiff" + +# Logging configuration +logging: + level: "INFO" # DEBUG, INFO, WARNING, ERROR + format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + handlers: + - type: "file" + filename: "logs/data_augmentation.log" + - type: "console" + +# Performance settings +performance: + num_workers: 4 + prefetch_factor: 2 + pin_memory: true + use_gpu: false \ No newline at end of file diff --git a/data_augmentation.log b/data_augmentation.log new file mode 100644 index 0000000..2405a6d --- /dev/null +++ b/data_augmentation.log @@ -0,0 +1,222 @@ +2025-08-05 18:53:06,981 - src.model.yolo_detector - INFO - Using pre-trained YOLOv8n model +2025-08-05 18:53:07,004 - src.model.yolo_detector - INFO - Using device: cuda +2025-08-05 
18:53:07,038 - src.model.yolo_detector - INFO - Using pre-trained YOLOv8n model +2025-08-05 18:53:07,038 - src.model.yolo_detector - INFO - Using device: cuda +2025-08-05 18:53:07,361 - src.model.yolo_detector - INFO - Using pre-trained YOLOv8n model +2025-08-05 18:53:07,362 - src.model.yolo_detector - INFO - Using device: cuda +2025-08-05 18:53:07,363 - src.model.id_card_processor - INFO - Detecting and cropping ID cards... +2025-08-05 18:53:07,363 - src.model.yolo_detector - ERROR - No images found in data\IDcards +2025-08-05 18:53:07,364 - src.model.id_card_processor - INFO - Processing cropped ID cards... +2025-08-05 18:53:07,364 - src.model.id_card_processor - ERROR - No images found in data\test_output\cropped +2025-08-05 19:04:14,903 - src.model.yolo_detector - INFO - Using pre-trained YOLOv8n model +2025-08-05 19:04:14,995 - src.model.yolo_detector - INFO - Using device: cuda +2025-08-05 19:04:14,996 - src.model.id_card_processor - INFO - Detecting and cropping ID cards... +2025-08-05 19:04:14,997 - src.model.yolo_detector - INFO - Processing 29 images from data\IDcards and subdirectories +2025-08-05 19:04:14,998 - src.model.yolo_detector - INFO - Processing 1/29: im10.png +2025-08-05 19:04:19,785 - src.model.yolo_detector - INFO - Found 1 detections in im10.png +2025-08-05 19:04:19,813 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im10_card_1.jpg +2025-08-05 19:04:19,813 - src.model.yolo_detector - INFO - Processed im10.png: 1 cards cropped +2025-08-05 19:04:19,814 - src.model.yolo_detector - INFO - Processing 2/29: im11.png +2025-08-05 19:04:19,926 - src.model.yolo_detector - INFO - Found 2 detections in im11.png +2025-08-05 19:04:19,937 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im11_card_1.jpg +2025-08-05 19:04:19,946 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im11_card_2.jpg +2025-08-05 
19:04:19,946 - src.model.yolo_detector - INFO - Processed im11.png: 2 cards cropped +2025-08-05 19:04:19,946 - src.model.yolo_detector - INFO - Processing 3/29: im12.png +2025-08-05 19:04:20,056 - src.model.yolo_detector - INFO - Found 2 detections in im12.png +2025-08-05 19:04:20,069 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im12_card_1.jpg +2025-08-05 19:04:20,082 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im12_card_2.jpg +2025-08-05 19:04:20,083 - src.model.yolo_detector - INFO - Processed im12.png: 2 cards cropped +2025-08-05 19:04:20,083 - src.model.yolo_detector - INFO - Processing 4/29: im13.png +2025-08-05 19:04:20,116 - src.model.yolo_detector - INFO - Found 0 detections in im13.png +2025-08-05 19:04:20,117 - src.model.yolo_detector - WARNING - No ID cards detected in im13.png +2025-08-05 19:04:20,117 - src.model.yolo_detector - INFO - Processing 5/29: im14.png +2025-08-05 19:04:20,156 - src.model.yolo_detector - INFO - Found 1 detections in im14.png +2025-08-05 19:04:20,172 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im14_card_1.jpg +2025-08-05 19:04:20,173 - src.model.yolo_detector - INFO - Processed im14.png: 1 cards cropped +2025-08-05 19:04:20,174 - src.model.yolo_detector - INFO - Processing 6/29: im15.png +2025-08-05 19:04:20,208 - src.model.yolo_detector - INFO - Found 1 detections in im15.png +2025-08-05 19:04:20,222 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im15_card_1.jpg +2025-08-05 19:04:20,222 - src.model.yolo_detector - INFO - Processed im15.png: 1 cards cropped +2025-08-05 19:04:20,223 - src.model.yolo_detector - INFO - Processing 7/29: im1_.png +2025-08-05 19:04:20,466 - src.model.yolo_detector - INFO - Found 0 detections in im1_.png +2025-08-05 19:04:20,466 - src.model.yolo_detector - WARNING - No ID cards detected in 
im1_.png +2025-08-05 19:04:20,466 - src.model.yolo_detector - INFO - Processing 8/29: im2.png +2025-08-05 19:04:20,534 - src.model.yolo_detector - INFO - Found 2 detections in im2.png +2025-08-05 19:04:20,564 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im2_card_1.jpg +2025-08-05 19:04:20,594 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im2_card_2.jpg +2025-08-05 19:04:20,594 - src.model.yolo_detector - INFO - Processed im2.png: 2 cards cropped +2025-08-05 19:04:20,595 - src.model.yolo_detector - INFO - Processing 9/29: im3.png +2025-08-05 19:04:20,648 - src.model.yolo_detector - INFO - Found 1 detections in im3.png +2025-08-05 19:04:20,671 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im3_card_1.jpg +2025-08-05 19:04:20,671 - src.model.yolo_detector - INFO - Processed im3.png: 1 cards cropped +2025-08-05 19:04:20,672 - src.model.yolo_detector - INFO - Processing 10/29: im4.png +2025-08-05 19:04:20,724 - src.model.yolo_detector - INFO - Found 1 detections in im4.png +2025-08-05 19:04:20,753 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im4_card_1.jpg +2025-08-05 19:04:20,754 - src.model.yolo_detector - INFO - Processed im4.png: 1 cards cropped +2025-08-05 19:04:20,754 - src.model.yolo_detector - INFO - Processing 11/29: im5.png +2025-08-05 19:04:20,798 - src.model.yolo_detector - INFO - Found 2 detections in im5.png +2025-08-05 19:04:20,816 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im5_card_1.jpg +2025-08-05 19:04:20,835 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im5_card_2.jpg +2025-08-05 19:04:20,836 - src.model.yolo_detector - INFO - Processed im5.png: 2 cards cropped +2025-08-05 19:04:20,837 - src.model.yolo_detector - INFO - Processing 12/29: 
im6.png +2025-08-05 19:04:20,994 - src.model.yolo_detector - INFO - Found 2 detections in im6.png +2025-08-05 19:04:21,052 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im6_card_1.jpg +2025-08-05 19:04:21,118 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im6_card_2.jpg +2025-08-05 19:04:21,119 - src.model.yolo_detector - INFO - Processed im6.png: 2 cards cropped +2025-08-05 19:04:21,120 - src.model.yolo_detector - INFO - Processing 13/29: im7.png +2025-08-05 19:04:21,159 - src.model.yolo_detector - INFO - Found 3 detections in im7.png +2025-08-05 19:04:21,168 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im7_card_1.jpg +2025-08-05 19:04:21,176 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im7_card_2.jpg +2025-08-05 19:04:21,184 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im7_card_3.jpg +2025-08-05 19:04:21,184 - src.model.yolo_detector - INFO - Processed im7.png: 3 cards cropped +2025-08-05 19:04:21,185 - src.model.yolo_detector - INFO - Processing 14/29: im8.png +2025-08-05 19:04:21,353 - src.model.yolo_detector - INFO - Found 2 detections in im8.png +2025-08-05 19:04:21,387 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im8_card_1.jpg +2025-08-05 19:04:21,423 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\Archive\im8_card_2.jpg +2025-08-05 19:04:21,424 - src.model.yolo_detector - INFO - Processed im8.png: 2 cards cropped +2025-08-05 19:04:21,425 - src.model.yolo_detector - INFO - Processing 15/29: im9.png +2025-08-05 19:04:21,522 - src.model.yolo_detector - INFO - Found 1 detections in im9.png +2025-08-05 19:04:21,532 - src.model.yolo_detector - INFO - Saved cropped image to 
data\processed_id_cards\cropped\Archive\im9_card_1.jpg +2025-08-05 19:04:21,532 - src.model.yolo_detector - INFO - Processed im9.png: 1 cards cropped +2025-08-05 19:04:21,532 - src.model.yolo_detector - INFO - Processing 16/29: im10.png +2025-08-05 19:04:21,585 - src.model.yolo_detector - INFO - Found 3 detections in im10.png +2025-08-05 19:04:21,601 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im10_card_1.jpg +2025-08-05 19:04:21,618 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im10_card_2.jpg +2025-08-05 19:04:21,636 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im10_card_3.jpg +2025-08-05 19:04:21,636 - src.model.yolo_detector - INFO - Processed im10.png: 3 cards cropped +2025-08-05 19:04:21,638 - src.model.yolo_detector - INFO - Processing 17/29: im11.png +2025-08-05 19:04:21,679 - src.model.yolo_detector - INFO - Found 2 detections in im11.png +2025-08-05 19:04:21,696 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im11_card_1.jpg +2025-08-05 19:04:21,712 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im11_card_2.jpg +2025-08-05 19:04:21,713 - src.model.yolo_detector - INFO - Processed im11.png: 2 cards cropped +2025-08-05 19:04:21,713 - src.model.yolo_detector - INFO - Processing 18/29: im12.png +2025-08-05 19:04:21,755 - src.model.yolo_detector - INFO - Found 0 detections in im12.png +2025-08-05 19:04:21,756 - src.model.yolo_detector - WARNING - No ID cards detected in im12.png +2025-08-05 19:04:21,756 - src.model.yolo_detector - INFO - Processing 19/29: im13.png +2025-08-05 19:04:21,793 - src.model.yolo_detector - INFO - Found 1 detections in im13.png +2025-08-05 19:04:21,806 - src.model.yolo_detector - INFO - Saved cropped image to 
data\processed_id_cards\cropped\titre-sejour-fr\im13_card_1.jpg +2025-08-05 19:04:21,806 - src.model.yolo_detector - INFO - Processed im13.png: 1 cards cropped +2025-08-05 19:04:21,806 - src.model.yolo_detector - INFO - Processing 20/29: im14.png +2025-08-05 19:04:21,846 - src.model.yolo_detector - INFO - Found 2 detections in im14.png +2025-08-05 19:04:21,862 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im14_card_1.jpg +2025-08-05 19:04:21,877 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im14_card_2.jpg +2025-08-05 19:04:21,877 - src.model.yolo_detector - INFO - Processed im14.png: 2 cards cropped +2025-08-05 19:04:21,878 - src.model.yolo_detector - INFO - Processing 21/29: im15.png +2025-08-05 19:04:21,914 - src.model.yolo_detector - INFO - Found 0 detections in im15.png +2025-08-05 19:04:21,914 - src.model.yolo_detector - WARNING - No ID cards detected in im15.png +2025-08-05 19:04:21,914 - src.model.yolo_detector - INFO - Processing 22/29: im1_.png +2025-08-05 19:04:21,959 - src.model.yolo_detector - INFO - Found 3 detections in im1_.png +2025-08-05 19:04:21,971 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im1__card_1.jpg +2025-08-05 19:04:21,983 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im1__card_2.jpg +2025-08-05 19:04:21,996 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im1__card_3.jpg +2025-08-05 19:04:21,997 - src.model.yolo_detector - INFO - Processed im1_.png: 3 cards cropped +2025-08-05 19:04:21,997 - src.model.yolo_detector - INFO - Processing 23/29: im2.png +2025-08-05 19:04:22,101 - src.model.yolo_detector - INFO - Found 1 detections in im2.png +2025-08-05 19:04:22,174 - src.model.yolo_detector - INFO - Saved cropped image to 
data\processed_id_cards\cropped\titre-sejour-fr\im2_card_1.jpg +2025-08-05 19:04:22,174 - src.model.yolo_detector - INFO - Processed im2.png: 1 cards cropped +2025-08-05 19:04:22,176 - src.model.yolo_detector - INFO - Processing 24/29: im3.png +2025-08-05 19:04:22,220 - src.model.yolo_detector - INFO - Found 2 detections in im3.png +2025-08-05 19:04:22,235 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im3_card_1.jpg +2025-08-05 19:04:22,251 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im3_card_2.jpg +2025-08-05 19:04:22,252 - src.model.yolo_detector - INFO - Processed im3.png: 2 cards cropped +2025-08-05 19:04:22,252 - src.model.yolo_detector - INFO - Processing 25/29: im5.png +2025-08-05 19:04:22,307 - src.model.yolo_detector - INFO - Found 1 detections in im5.png +2025-08-05 19:04:22,316 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im5_card_1.jpg +2025-08-05 19:04:22,316 - src.model.yolo_detector - INFO - Processed im5.png: 1 cards cropped +2025-08-05 19:04:22,317 - src.model.yolo_detector - INFO - Processing 26/29: im6.png +2025-08-05 19:04:22,375 - src.model.yolo_detector - INFO - Found 2 detections in im6.png +2025-08-05 19:04:22,387 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im6_card_1.jpg +2025-08-05 19:04:22,397 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im6_card_2.jpg +2025-08-05 19:04:22,398 - src.model.yolo_detector - INFO - Processed im6.png: 2 cards cropped +2025-08-05 19:04:22,399 - src.model.yolo_detector - INFO - Processing 27/29: im7.png +2025-08-05 19:04:22,441 - src.model.yolo_detector - INFO - Found 1 detections in im7.png +2025-08-05 19:04:22,458 - src.model.yolo_detector - INFO - Saved cropped image to 
data\processed_id_cards\cropped\titre-sejour-fr\im7_card_1.jpg +2025-08-05 19:04:22,459 - src.model.yolo_detector - INFO - Processed im7.png: 1 cards cropped +2025-08-05 19:04:22,460 - src.model.yolo_detector - INFO - Processing 28/29: im8.png +2025-08-05 19:04:22,492 - src.model.yolo_detector - INFO - Found 2 detections in im8.png +2025-08-05 19:04:22,502 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im8_card_1.jpg +2025-08-05 19:04:22,509 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im8_card_2.jpg +2025-08-05 19:04:22,510 - src.model.yolo_detector - INFO - Processed im8.png: 2 cards cropped +2025-08-05 19:04:22,510 - src.model.yolo_detector - INFO - Processing 29/29: im9.png +2025-08-05 19:04:22,540 - src.model.yolo_detector - INFO - Found 1 detections in im9.png +2025-08-05 19:04:22,546 - src.model.yolo_detector - INFO - Saved cropped image to data\processed_id_cards\cropped\titre-sejour-fr\im9_card_1.jpg +2025-08-05 19:04:22,546 - src.model.yolo_detector - INFO - Processed im9.png: 1 cards cropped +2025-08-05 19:04:22,546 - src.model.yolo_detector - INFO - Batch processing completed: +2025-08-05 19:04:22,548 - src.model.yolo_detector - INFO - - Total images: 29 +2025-08-05 19:04:22,548 - src.model.yolo_detector - INFO - - Processed: 25 +2025-08-05 19:04:22,548 - src.model.yolo_detector - INFO - - Total detections: 42 +2025-08-05 19:04:22,549 - src.model.yolo_detector - INFO - - Total cropped: 42 +2025-08-05 19:04:22,549 - src.model.id_card_processor - INFO - Processing cropped ID cards... 
+2025-08-05 19:04:22,552 - src.model.id_card_processor - INFO - Processing 42 images from data\processed_id_cards\cropped and subdirectories +2025-08-05 19:04:22,552 - src.model.id_card_processor - INFO - Processing 1/42: im10_card_1.jpg +2025-08-05 19:04:22,564 - src.model.id_card_processor - INFO - Removing background from im10_card_1.jpg +2025-08-05 19:04:22,877 - src.model.id_card_processor - INFO - Enhancing im10_card_1.jpg +2025-08-05 19:04:23,016 - src.model.id_card_processor - INFO - Normalizing im10_card_1.jpg +2025-08-05 19:04:23,023 - src.model.id_card_processor - INFO - Processed im10_card_1.jpg +2025-08-05 19:04:23,023 - src.model.id_card_processor - INFO - Processing 2/42: im11_card_1.jpg +2025-08-05 19:04:23,034 - src.model.id_card_processor - INFO - Removing background from im11_card_1.jpg +2025-08-05 19:04:23,264 - src.model.id_card_processor - INFO - Enhancing im11_card_1.jpg +2025-08-05 19:04:23,265 - src.model.id_card_processor - INFO - Normalizing im11_card_1.jpg +2025-08-05 19:04:23,270 - src.model.id_card_processor - INFO - Processed im11_card_1.jpg +2025-08-05 19:04:23,271 - src.model.id_card_processor - INFO - Processing 3/42: im11_card_2.jpg +2025-08-05 19:04:23,282 - src.model.id_card_processor - INFO - Removing background from im11_card_2.jpg +2025-08-05 19:04:23,312 - src.model.id_card_processor - INFO - Enhancing im11_card_2.jpg +2025-08-05 19:04:23,313 - src.model.id_card_processor - INFO - Normalizing im11_card_2.jpg +2025-08-05 19:04:23,316 - src.model.id_card_processor - INFO - Processed im11_card_2.jpg +2025-08-05 19:04:23,316 - src.model.id_card_processor - INFO - Processing 4/42: im12_card_1.jpg +2025-08-05 19:04:23,328 - src.model.id_card_processor - INFO - Removing background from im12_card_1.jpg +2025-08-05 19:04:23,670 - src.model.id_card_processor - INFO - Enhancing im12_card_1.jpg +2025-08-05 19:04:23,671 - src.model.id_card_processor - INFO - Normalizing im12_card_1.jpg +2025-08-05 19:04:23,675 - 
src.model.id_card_processor - INFO - Processed im12_card_1.jpg +2025-08-05 19:04:23,676 - src.model.id_card_processor - INFO - Processing 5/42: im12_card_2.jpg +2025-08-05 19:04:23,686 - src.model.id_card_processor - INFO - Removing background from im12_card_2.jpg +2025-08-05 19:04:29,279 - src.model.id_card_processor - INFO - Enhancing im12_card_2.jpg +2025-08-05 19:04:29,284 - src.model.id_card_processor - INFO - Normalizing im12_card_2.jpg +2025-08-05 19:04:29,289 - src.model.id_card_processor - INFO - Processed im12_card_2.jpg +2025-08-05 19:04:29,290 - src.model.id_card_processor - INFO - Processing 6/42: im14_card_1.jpg +2025-08-05 19:04:29,301 - src.model.id_card_processor - INFO - Removing background from im14_card_1.jpg +2025-08-05 19:04:29,774 - src.model.id_card_processor - INFO - Enhancing im14_card_1.jpg +2025-08-05 19:04:29,775 - src.model.id_card_processor - INFO - Normalizing im14_card_1.jpg +2025-08-05 19:04:29,779 - src.model.id_card_processor - INFO - Processed im14_card_1.jpg +2025-08-05 19:04:29,780 - src.model.id_card_processor - INFO - Processing 7/42: im15_card_1.jpg +2025-08-05 19:04:29,791 - src.model.id_card_processor - INFO - Removing background from im15_card_1.jpg +2025-08-05 19:04:30,009 - src.model.id_card_processor - INFO - Enhancing im15_card_1.jpg +2025-08-05 19:04:30,010 - src.model.id_card_processor - INFO - Normalizing im15_card_1.jpg +2025-08-05 19:04:30,015 - src.model.id_card_processor - INFO - Processed im15_card_1.jpg +2025-08-05 19:04:30,015 - src.model.id_card_processor - INFO - Processing 8/42: im2_card_1.jpg +2025-08-05 19:04:30,017 - src.model.id_card_processor - INFO - Removing background from im2_card_1.jpg +2025-08-05 19:04:31,861 - src.model.id_card_processor - INFO - Enhancing im2_card_1.jpg +2025-08-05 19:04:31,863 - src.model.id_card_processor - INFO - Normalizing im2_card_1.jpg +2025-08-05 19:04:31,869 - src.model.id_card_processor - INFO - Processed im2_card_1.jpg +2025-08-05 19:04:31,869 - 
src.model.id_card_processor - INFO - Processing 9/42: im2_card_2.jpg +2025-08-05 19:04:31,884 - src.model.id_card_processor - INFO - Removing background from im2_card_2.jpg +2025-08-05 19:04:38,985 - src.model.id_card_processor - INFO - Enhancing im2_card_2.jpg +2025-08-05 19:04:38,996 - src.model.id_card_processor - INFO - Normalizing im2_card_2.jpg +2025-08-05 19:04:39,007 - src.model.id_card_processor - INFO - Processed im2_card_2.jpg +2025-08-05 19:04:39,008 - src.model.id_card_processor - INFO - Processing 10/42: im3_card_1.jpg +2025-08-05 19:04:39,009 - src.model.id_card_processor - INFO - Removing background from im3_card_1.jpg +2025-08-05 19:04:39,177 - src.model.id_card_processor - INFO - Enhancing im3_card_1.jpg +2025-08-05 19:04:39,178 - src.model.id_card_processor - INFO - Normalizing im3_card_1.jpg +2025-08-05 19:04:39,182 - src.model.id_card_processor - INFO - Processed im3_card_1.jpg +2025-08-05 19:04:39,182 - src.model.id_card_processor - INFO - Processing 11/42: im4_card_1.jpg +2025-08-05 19:04:39,184 - src.model.id_card_processor - INFO - Removing background from im4_card_1.jpg +2025-08-05 19:04:39,374 - src.model.id_card_processor - INFO - Enhancing im4_card_1.jpg +2025-08-05 19:04:39,375 - src.model.id_card_processor - INFO - Normalizing im4_card_1.jpg +2025-08-05 19:04:39,379 - src.model.id_card_processor - INFO - Processed im4_card_1.jpg +2025-08-05 19:04:39,379 - src.model.id_card_processor - INFO - Processing 12/42: im5_card_1.jpg +2025-08-05 19:04:39,389 - src.model.id_card_processor - INFO - Removing background from im5_card_1.jpg +2025-08-05 19:04:39,842 - src.model.id_card_processor - INFO - Enhancing im5_card_1.jpg +2025-08-05 19:04:39,843 - src.model.id_card_processor - INFO - Normalizing im5_card_1.jpg +2025-08-05 19:04:39,846 - src.model.id_card_processor - INFO - Processed im5_card_1.jpg +2025-08-05 19:04:39,846 - src.model.id_card_processor - INFO - Processing 13/42: im5_card_2.jpg +2025-08-05 19:04:39,859 - 
"""
Main script for ID Card Processing with YOLO Detection
"""
import argparse
import sys
from pathlib import Path
from typing import Dict, Any
import logging

# Add src to path for imports
sys.path.append(str(Path(__file__).parent / "src"))


def parse_arguments():
    """Build the CLI parser and return the parsed arguments.

    Returns:
        argparse.Namespace carrying all pipeline options: input/output
        paths, YOLO model settings, mode flags (--detect-only /
        --preprocess-only), background-removal method, target size,
        annotation output and log level.
    """
    parser = argparse.ArgumentParser(description="ID Card Processing with YOLO Detection")

    parser.add_argument(
        "--input-dir",
        type=str,
        required=True,
        help="Input directory containing ID card images"
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="data/processed_id_cards",
        help="Output directory for processed images"
    )

    parser.add_argument(
        "--model-path",
        type=str,
        help="Path to custom YOLO model (.pt file)"
    )

    parser.add_argument(
        "--confidence",
        type=float,
        default=0.5,
        help="Confidence threshold for YOLO detection"
    )

    parser.add_argument(
        "--detect-only",
        action="store_true",
        help="Only detect and crop ID cards, skip preprocessing"
    )

    parser.add_argument(
        "--preprocess-only",
        action="store_true",
        help="Skip detection, directly preprocess images"
    )

    parser.add_argument(
        "--bg-removal",
        type=str,
        default="grabcut",
        choices=["grabcut", "threshold", "contour", "none"],
        help="Background removal method"
    )

    parser.add_argument(
        "--target-size",
        type=str,
        default="800x600",
        help="Target size for normalization (width x height)"
    )

    parser.add_argument(
        "--save-annotated",
        action="store_true",
        help="Save annotated images with bounding boxes"
    )

    parser.add_argument(
        "--log-level",
        type=str,
        default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="Logging level"
    )

    return parser.parse_args()


def parse_size(size_str: str) -> tuple:
    """Parse a size string like '800x600' into a (width, height) tuple.

    Exits the process with status 1 when the string is malformed or when
    either dimension is not a positive integer (fix: the previous version
    silently accepted zero/negative sizes, which would fail later during
    image resizing).
    """
    try:
        width, height = map(int, size_str.split('x'))
    except ValueError:
        print(f"Invalid size format: {size_str}. Expected format: widthxheight")
        sys.exit(1)
    # Reject non-positive dimensions early instead of failing downstream.
    if width <= 0 or height <= 0:
        print(f"Invalid size format: {size_str}. Expected format: widthxheight")
        sys.exit(1)
    return (width, height)


def main():
    """Run the ID card pipeline: detection only, preprocessing only, or both."""
    args = parse_arguments()

    # Project imports are deferred into main() so this module can be
    # imported (e.g. to reuse parse_size) without the src package and its
    # heavy dependencies installed. Script behavior is unchanged.
    from src.model.yolo_detector import YOLODetector
    from src.model.id_card_processor import IDCardProcessor
    from src.utils import setup_logging

    # Setup logging (fix: pass the CLI level directly instead of routing
    # it through a throwaway one-key dict).
    logger = setup_logging(args.log_level)
    logger.info("Starting ID Card Processing")

    # Parse paths
    input_dir = Path(args.input_dir)
    output_dir = Path(args.output_dir)

    # Check if input directory exists
    if not input_dir.exists():
        logger.error(f"Input directory does not exist: {input_dir}")
        sys.exit(1)

    # Create output directory
    output_dir.mkdir(parents=True, exist_ok=True)

    # Parse target size
    target_size = parse_size(args.target_size)

    # Initialize YOLO detector
    logger.info("Initializing YOLO detector...")
    yolo_detector = YOLODetector(
        model_path=args.model_path,
        confidence=args.confidence
    )

    # Initialize ID card processor
    logger.info("Initializing ID card processor...")
    id_processor = IDCardProcessor(yolo_detector)

    # Shared keyword arguments for every preprocessing call below
    # (previously duplicated verbatim in three branches).
    preprocess_kwargs = {
        "detect_first": False,
        "remove_bg": args.bg_removal != "none",
        "enhance": True,
        "normalize": True,
        "target_size": target_size,
    }

    if args.detect_only:
        # Only detect and crop ID cards
        logger.info("Running YOLO detection only...")
        results = yolo_detector.batch_process(
            input_dir,
            output_dir / "cropped",
            save_annotated=args.save_annotated
        )

        print("\n" + "="*50)
        print("YOLO DETECTION RESULTS")
        print("="*50)
        print(f"Total images: {results['total_images']}")
        print(f"Processed images: {results['processed_images']}")
        print(f"Total detections: {results['total_detections']}")
        print(f"Total cropped: {results['total_cropped']}")
        print(f"Output directory: {output_dir / 'cropped'}")
        print("="*50)

    elif args.preprocess_only:
        # Skip detection, directly preprocess
        logger.info("Running preprocessing only...")
        results = id_processor.batch_process_id_cards(
            input_dir,
            output_dir / "processed",
            **preprocess_kwargs
        )

        print("\n" + "="*50)
        print("PREPROCESSING RESULTS")
        print("="*50)
        print(f"Total images: {results['total_images']}")
        print(f"Processed images: {results['processed_images']}")
        print(f"Output directory: {output_dir / 'processed'}")
        print("="*50)

    else:
        # Full pipeline: detect + preprocess
        logger.info("Running full pipeline: detection + preprocessing...")

        # Step 1: Detect and crop ID cards
        logger.info("Step 1: Detecting and cropping ID cards...")
        detection_results = yolo_detector.batch_process(
            input_dir,
            output_dir / "cropped",
            save_annotated=args.save_annotated
        )

        # Step 2: Preprocess the cropped images; fall back to the original
        # inputs when detection produced nothing croppable.
        cropped_dir = output_dir / "cropped"
        if cropped_dir.exists():
            logger.info("Step 2: Preprocessing cropped ID cards...")
            preprocess_input = cropped_dir
        else:
            logger.warning("No cropped images found, preprocessing original images")
            preprocess_input = input_dir
        preprocessing_results = id_processor.batch_process_id_cards(
            preprocess_input,
            output_dir / "processed",
            **preprocess_kwargs
        )

        # Print summary
        print("\n" + "="*50)
        print("FULL PIPELINE RESULTS")
        print("="*50)
        print("DETECTION PHASE:")
        print(f"  - Total images: {detection_results['total_images']}")
        print(f"  - Processed images: {detection_results['processed_images']}")
        print(f"  - Total detections: {detection_results['total_detections']}")
        print(f"  - Total cropped: {detection_results['total_cropped']}")
        print("\nPREPROCESSING PHASE:")
        print(f"  - Total images: {preprocessing_results['total_images']}")
        print(f"  - Processed images: {preprocessing_results['processed_images']}")
        print(f"\nOutput directories:")
        print(f"  - Cropped images: {output_dir / 'cropped'}")
        print(f"  - Processed images: {output_dir / 'processed'}")
        if args.save_annotated:
            print(f"  - Annotated images: {output_dir / 'cropped'}")
        print("="*50)

    logger.info("ID Card Processing completed successfully")


if __name__ == "__main__":
    main()
exit" + ) + + parser.add_argument( + "--log-level", + type=str, + default="INFO", + choices=["DEBUG", "INFO", "WARNING", "ERROR"], + help="Logging level" + ) + + return parser.parse_args() + +def parse_range(range_str: str) -> tuple: + """Parse range string like '0.8-1.2' to tuple (0.8, 1.2)""" + try: + min_val, max_val = map(float, range_str.split('-')) + return (min_val, max_val) + except ValueError: + print(f"Invalid range format: {range_str}. Expected format: min-max") + sys.exit(1) + +def parse_size(size_str: str) -> tuple: + """Parse size string like '224x224' to tuple (224, 224)""" + try: + width, height = map(int, size_str.split('x')) + return (width, height) + except ValueError: + print(f"Invalid size format: {size_str}. Expected format: widthxheight") + sys.exit(1) + +def show_image_info(input_dir: Path): + """Show information about images in input directory""" + image_files = get_image_files(input_dir) + + if not image_files: + print(f"No images found in {input_dir}") + return + + print(f"\nFound {len(image_files)} images in {input_dir}") + print("\nImage Information:") + print("-" * 80) + + processor = ImageProcessor() + total_size = 0 + + for i, image_path in enumerate(image_files[:10]): # Show first 10 images + info = processor.get_image_info(image_path) + if info: + print(f"{i+1:2d}. {image_path.name}") + print(f" Size: {info['width']}x{info['height']} pixels") + print(f" Channels: {info['channels']}") + print(f" File size: {info['file_size_mb']} MB") + print(f" Format: {info['format']}") + total_size += info['file_size_mb'] + + if len(image_files) > 10: + print(f"\n... 
and {len(image_files) - 10} more images") + + print(f"\nTotal file size: {total_size:.2f} MB") + print(f"Average file size: {total_size/len(image_files):.2f} MB") + +def preview_augmentation(input_dir: Path, output_dir: Path, config: Dict[str, Any]): + """Preview augmentation on first image""" + image_files = get_image_files(input_dir) + + if not image_files: + print(f"No images found in {input_dir}") + return + + print(f"\nPreviewing augmentation on: {image_files[0].name}") + + # Create augmentation instance + augmenter = DataAugmentation(config) + + # Augment first image + augmented_paths = augmenter.augment_image_file( + image_files[0], + output_dir, + num_augmentations=3 + ) + + if augmented_paths: + print(f"Created {len(augmented_paths)} augmented versions:") + for i, path in enumerate(augmented_paths, 1): + print(f" {i}. {path.name}") + else: + print("Failed to create augmented images") + +def main(): + """Main function""" + args = parse_arguments() + + # Initialize config manager + config_manager = ConfigManager(args.config) + + # List presets if requested + if args.list_presets: + presets = config_manager.list_presets() + print("\nAvailable presets:") + for preset in presets: + print(f" - {preset}") + return + + # Apply preset if specified + if args.preset: + if not config_manager.apply_preset(args.preset): + sys.exit(1) + + + + # Override config with command line arguments + if args.input_dir: + config_manager.update_config({"paths": {"input_dir": args.input_dir}}) + + if args.output_dir: + config_manager.update_config({"paths": {"output_dir": args.output_dir}}) + + if args.num_augmentations: + config_manager.update_config({"processing": {"num_augmentations": args.num_augmentations}}) + + if args.target_size: + target_size = parse_size(args.target_size) + config_manager.update_config({"processing": {"target_size": list(target_size)}}) + + # Get configuration + config = config_manager.get_config() + paths_config = config_manager.get_paths_config() + 
processing_config = config_manager.get_processing_config() + augmentation_config = config_manager.get_augmentation_config() + logging_config = config_manager.get_logging_config() + + # Setup logging + logger = setup_logging(logging_config.get("level", "INFO")) + logger.info("Starting data augmentation process") + + # Parse paths + input_dir = Path(paths_config.get("input_dir", "data/dataset/training_data/images")) + output_dir = Path(paths_config.get("output_dir", "data/augmented_data")) + + # Check if input directory exists + if not input_dir.exists(): + logger.error(f"Input directory does not exist: {input_dir}") + sys.exit(1) + + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + # Show image information if requested + if args.info: + show_image_info(input_dir) + return + + # Preview augmentation if requested + if args.preview: + preview_augmentation(input_dir, output_dir, augmentation_config) + return + + # Get image files + image_files = get_image_files(input_dir) + + if not image_files: + logger.error(f"No images found in {input_dir}") + sys.exit(1) + + logger.info(f"Found {len(image_files)} images to process") + logger.info(f"Output directory: {output_dir}") + logger.info(f"Number of augmentations per image: {processing_config.get('num_augmentations', 3)}") + logger.info(f"Target size: {processing_config.get('target_size', [224, 224])}") + + # Create augmentation instance with new config + augmenter = DataAugmentation(augmentation_config) + + # Update target size + target_size = tuple(processing_config.get("target_size", [224, 224])) + augmenter.image_processor.target_size = target_size + + # Perform batch augmentation + logger.info("Starting batch augmentation...") + results = augmenter.batch_augment( + input_dir, + output_dir, + num_augmentations=processing_config.get("num_augmentations", 3) + ) + + # Get and display summary + summary = augmenter.get_augmentation_summary(results) + + print("\n" + "="*50) + print("AUGMENTATION 
SUMMARY") + print("="*50) + print(f"Original images: {summary['total_original_images']}") + print(f"Augmented images: {summary['total_augmented_images']}") + print(f"Augmentation ratio: {summary['augmentation_ratio']:.2f}") + print(f"Successful augmentations: {summary['successful_augmentations']}") + print(f"Output directory: {output_dir}") + print("="*50) + + logger.info("Data augmentation completed successfully") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000..47078f4 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,23 @@ +""" +Data Augmentation Package +""" + +__version__ = "1.0.0" +__author__ = "OCR Data Augmentation Tool" + +from .utils import * +from .image_processor import ImageProcessor +from .data_augmentation import DataAugmentation +from .config_manager import ConfigManager + +__all__ = [ + "ImageProcessor", + "DataAugmentation", + "ConfigManager", + "setup_logging", + "get_image_files", + "load_image", + "save_image", + "validate_image", + "print_progress", +] \ No newline at end of file diff --git a/src/__pycache__/__init__.cpython-313.pyc b/src/__pycache__/__init__.cpython-313.pyc new file mode 100644 index 0000000..49581ca Binary files /dev/null and b/src/__pycache__/__init__.cpython-313.pyc differ diff --git a/src/__pycache__/__init__.cpython-39.pyc b/src/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..9f1a7a0 Binary files /dev/null and b/src/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/__pycache__/config_manager.cpython-39.pyc b/src/__pycache__/config_manager.cpython-39.pyc new file mode 100644 index 0000000..d629082 Binary files /dev/null and b/src/__pycache__/config_manager.cpython-39.pyc differ diff --git a/src/__pycache__/data_augmentation.cpython-39.pyc b/src/__pycache__/data_augmentation.cpython-39.pyc new file mode 100644 index 0000000..8caddd1 Binary files /dev/null and 
b/src/__pycache__/data_augmentation.cpython-39.pyc differ diff --git a/src/__pycache__/image_processor.cpython-39.pyc b/src/__pycache__/image_processor.cpython-39.pyc new file mode 100644 index 0000000..93f0a66 Binary files /dev/null and b/src/__pycache__/image_processor.cpython-39.pyc differ diff --git a/src/__pycache__/utils.cpython-313.pyc b/src/__pycache__/utils.cpython-313.pyc new file mode 100644 index 0000000..3ac5118 Binary files /dev/null and b/src/__pycache__/utils.cpython-313.pyc differ diff --git a/src/__pycache__/utils.cpython-39.pyc b/src/__pycache__/utils.cpython-39.pyc new file mode 100644 index 0000000..b62b19a Binary files /dev/null and b/src/__pycache__/utils.cpython-39.pyc differ diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..2231176 --- /dev/null +++ b/src/config.py @@ -0,0 +1,40 @@ +""" +Configuration file for data augmentation +""" +import os +from pathlib import Path + +# Paths +BASE_DIR = Path(__file__).parent.parent +DATA_DIR = BASE_DIR / "data" +INPUT_IMAGES_DIR = DATA_DIR / "dataset" / "training_data" / "images" +OUTPUT_DIR = DATA_DIR / "augmented_data" + +# Data augmentation parameters +AUGMENTATION_CONFIG = { + "rotation_range": 15, # degrees + "width_shift_range": 0.1, # fraction of total width + "height_shift_range": 0.1, # fraction of total height + "brightness_range": [0.8, 1.2], # brightness factor + "zoom_range": [0.9, 1.1], # zoom factor + "horizontal_flip": True, + "vertical_flip": False, + "fill_mode": "nearest", + "cval": 0, + "rescale": 1./255, +} + +# Processing parameters +PROCESSING_CONFIG = { + "target_size": (224, 224), # (width, height) + "batch_size": 32, + "num_augmentations": 3, # number of augmented versions per image + "save_format": "jpg", + "quality": 95, +} + +# Supported image formats +SUPPORTED_FORMATS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff'] + +# Create output directory if it doesn't exist +OUTPUT_DIR.mkdir(parents=True, exist_ok=True) \ No newline at end of file diff 
class ConfigManager:
    """Loads, validates, merges and persists the tool's YAML configuration.

    Falls back to a built-in default configuration whenever the config file
    is missing, empty or unreadable, so callers can always rely on
    ``get_config()`` returning a populated dict.
    """

    def __init__(self, config_path: Optional[Union[str, Path]] = None):
        """
        Initialize ConfigManager

        Args:
            config_path: Path to main config file (defaults to config/config.yaml)
        """
        self.config_path = Path(config_path) if config_path else Path("config/config.yaml")
        self.config = {}

        self._load_config()

    def _load_config(self):
        """Load main configuration file, falling back to defaults on any failure."""
        try:
            if self.config_path.exists():
                with open(self.config_path, 'r', encoding='utf-8') as f:
                    # BUG FIX: yaml.safe_load() returns None for an empty
                    # document, which previously left self.config as None
                    # and crashed every later .get() call.
                    self.config = yaml.safe_load(f) or self._get_default_config()
                print(f"✅ Loaded configuration from {self.config_path}")
            else:
                print(f"⚠️ Config file not found: {self.config_path}")
                self.config = self._get_default_config()
        except Exception as e:
            print(f"❌ Error loading config: {e}")
            self.config = self._get_default_config()

    def _get_default_config(self) -> Dict[str, Any]:
        """Get default configuration"""
        return {
            "paths": {
                "input_dir": "data/dataset/training_data/images",
                "output_dir": "data/augmented_data",
                "log_file": "logs/data_augmentation.log"
            },
            "augmentation": {
                "rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0}
            },
            "processing": {
                "target_size": [224, 224],
                "batch_size": 32,
                "num_augmentations": 3,
                "save_format": "jpg",
                "quality": 95
            },
            "supported_formats": [".jpg", ".jpeg", ".png", ".bmp", ".tiff"],
            "logging": {
                "level": "INFO",
                "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
            },
            "performance": {
                "num_workers": 4,
                "prefetch_factor": 2,
                "pin_memory": True,
                "use_gpu": False
            }
        }

    def get_config(self) -> Dict[str, Any]:
        """Get current configuration"""
        return self.config

    def get_augmentation_config(self) -> Dict[str, Any]:
        """Get augmentation configuration"""
        return self.config.get("augmentation", {})

    def get_processing_config(self) -> Dict[str, Any]:
        """Get processing configuration"""
        return self.config.get("processing", {})

    def get_paths_config(self) -> Dict[str, Any]:
        """Get paths configuration"""
        return self.config.get("paths", {})

    def get_logging_config(self) -> Dict[str, Any]:
        """Get logging configuration"""
        return self.config.get("logging", {})

    def get_performance_config(self) -> Dict[str, Any]:
        """Get performance configuration"""
        return self.config.get("performance", {})

    def list_presets(self) -> list:
        """Return the names of presets defined under the config 'presets' key.

        BUG FIX: main.py calls this method (--list-presets) but it did not
        exist on this class, raising AttributeError at runtime.
        """
        return list(self.config.get("presets", {}).keys())

    def apply_preset(self, preset_name: str) -> bool:
        """Merge the named preset's settings into the current configuration.

        BUG FIX: main.py calls this method (--preset) but it did not exist
        on this class, raising AttributeError at runtime.

        Args:
            preset_name: Name of a preset declared under the 'presets' key

        Returns:
            True if the preset was found and applied
        """
        presets = self.config.get("presets", {})
        if preset_name not in presets:
            print(f"❌ Unknown preset: {preset_name}. Available: {', '.join(presets) or 'none'}")
            return False
        return self.update_config(presets[preset_name])

    def update_config(self, updates: Dict[str, Any]) -> bool:
        """
        Update configuration with new values

        Args:
            updates: Dictionary with updates to apply (deep-merged)

        Returns:
            True if updated successfully
        """
        try:
            self.config = self._merge_configs(self.config, updates)
            return True
        except Exception as e:
            print(f"❌ Error updating config: {e}")
            return False

    def _merge_configs(self, base_config: Dict[str, Any], updates: Dict[str, Any]) -> Dict[str, Any]:
        """Merge updates with base configuration (recursive for nested dicts)."""
        merged = base_config.copy()

        def deep_merge(base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
            result = base.copy()
            for key, value in update.items():
                if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                    result[key] = deep_merge(result[key], value)
                else:
                    result[key] = value
            return result

        return deep_merge(merged, updates)

    def save_config(self, output_path: Optional[Union[str, Path]] = None) -> bool:
        """
        Save current configuration to file

        Args:
            output_path: Path to save config file (defaults to the load path)

        Returns:
            True if saved successfully
        """
        try:
            output_path = Path(output_path) if output_path else self.config_path
            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, 'w', encoding='utf-8') as f:
                yaml.dump(self.config, f, default_flow_style=False, indent=2, allow_unicode=True)

            print(f"✅ Configuration saved to {output_path}")
            return True
        except Exception as e:
            print(f"❌ Error saving config: {e}")
            return False

    def print_config_summary(self):
        """Print configuration summary"""
        print("\n" + "="*50)
        print("CONFIGURATION SUMMARY")
        print("="*50)

        # Paths
        paths = self.get_paths_config()
        print(f"Input directory: {paths.get('input_dir', 'Not set')}")
        print(f"Output directory: {paths.get('output_dir', 'Not set')}")

        # Processing
        processing = self.get_processing_config()
        print(f"Target size: {processing.get('target_size', 'Not set')}")
        print(f"Number of augmentations: {processing.get('num_augmentations', 'Not set')}")

        # Augmentation: only sections with enabled=True count as active.
        augmentation = self.get_augmentation_config()
        enabled_augmentations = []
        for name, config in augmentation.items():
            if isinstance(config, dict) and config.get('enabled', False):
                enabled_augmentations.append(name)

        print(f"Enabled augmentations: {', '.join(enabled_augmentations) if enabled_augmentations else 'None'}")

        print("="*50)
def rotate_image(self, image: np.ndarray, angle: float) -> np.ndarray:
    """Rotate *image* around its centre by *angle* degrees.

    Method of DataAugmentation. Output keeps the original width/height;
    border pixels are replicated (cv2.BORDER_REPLICATE).
    """
    h, w = image.shape[:2]
    pivot = (w // 2, h // 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(image, matrix, (w, h),
                          borderMode=cv2.BORDER_REPLICATE)

def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
    """Produce rotated copies of *image* (defaults to 3 copies).

    Rotation fires only when the 'rotation' config section has enabled=True;
    each copy gets one angle drawn at random from the configured list.
    """
    count = num_augmentations or 3
    rotation_cfg = self.config.get("rotation", {})
    angle_choices = rotation_cfg.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
    apply_rotation = rotation_cfg.get("enabled", False)

    variants = []
    for _ in range(count):
        variant = image.copy()
        if apply_rotation:
            variant = self.rotate_image(variant, random.choice(angle_choices))
        variants.append(variant)
    return variants

def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
    """Augment a single image file and save the results.

    Returns the list of paths that were actually written (empty when the
    source image cannot be loaded).
    """
    source = load_image(image_path, self.image_processor.target_size)
    if source is None:
        return []

    saved = []
    for index, variant in enumerate(self.augment_single_image(source, num_augmentations), start=1):
        destination = output_dir / create_augmented_filename(image_path, index).name
        if save_image(variant, destination):
            saved.append(destination)
    return saved

def batch_augment(self, input_dir: Path, output_dir: Path, num_augmentations: int = None) -> Dict[str, List[Path]]:
    """Augment every image under *input_dir*.

    Returns a dict mapping each original image path (as str) to the list
    of augmented files created for it; images that produced no output are
    omitted.
    """
    from utils import get_image_files

    image_files = get_image_files(input_dir)
    print(f"Found {len(image_files)} images to augment")

    results = {}
    for position, image_path in enumerate(image_files, start=1):
        print_progress(position, len(image_files), "Augmenting images")
        outputs = self.augment_image_file(image_path, output_dir, num_augmentations)
        if outputs:
            results[str(image_path)] = outputs

    print(f"\nAugmented {len(results)} images successfully")
    return results

def get_augmentation_summary(self, results: Dict[str, List[Path]]) -> Dict[str, Any]:
    """Summarise a batch_augment() result dict into simple counters."""
    originals = len(results)
    augmented = sum(len(paths) for paths in results.values())
    return {
        "total_original_images": originals,
        "total_augmented_images": augmented,
        "augmentation_ratio": augmented / originals if originals > 0 else 0,
        "successful_augmentations": sum(1 for paths in results.values() if paths),
    }
a/src/image_processor.py b/src/image_processor.py new file mode 100644 index 0000000..46c3111 --- /dev/null +++ b/src/image_processor.py @@ -0,0 +1,174 @@ +""" +Image processing class for basic image operations +""" +import cv2 +import numpy as np +from pathlib import Path +from typing import Tuple, Optional, List +from utils import load_image, save_image, validate_image, get_image_files + +class ImageProcessor: + """Class for basic image processing operations""" + + def __init__(self, target_size: Tuple[int, int] = None): + """ + Initialize ImageProcessor + + Args: + target_size: Target size for image resizing (width, height) + """ + self.target_size = target_size or (224, 224) # Default size + + def load_and_preprocess(self, image_path: Path) -> Optional[np.ndarray]: + """ + Load and preprocess image + + Args: + image_path: Path to image file + + Returns: + Preprocessed image as numpy array or None if failed + """ + if not validate_image(image_path): + print(f"Invalid image file: {image_path}") + return None + + image = load_image(image_path, self.target_size) + if image is None: + return None + + # Normalize pixel values + image = image.astype(np.float32) / 255.0 + + return image + + def resize_image(self, image: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray: + """ + Resize image to target size + + Args: + image: Input image as numpy array + target_size: Target size (width, height) + + Returns: + Resized image + """ + return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA) + + def normalize_image(self, image: np.ndarray) -> np.ndarray: + """ + Normalize image pixel values to [0, 1] + + Args: + image: Input image + + Returns: + Normalized image + """ + return image.astype(np.float32) / 255.0 + + def denormalize_image(self, image: np.ndarray) -> np.ndarray: + """ + Denormalize image pixel values to [0, 255] + + Args: + image: Input image (normalized) + + Returns: + Denormalized image + """ + return (image * 255).astype(np.uint8) + + def 
def get_image_info(self, image_path: Path) -> dict:
    """Return basic metadata for the image at *image_path*.

    Method of ImageProcessor. Returns an empty dict when the file cannot
    be read as an image, so callers can test truthiness instead of
    catching exceptions.
    """
    try:
        image = cv2.imread(str(image_path))
        if image is None:
            return {}

        height, width, channels = image.shape
        file_size = image_path.stat().st_size / (1024 * 1024)  # bytes -> MB

        return {
            "path": str(image_path),
            "width": width,
            "height": height,
            "channels": channels,
            "file_size_mb": round(file_size, 2),
            "format": image_path.suffix
        }
    except Exception as e:
        print(f"Error getting image info for {image_path}: {e}")
        return {}

def batch_process_images(self, input_dir: Path, output_dir: Path) -> List[Path]:
    """Load, normalize and re-save every image in *input_dir*.

    Returns the list of output paths that were successfully written.
    """
    # BUG FIX: print_progress was used below but never imported by this
    # module (the top-level import only brings in load_image, save_image,
    # validate_image and get_image_files), so this method raised NameError
    # on its first iteration. Import it locally, matching the local-import
    # style already used elsewhere in this package.
    from utils import print_progress

    image_files = get_image_files(input_dir)
    processed_files = []

    print(f"Found {len(image_files)} images to process")

    for i, image_path in enumerate(image_files):
        print_progress(i + 1, len(image_files), "Processing images")

        # Load and preprocess image (scaled to [0, 1] floats)
        image = self.load_and_preprocess(image_path)
        if image is None:
            continue

        output_path = output_dir / image_path.name

        # Convert back to uint8 [0, 255] before writing to disk.
        image = self.denormalize_image(image)

        if save_image(image, output_path):
            processed_files.append(output_path)

    print(f"\nProcessed {len(processed_files)} images successfully")
    return processed_files

def create_thumbnail(self, image: np.ndarray, size: Tuple[int, int] = (100, 100)) -> np.ndarray:
    """Return *image* resized to *size* (width, height) using INTER_AREA."""
    return cv2.resize(image, size, interpolation=cv2.INTER_AREA)
image to grayscale + + Args: + image: Input image (RGB) + + Returns: + Grayscale image + """ + if len(image.shape) == 3: + return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + return image \ No newline at end of file diff --git a/src/model/__init__.py b/src/model/__init__.py new file mode 100644 index 0000000..ecb9162 --- /dev/null +++ b/src/model/__init__.py @@ -0,0 +1,8 @@ +""" +Model module for YOLO-based ID card detection and cropping +""" + +from .yolo_detector import YOLODetector +from .id_card_processor import IDCardProcessor + +__all__ = ['YOLODetector', 'IDCardProcessor'] \ No newline at end of file diff --git a/src/model/__pycache__/__init__.cpython-39.pyc b/src/model/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000..231b3dd Binary files /dev/null and b/src/model/__pycache__/__init__.cpython-39.pyc differ diff --git a/src/model/__pycache__/id_card_processor.cpython-39.pyc b/src/model/__pycache__/id_card_processor.cpython-39.pyc new file mode 100644 index 0000000..23bc067 Binary files /dev/null and b/src/model/__pycache__/id_card_processor.cpython-39.pyc differ diff --git a/src/model/__pycache__/yolo_detector.cpython-39.pyc b/src/model/__pycache__/yolo_detector.cpython-39.pyc new file mode 100644 index 0000000..a67f54a Binary files /dev/null and b/src/model/__pycache__/yolo_detector.cpython-39.pyc differ diff --git a/src/model/id_card_processor.py b/src/model/id_card_processor.py new file mode 100644 index 0000000..710f0cf --- /dev/null +++ b/src/model/id_card_processor.py @@ -0,0 +1,343 @@ +""" +ID Card Processor for background removal and preprocessing +""" +import cv2 +import numpy as np +from pathlib import Path +from typing import List, Optional, Dict, Any, Tuple +import logging +from .yolo_detector import YOLODetector + +class IDCardProcessor: + """ + ID Card Processor for background removal and preprocessing + """ + + def __init__(self, yolo_detector: Optional[YOLODetector] = None): + """ + Initialize ID Card Processor + + Args: + 
def remove_background(self, image: np.ndarray, method: str = 'grabcut') -> np.ndarray:
    """
    Remove background from image.

    Method of IDCardProcessor. Dispatches to one of three strategies;
    any unrecognized method name falls back to grabcut with a warning.

    Args:
        image: Input image (BGR, as produced by cv2.imread)
        method: Background removal method ('grabcut', 'threshold', 'contour')

    Returns:
        Image with background pixels zeroed out (same shape as input)
    """
    if method == 'grabcut':
        return self._grabcut_background_removal(image)
    elif method == 'threshold':
        return self._threshold_background_removal(image)
    elif method == 'contour':
        return self._contour_background_removal(image)
    else:
        self.logger.warning(f"Unknown method: {method}, using grabcut")
        return self._grabcut_background_removal(image)

def _grabcut_background_removal(self, image: np.ndarray) -> np.ndarray:
    """
    Remove background using the GrabCut algorithm.

    Returns the input unchanged if GrabCut raises (e.g. degenerate rect).
    """
    try:
        # Mask is initialized to all-background; GrabCut refines it in place.
        mask = np.zeros(image.shape[:2], np.uint8)

        # Scratch models required by cv2.grabCut's API (1x65 float64 each).
        bgd_model = np.zeros((1, 65), np.float64)
        fgd_model = np.zeros((1, 65), np.float64)

        # Seed rectangle covering the central 3/4 of the frame — assumes
        # the ID card sits roughly in the center of the image.
        height, width = image.shape[:2]
        rect = (width//8, height//8, width*3//4, height*3//4)

        # 5 refinement iterations, initialized from the rectangle.
        cv2.grabCut(image, mask, rect, bgd_model, fgd_model, 5, cv2.GC_INIT_WITH_RECT)

        # Collapse the 4-state GrabCut mask: definite/probable background
        # (0 and 2) -> 0, definite/probable foreground (1 and 3) -> 1.
        mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8')

        # Zero out background pixels; broadcasting the mask over channels.
        result = image * mask2[:, :, np.newaxis]

        return result

    except Exception as e:
        self.logger.error(f"Error in grabcut background removal: {e}")
        return image

def _threshold_background_removal(self, image: np.ndarray) -> np.ndarray:
    """
    Remove background using Otsu thresholding + largest-contour masking.

    Returns the input unchanged when no contour is found or on error.
    """
    try:
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Blur before Otsu to suppress noise in the histogram.
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # Otsu picks the threshold automatically (the 0 is ignored).
        _, thresh = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Find contours
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Largest contour by area is assumed to be the ID card.
        if contours:
            largest_contour = max(contours, key=cv2.contourArea)

            # Fill the contour to build a binary keep-mask.
            mask = np.zeros_like(gray)
            cv2.fillPoly(mask, [largest_contour], 255)

            # Keep only pixels inside the mask.
            result = cv2.bitwise_and(image, image, mask=mask)
            return result

        return image

    except Exception as e:
        self.logger.error(f"Error in threshold background removal: {e}")
        return image

def _contour_background_removal(self, image: np.ndarray) -> np.ndarray:
    """
    Remove background using Canny edges + polygon-approximated contour.

    Returns the input unchanged when no contour is found or on error.
    """
    try:
        # Convert to grayscale
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        # Canny edge detection with fixed 50/150 hysteresis thresholds.
        edges = cv2.Canny(gray, 50, 150)

        # Find contours
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Largest contour is assumed to be the card outline.
        if contours:
            largest_contour = max(contours, key=cv2.contourArea)

            # Approximate to a simpler polygon (2% of perimeter tolerance)
            # — for a card this typically yields a quadrilateral.
            epsilon = 0.02 * cv2.arcLength(largest_contour, True)
            approx = cv2.approxPolyDP(largest_contour, epsilon, True)

            # Fill the approximated polygon to build the keep-mask.
            mask = np.zeros_like(gray)
            cv2.fillPoly(mask, [approx], 255)

            # Keep only pixels inside the mask.
            result = cv2.bitwise_and(image, image, mask=mask)
            return result

        return image

    except Exception as e:
        self.logger.error(f"Error in contour background removal: {e}")
        return image
enhanced + + except Exception as e: + self.logger.error(f"Error enhancing image: {e}") + return image + + def normalize_image(self, image: np.ndarray, target_size: Tuple[int, int] = (800, 600)) -> np.ndarray: + """ + Normalize image size and orientation + """ + try: + # Resize image + resized = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA) + + # Convert to grayscale if needed + if len(resized.shape) == 3: + gray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY) + else: + gray = resized + + # Apply histogram equalization + equalized = cv2.equalizeHist(gray) + + # Convert back to BGR for consistency + if len(image.shape) == 3: + result = cv2.cvtColor(equalized, cv2.COLOR_GRAY2BGR) + else: + result = equalized + + return result + + except Exception as e: + self.logger.error(f"Error normalizing image: {e}") + return image + + def process_id_card(self, image_path: Path, output_dir: Path, + remove_bg: bool = True, enhance: bool = True, + normalize: bool = True, target_size: Tuple[int, int] = (800, 600)) -> Dict[str, Any]: + """ + Process a single ID card image + + Args: + image_path: Path to input image + output_dir: Output directory + remove_bg: Whether to remove background + enhance: Whether to enhance image + normalize: Whether to normalize image + target_size: Target size for normalization + + Returns: + Processing results + """ + result = { + 'input_path': str(image_path), + 'output_paths': [], + 'success': False + } + + try: + # Load image + image = cv2.imread(str(image_path)) + if image is None: + self.logger.error(f"Could not load image: {image_path}") + return result + + # Create output filename + stem = image_path.stem + processed_path = output_dir / f"{stem}_processed.jpg" + + # Apply processing steps + processed_image = image.copy() + + if remove_bg: + self.logger.info(f"Removing background from {image_path.name}") + processed_image = self.remove_background(processed_image) + + if enhance: + self.logger.info(f"Enhancing {image_path.name}") + 
processed_image = self.enhance_image(processed_image) + + if normalize: + self.logger.info(f"Normalizing {image_path.name}") + processed_image = self.normalize_image(processed_image, target_size) + + # Save processed image + processed_path.parent.mkdir(parents=True, exist_ok=True) + cv2.imwrite(str(processed_path), processed_image) + result['output_paths'].append(str(processed_path)) + + result['success'] = True + self.logger.info(f"Processed {image_path.name}") + + except Exception as e: + self.logger.error(f"Error processing {image_path}: {e}") + + return result + + def batch_process_id_cards(self, input_dir: Path, output_dir: Path, + detect_first: bool = True, **kwargs) -> Dict[str, Any]: + """ + Process all ID card images in a directory + + Args: + input_dir: Input directory + output_dir: Output directory + detect_first: Whether to detect ID cards first using YOLO + **kwargs: Additional arguments for processing + + Returns: + Batch processing results + """ + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + if detect_first: + # First detect and crop ID cards + self.logger.info("Detecting and cropping ID cards...") + detection_results = self.yolo_detector.batch_process(input_dir, output_dir / "cropped") + + # Process cropped images + cropped_dir = output_dir / "cropped" + if cropped_dir.exists(): + self.logger.info("Processing cropped ID cards...") + return self._process_cropped_images(cropped_dir, output_dir / "processed", **kwargs) + else: + self.logger.warning("No cropped images found, processing original images") + return self._process_cropped_images(input_dir, output_dir / "processed", **kwargs) + else: + # Process original images directly + return self._process_cropped_images(input_dir, output_dir / "processed", **kwargs) + + def _process_cropped_images(self, input_dir: Path, output_dir: Path, **kwargs) -> Dict[str, Any]: + """ + Process cropped ID card images recursively + """ + # Get all image files recursively from input 
directory and subdirectories
        image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'}
        image_files = []

        # Recursively find all image files
        for file_path in input_dir.rglob('*'):
            if file_path.is_file() and file_path.suffix.lower() in image_extensions:
                image_files.append(file_path)

        if not image_files:
            self.logger.error(f"No images found in {input_dir} and subdirectories")
            return {'success': False, 'error': 'No images found'}

        self.logger.info(f"Processing {len(image_files)} images from {input_dir} and subdirectories")

        # Running tallies returned to the caller
        results = {
            'total_images': len(image_files),
            'processed_images': 0,
            'results': []
        }

        # Process each image
        for i, image_path in enumerate(image_files):
            self.logger.info(f"Processing {i+1}/{len(image_files)}: {image_path.name}")

            # Create subdirectory structure in output to match input structure
            relative_path = image_path.relative_to(input_dir)
            output_subdir = output_dir / relative_path.parent
            output_subdir.mkdir(parents=True, exist_ok=True)

            # kwargs are forwarded unchanged to process_id_card
            result = self.process_id_card(image_path, output_subdir, **kwargs)
            results['results'].append(result)

            if result['success']:
                results['processed_images'] += 1

        # Summary
        self.logger.info(f"ID card processing completed:")
        self.logger.info(f"  - Total images: {results['total_images']}")
        self.logger.info(f"  - Processed: {results['processed_images']}")

        return results
\ No newline at end of file
diff --git a/src/model/yolo_detector.py b/src/model/yolo_detector.py
new file mode 100644
index 0000000..b1974bf
--- /dev/null
+++ b/src/model/yolo_detector.py
@@ -0,0 +1,266 @@
"""
YOLO Detector for ID Card Detection and Cropping
"""
import cv2
import numpy as np
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any
import logging
from ultralytics import YOLO
import torch

class YOLODetector:
    """
    YOLO-based detector for ID card detection and cropping
    """

    def __init__(self, model_path: Optional[str] = None,
confidence: float = 0.5): + """ + Initialize YOLO detector + + Args: + model_path: Path to YOLO model file (.pt) + confidence: Confidence threshold for detection + """ + self.confidence = confidence + self.logger = logging.getLogger(__name__) + + # Initialize model + if model_path and Path(model_path).exists(): + self.model = YOLO(model_path) + self.logger.info(f"Loaded custom YOLO model from {model_path}") + else: + # Use pre-trained YOLO model for general object detection + self.model = YOLO('yolov8n.pt') + self.logger.info("Using pre-trained YOLOv8n model") + + # Set device + self.device = 'cuda' if torch.cuda.is_available() else 'cpu' + self.logger.info(f"Using device: {self.device}") + + def detect_id_cards(self, image_path: Path) -> List[Dict[str, Any]]: + """ + Detect ID cards in an image + + Args: + image_path: Path to image file + + Returns: + List of detection results with bounding boxes + """ + try: + # Load image + image = cv2.imread(str(image_path)) + if image is None: + self.logger.error(f"Could not load image: {image_path}") + return [] + + # Run detection + results = self.model(image, conf=self.confidence) + + detections = [] + for result in results: + boxes = result.boxes + if boxes is not None: + for box in boxes: + # Get coordinates + x1, y1, x2, y2 = box.xyxy[0].cpu().numpy() + confidence = float(box.conf[0]) + class_id = int(box.cls[0]) + class_name = self.model.names[class_id] + + detection = { + 'bbox': [int(x1), int(y1), int(x2), int(y2)], + 'confidence': confidence, + 'class_id': class_id, + 'class_name': class_name, + 'area': (x2 - x1) * (y2 - y1) + } + detections.append(detection) + + # Sort by confidence and area (prefer larger, more confident detections) + detections.sort(key=lambda x: (x['confidence'], x['area']), reverse=True) + + self.logger.info(f"Found {len(detections)} detections in {image_path.name}") + return detections + + except Exception as e: + self.logger.error(f"Error detecting ID cards in {image_path}: {e}") + return [] + 
+ def crop_id_card(self, image_path: Path, bbox: List[int], + output_path: Optional[Path] = None, + padding: int = 10) -> Optional[np.ndarray]: + """ + Crop ID card from image using bounding box + + Args: + image_path: Path to input image + bbox: Bounding box [x1, y1, x2, y2] + output_path: Path to save cropped image + padding: Padding around the bounding box + + Returns: + Cropped image as numpy array + """ + try: + # Load image + image = cv2.imread(str(image_path)) + if image is None: + self.logger.error(f"Could not load image: {image_path}") + return None + + height, width = image.shape[:2] + x1, y1, x2, y2 = bbox + + # Add padding + x1 = max(0, x1 - padding) + y1 = max(0, y1 - padding) + x2 = min(width, x2 + padding) + y2 = min(height, y2 + padding) + + # Crop image + cropped = image[y1:y2, x1:x2] + + # Save if output path provided + if output_path: + output_path.parent.mkdir(parents=True, exist_ok=True) + cv2.imwrite(str(output_path), cropped) + self.logger.info(f"Saved cropped image to {output_path}") + + return cropped + + except Exception as e: + self.logger.error(f"Error cropping ID card from {image_path}: {e}") + return None + + def process_single_image(self, image_path: Path, output_dir: Path, + save_original: bool = False) -> Dict[str, Any]: + """ + Process a single image: detect and crop ID cards + + Args: + image_path: Path to input image + output_dir: Output directory for cropped images + save_original: Whether to save original image with bounding boxes + + Returns: + Processing results + """ + result = { + 'input_path': str(image_path), + 'detections': [], + 'cropped_paths': [], + 'success': False + } + + try: + # Detect ID cards + detections = self.detect_id_cards(image_path) + + if not detections: + self.logger.warning(f"No ID cards detected in {image_path.name}") + return result + + # Process each detection + for i, detection in enumerate(detections): + bbox = detection['bbox'] + + # Create output filename + stem = image_path.stem + suffix = 
f"_card_{i+1}.jpg" + output_path = output_dir / f"{stem}{suffix}" + + # Crop ID card + cropped = self.crop_id_card(image_path, bbox, output_path) + + if cropped is not None: + result['detections'].append(detection) + result['cropped_paths'].append(str(output_path)) + + # Save original with bounding boxes if requested + if save_original and detections: + image = cv2.imread(str(image_path)) + for detection in detections: + bbox = detection['bbox'] + cv2.rectangle(image, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 2) + cv2.putText(image, f"{detection['confidence']:.2f}", + (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, + 0.5, (0, 255, 0), 2) + + annotated_path = output_dir / f"{image_path.stem}_annotated.jpg" + cv2.imwrite(str(annotated_path), image) + result['annotated_path'] = str(annotated_path) + + result['success'] = True + self.logger.info(f"Processed {image_path.name}: {len(result['cropped_paths'])} cards cropped") + + except Exception as e: + self.logger.error(f"Error processing {image_path}: {e}") + + return result + + def batch_process(self, input_dir: Path, output_dir: Path, + save_annotated: bool = False) -> Dict[str, Any]: + """ + Process all images in a directory and subdirectories + + Args: + input_dir: Input directory containing images + output_dir: Output directory for cropped images + save_annotated: Whether to save annotated images + + Returns: + Batch processing results + """ + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + # Get all image files recursively from input directory and subdirectories + image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'} + image_files = [] + + # Recursively find all image files + for file_path in input_dir.rglob('*'): + if file_path.is_file() and file_path.suffix.lower() in image_extensions: + image_files.append(file_path) + + if not image_files: + self.logger.error(f"No images found in {input_dir} and subdirectories") + return {'success': False, 'error': 'No images 
found'}

        self.logger.info(f"Processing {len(image_files)} images from {input_dir} and subdirectories")

        # Running tallies returned to the caller
        results = {
            'total_images': len(image_files),
            'processed_images': 0,
            'total_detections': 0,
            'total_cropped': 0,
            'results': []
        }

        # Process each image
        for i, image_path in enumerate(image_files):
            self.logger.info(f"Processing {i+1}/{len(image_files)}: {image_path.name}")

            # Create subdirectory structure in output to match input structure
            relative_path = image_path.relative_to(input_dir)
            output_subdir = output_dir / relative_path.parent
            output_subdir.mkdir(parents=True, exist_ok=True)

            result = self.process_single_image(image_path, output_subdir, save_annotated)
            results['results'].append(result)

            # Tallies only count images whose processing succeeded
            if result['success']:
                results['processed_images'] += 1
                results['total_detections'] += len(result['detections'])
                results['total_cropped'] += len(result['cropped_paths'])

        # Summary
        self.logger.info(f"Batch processing completed:")
        self.logger.info(f"  - Total images: {results['total_images']}")
        self.logger.info(f"  - Processed: {results['processed_images']}")
        self.logger.info(f"  - Total detections: {results['total_detections']}")
        self.logger.info(f"  - Total cropped: {results['total_cropped']}")

        return results
\ No newline at end of file
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..fa7f80a
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,98 @@
"""
Utility functions for data augmentation
"""
import os
import logging
from pathlib import Path
from typing import List, Tuple, Optional
import cv2
import numpy as np
from PIL import Image

def setup_logging(log_level: str = "INFO") -> logging.Logger:
    """Setup logging configuration"""
    # Configures the root logger: file + console handlers
    logging.basicConfig(
        level=getattr(logging, log_level.upper()),
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler('data_augmentation.log'),
            logging.StreamHandler()
        ]
    )
    return
logging.getLogger(__name__)

def get_image_files(directory: Path) -> List[Path]:
    """Get all image files from directory (non-recursive), sorted by path.

    Globs both lower- and upper-case extensions; NOTE(review): on
    case-insensitive filesystems this can yield duplicate entries — confirm
    whether callers dedupe.
    """
    SUPPORTED_FORMATS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']

    image_files = []
    if directory.exists():
        for ext in SUPPORTED_FORMATS:
            image_files.extend(directory.glob(f"*{ext}"))
            image_files.extend(directory.glob(f"*{ext.upper()}"))
    return sorted(image_files)

def validate_image(image_path: Path) -> bool:
    """Return True if the file can be opened and verified as an image."""
    try:
        with Image.open(image_path) as img:
            img.verify()
        return True
    except Exception:
        return False

def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optional[np.ndarray]:
    """Load an image as RGB, optionally resizing to target_size (w, h).

    Returns None when the file cannot be read.
    """
    try:
        # OpenCV loads BGR; convert to RGB for downstream consumers
        image = cv2.imread(str(image_path))
        if image is None:
            return None

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        if target_size:
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)

        return image
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")
        return None

def save_image(image: np.ndarray, output_path: Path, quality: int = 95) -> bool:
    """Save an RGB image to file; returns False on failure.

    quality applies to JPEG output (ignored by OpenCV for other formats).
    """
    try:
        # OpenCV expects BGR on write
        image_bgr = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        output_path.parent.mkdir(parents=True, exist_ok=True)

        cv2.imwrite(str(output_path), image_bgr, [cv2.IMWRITE_JPEG_QUALITY, quality])
        return True
    except Exception as e:
        print(f"Error saving image {output_path}: {e}")
        return False

def create_augmented_filename(original_path: Path, index: int, suffix: str = "aug") -> Path:
    """Create filename for augmented image, e.g. photo.jpg -> photo_aug_01.jpg"""
    # Use a distinct local name: the old code rebound the `suffix`
    # parameter, shadowing it and reading confusingly.
    tag = f"_{suffix}_{index:02d}"
    return original_path.parent / f"{original_path.stem}{tag}{original_path.suffix}"

def get_file_size_mb(file_path: Path) -> float:
    """Get file size in MB"""
    return file_path.stat().st_size / (1024 * 1024)

def print_progress(current: int, total: int, prefix: str = "Progress"):
    """Print an in-place progress bar; terminates the line when complete."""
    # BUGFIX: guard against ZeroDivisionError when total == 0 (empty batch)
    if total <= 0:
        print(f'\r{prefix}: [no items] (0/0)')
        return
    bar_length = 50
    filled_length = int(round(bar_length * current / float(total)))
    percents = round(100.0 * current / float(total), 1)
    bar = '=' * filled_length + '-' * (bar_length - filled_length)
    print(f'\r{prefix}: [{bar}] {percents}% ({current}/{total})', end='')
    if current == total:
        print()