diff --git a/.gitignore b/.gitignore index 4af1b00..35cc9d2 100644 --- a/.gitignore +++ b/.gitignore @@ -16,4 +16,11 @@ *.pt *.ipynb *.pyc -*.log \ No newline at end of file +*.log + +!docs/ +!docs/**/*.png +!docs/**/*.jpg +!docs/**/*.jpeg +!docs/**/*.gif +!docs/**/*.svg \ No newline at end of file diff --git a/README.md b/README.md index 6270081..36a4baf 100644 --- a/README.md +++ b/README.md @@ -1,132 +1,148 @@ -# ID Cards Data Augmentation Tool +# ID Card Data Augmentation Pipeline -A comprehensive data augmentation tool specifically designed for ID card images, implementing 7 different augmentation techniques to simulate real-world scenarios. +A comprehensive data augmentation pipeline for ID card images with YOLO-based detection and advanced augmentation techniques. -## 🎯 Overview +![Pipeline Overview](docs/images/yolov8_pipeline.png) -This tool provides data augmentation capabilities for ID card images, implementing various transformation techniques that mimic real-world conditions such as worn-out cards, partial occlusion, different lighting conditions, and more. +## 🚀 Features -## ✨ Features +### **YOLO-based ID Card Detection** +- Automatic detection and cropping of ID cards from large images +- Configurable confidence and IoU thresholds +- Multiple cropping modes (bbox, square, aspect_ratio) +- Padding and target size customization -### 7 Augmentation Techniques +### **Advanced Data Augmentation** +- **Geometric Transformations**: Rotation with multiple angles +- **Random Cropping**: Simulates partially visible cards +- **Noise Addition**: Simulates worn-out cards +- **Partial Blockage**: Simulates occluded card details +- **Blurring**: Simulates blurred but readable images +- **Brightness/Contrast**: Mimics different lighting conditions +- **Grayscale Conversion**: Final preprocessing step for all images -1. **Rotation** - Simulates cards at different angles -2. **Random Cropping** - Simulates partially visible cards -3. 
**Random Noise** - Simulates worn-out cards -4. **Horizontal Blockage** - Simulates occluded card details -5. **Grayscale Transformation** - Simulates Xerox/scan copies -6. **Blurring** - Simulates blurred but readable cards -7. **Brightness & Contrast** - Simulates different lighting conditions +### **Flexible Configuration** +- YAML-based configuration system +- Command-line argument overrides +- Environment-specific settings +- Comprehensive logging -### Key Features +## 📋 Requirements -- **Separate Methods**: Each augmentation technique is applied independently -- **Quality Preservation**: Maintains image quality with white background preservation -- **OpenCV Integration**: Uses OpenCV functions for reliable image processing -- **Configurable**: Easy configuration through YAML files -- **Progress Tracking**: Real-time progress monitoring -- **Batch Processing**: Process multiple images efficiently +```bash +# Python 3.8+ +conda create -n gpu python=3.8 +conda activate gpu -## 🚀 Installation +# Install dependencies +pip install -r requirements.txt +``` -### Prerequisites +### Dependencies +- `opencv-python>=4.5.0` +- `numpy>=1.21.0` +- `Pillow>=8.3.0` +- `PyYAML>=5.4.0` +- `ultralytics>=8.0.0` (for YOLO models) -- Python 3.7+ -- OpenCV -- NumPy -- PyYAML -- PIL (Pillow) +## 🛠️ Installation -### Setup - -1. **Clone the repository**: +1. **Clone the repository** ```bash git clone <repository-url> cd IDcardsGenerator ``` -2. **Install dependencies**: +2. **Install dependencies** ```bash -pip install opencv-python numpy pyyaml pillow +pip install -r requirements.txt ``` -3. **Activate conda environment** (if using GPU): +3. 
**Prepare YOLO model** (optional) ```bash -conda activate gpu +# Place your trained YOLO model at: +data/weights/id_cards_yolov8n.pt ``` -## 📁 Project Structure +## 📖 Usage -``` -IDcardsGenerator/ -├── config/ -│ └── config.yaml # Main configuration file -├── data/ -│ └── IDcards/ -│ └── processed/ # Input images directory -├── src/ -│ ├── data_augmentation.py # Core augmentation logic -│ ├── config_manager.py # Configuration management -│ ├── image_processor.py # Image processing utilities -│ └── utils.py # Utility functions -├── logs/ # Log files -├── out/ # Output directory -└── main.py # Main script +### **Basic Usage** + +```bash +# Run with default configuration +python main.py + +# Run with ID card detection enabled +python main.py --enable-id-detection + +# Run with custom input/output directories +python main.py --input-dir "path/to/input" --output-dir "path/to/output" ``` -## ⚙️ Configuration +### **Configuration Options** -### Main Configuration (`config/config.yaml`) +#### **ID Card Detection** +```bash +# Enable detection with custom model +python main.py --enable-id-detection --model-path "path/to/model.pt" + +# Adjust detection parameters +python main.py --enable-id-detection --confidence 0.3 --crop-mode square + +# Set target size for cropped cards +python main.py --enable-id-detection --crop-target-size "640,640" +``` + +#### **Data Augmentation** +```bash +# Customize augmentation parameters +python main.py --num-augmentations 5 --target-size "512,512" + +# Preview augmentation results +python main.py --preview +``` + +### **Configuration File** + +Edit `config/config.yaml` for persistent settings: ```yaml -# Data augmentation parameters +# ID Card Detection +id_card_detection: + enabled: false # Enable/disable YOLO detection + model_path: "data/weights/id_cards_yolov8n.pt" + confidence_threshold: 0.25 + iou_threshold: 0.45 + padding: 10 + crop_mode: "bbox" + target_size: null + +# Data Augmentation augmentation: - # Rotation rotation: enabled: 
true angles: [30, 60, 120, 150, 180, 210, 240, 300, 330] - probability: 1.0 - - # Random cropping random_cropping: enabled: true ratio_range: [0.7, 1.0] - probability: 1.0 - - # Random noise random_noise: enabled: true mean_range: [0.0, 0.7] variance_range: [0.0, 0.1] - probability: 1.0 - - # Partial blockage partial_blockage: enabled: true - num_occlusions_range: [1, 100] coverage_range: [0.0, 0.25] - variance_range: [0.0, 0.1] - probability: 1.0 - - # Grayscale transformation - grayscale: - enabled: true - probability: 1.0 - - # Blurring blurring: enabled: true kernel_ratio_range: [0.0, 0.0084] - probability: 1.0 - - # Brightness and contrast brightness_contrast: enabled: true alpha_range: [0.4, 3.0] beta_range: [1, 100] - probability: 1.0 + grayscale: + enabled: true # Applied as final step -# Processing configuration +# Processing processing: target_size: [640, 640] num_augmentations: 3 @@ -134,156 +150,139 @@ processing: quality: 95 ``` -## 🎮 Usage +## 🔄 Workflow -### Basic Usage +### **Two-Step Processing Pipeline** +#### **Step 1: ID Card Detection (Optional)** +When `id_card_detection.enabled: true`: +1. **Input**: Large images containing multiple ID cards +2. **YOLO Detection**: Locate and detect ID cards +3. **Cropping**: Extract individual ID cards with padding +4. **Output**: Cropped ID cards saved to `out/processed/` + +#### **Step 2: Data Augmentation** +1. **Input**: Original images OR cropped ID cards +2. **Augmentation**: Apply 6 augmentation methods: + - Rotation (9 different angles) + - Random cropping (70-100% ratio) + - Random noise (simulate wear) + - Partial blockage (simulate occlusion) + - Blurring (simulate motion blur) + - Brightness/Contrast adjustment +3. **Grayscale**: Convert all images to grayscale (final step) +4. 
**Output**: Augmented images in main output directory + +### **Direct Augmentation Mode** +When `id_card_detection.enabled: false`: +- Skips YOLO detection +- Applies augmentation directly to input images +- All images are converted to grayscale + +## 📊 Output Structure + +``` +output_directory/ +├── processed/ # Cropped ID cards (if detection enabled) +│ ├── id_card_001.jpg +│ ├── id_card_002.jpg +│ └── processing_summary.json +├── im1__rotation_01.png # Augmented images +├── im1__cropping_01.png +├── im1__noise_01.png +├── im1__blockage_01.png +├── im1__blurring_01.png +├── im1__brightness_contrast_01.png +└── augmentation_summary.json +``` + +## 🎯 Use Cases + +### **Training Data Generation** ```bash -python main.py --input-dir data/IDcards/processed --output-dir out +# Generate diverse training data +python main.py --enable-id-detection --num-augmentations 10 ``` -### Command Line Options - +### **Quality Control** ```bash -python main.py [OPTIONS] - -Options: - --config CONFIG Path to configuration file (default: config/config.yaml) - --input-dir INPUT_DIR Input directory containing images - --output-dir OUTPUT_DIR Output directory for augmented images - --num-augmentations N Number of augmented versions per image (default: 3) - --target-size SIZE Target size for images (width x height) - --preview Preview augmentation on first image only - --info Show information about images in input directory - --list-presets List available presets and exit - --log-level LEVEL Logging level (DEBUG, INFO, WARNING, ERROR) +# Preview results before processing +python main.py --preview ``` -### Examples - -1. **Preview augmentation**: +### **Batch Processing** ```bash -python main.py --preview --input-dir data/IDcards/processed --output-dir test_output +# Process large datasets +python main.py --input-dir "large_dataset/" --output-dir "augmented_dataset/" ``` -2. 
**Show image information**: -```bash -python main.py --info --input-dir data/IDcards/processed +## ⚙️ Advanced Configuration + +### **Custom Augmentation Parameters** + +```yaml +augmentation: + rotation: + angles: [45, 90, 135, 180, 225, 270, 315] # Custom angles + random_cropping: + ratio_range: [0.8, 0.95] # Tighter cropping + random_noise: + mean_range: [0.1, 0.5] # More noise + variance_range: [0.05, 0.15] ``` -3. **Custom number of augmentations**: -```bash -python main.py --input-dir data/IDcards/processed --output-dir out --num-augmentations 5 +### **Performance Optimization** + +```yaml +performance: + num_workers: 4 + prefetch_factor: 2 + pin_memory: true + use_gpu: false ``` -4. **Custom target size**: -```bash -python main.py --input-dir data/IDcards/processed --output-dir out --target-size 512x512 -``` - -## 📊 Output - -### File Naming Convention - -The tool creates separate files for each augmentation method: - -``` -im1_rotation_01.png # Rotation method -im1_cropping_01.png # Random cropping method -im1_noise_01.png # Random noise method -im1_blockage_01.png # Partial blockage method -im1_grayscale_01.png # Grayscale method -im1_blurring_01.png # Blurring method -im1_brightness_contrast_01.png # Brightness/contrast method -``` - -### Output Summary - -After processing, you'll see a summary like: - -``` -================================================== -AUGMENTATION SUMMARY -================================================== -Original images: 106 -Augmented images: 2226 -Augmentation ratio: 21.00 -Successful augmentations: 106 -Output directory: out -================================================== -``` - -## 🔧 Augmentation Techniques Details - -### 1. Rotation -- **Purpose**: Simulates cards at different angles -- **Angles**: 30°, 60°, 120°, 150°, 180°, 210°, 240°, 300°, 330° -- **Method**: OpenCV rotation with white background preservation - -### 2. 
Random Cropping -- **Purpose**: Simulates partially visible ID cards -- **Ratio Range**: 0.7 to 1.0 (70% to 100% of original size) -- **Method**: Random crop with white background preservation - -### 3. Random Noise -- **Purpose**: Simulates worn-out cards -- **Mean Range**: 0.0 to 0.7 -- **Variance Range**: 0.0 to 0.1 -- **Method**: Gaussian noise addition - -### 4. Horizontal Blockage -- **Purpose**: Simulates occluded card details -- **Lines**: 1 to 100 horizontal lines -- **Coverage**: 0% to 25% of image area -- **Colors**: Multiple colors to simulate various objects - -### 5. Grayscale Transformation -- **Purpose**: Simulates Xerox/scan copies -- **Method**: OpenCV `cv2.cvtColor()` function -- **Output**: 3-channel grayscale image - -### 6. Blurring -- **Purpose**: Simulates blurred but readable cards -- **Kernel Ratio**: 0.0 to 0.0084 -- **Method**: OpenCV `cv2.filter2D()` with Gaussian kernel - -### 7. Brightness & Contrast -- **Purpose**: Simulates different lighting conditions -- **Alpha Range**: 0.4 to 3.0 (contrast) -- **Beta Range**: 1 to 100 (brightness) -- **Method**: OpenCV `cv2.convertScaleAbs()` - -## 🛠️ Development - -### Adding New Augmentation Methods - -1. Add the method to `src/data_augmentation.py` -2. Update configuration in `config/config.yaml` -3. Update default config in `src/config_manager.py` -4. 
Test with preview mode - -### Code Structure - -- **`main.py`**: Entry point and command-line interface -- **`src/data_augmentation.py`**: Core augmentation logic -- **`src/config_manager.py`**: Configuration management -- **`src/image_processor.py`**: Image processing utilities -- **`src/utils.py`**: Utility functions - ## 📝 Logging -The tool provides comprehensive logging: +The system provides comprehensive logging: +- **File**: `logs/data_augmentation.log` +- **Console**: Real-time progress updates +- **Summary**: JSON files with processing statistics -- **File logging**: `logs/data_augmentation.log` -- **Console logging**: Real-time progress updates -- **Log levels**: DEBUG, INFO, WARNING, ERROR +### **Log Levels** +- `INFO`: General processing information +- `WARNING`: Non-critical issues (e.g., no cards detected) +- `ERROR`: Critical errors + +## 🔧 Troubleshooting + +### **Common Issues** + +1. **No images detected** + - Check input directory path + - Verify image formats (jpg, png, bmp, tiff) + - Ensure images are not corrupted + +2. **YOLO model not found** + - Place model file at `data/weights/id_cards_yolov8n.pt` + - Or specify custom path with `--model-path` + +3. **Memory issues** + - Reduce `num_augmentations` + - Use smaller `target_size` + - Enable GPU if available + +### **Performance Tips** + +- **GPU Acceleration**: Set `use_gpu: true` in config +- **Batch Processing**: Use multiple workers for large datasets +- **Memory Management**: Process in smaller batches ## 🤝 Contributing 1. Fork the repository 2. Create a feature branch 3. Make your changes -4. Test thoroughly +4. Add tests if applicable 5. Submit a pull request ## 📄 License @@ -292,18 +291,10 @@ This project is licensed under the MIT License - see the LICENSE file for detail ## 🙏 Acknowledgments -- OpenCV for image processing capabilities -- NumPy for numerical operations -- PyYAML for configuration management - -## 📞 Support - -For issues and questions: -1. 
Check the logs in `logs/data_augmentation.log` -2. Review the configuration in `config/config.yaml` -3. Test with preview mode first -4. Create an issue with detailed information +- **YOLOv8**: Ultralytics for the detection framework +- **OpenCV**: Computer vision operations +- **NumPy**: Numerical computations --- -**Note**: This tool is specifically designed for ID card augmentation and may need adjustments for other image types. \ No newline at end of file +**For questions and support, please open an issue on GitHub.** \ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index 3a8ea44..e041ff2 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -7,6 +7,17 @@ paths: output_dir: "out1" log_file: "logs/data_augmentation.log" +# ID Card Detection configuration +id_card_detection: + enabled: false # Enable/disable ID card detection and cropping + model_path: "data/weights/id_cards_yolov8n.pt" # Path to the YOLO model + confidence_threshold: 0.25 # Confidence threshold for detection + iou_threshold: 0.45 # IoU threshold for NMS + padding: 10 # Extra padding around the bbox + crop_mode: "bbox" # Crop mode: bbox, square, aspect_ratio + target_size: null # Target size (width, height) or null + save_original_crops: true # Whether to save the original cropped images + # Data augmentation parameters - ROTATION and RANDOM CROPPING augmentation: # Geometric transformations @@ -36,11 +47,6 @@ augmentation: variance_range: [0.0, 0.1] # Line thickness variance (min, max) probability: 1.0 # Always apply blockage - # Grayscale transformation to mimic Xerox/scan copies - grayscale: - enabled: true - probability: 1.0 # Always apply grayscale - # Blurring to simulate blurred card images that are still readable blurring: enabled: true @@ -53,6 +59,11 @@ augmentation: alpha_range: [0.4, 3.0] # Contrast range (min, max) beta_range: [1, 100] # Brightness range (min, max) probability: 1.0 # Always apply brightness/contrast adjustment + + # Grayscale transformation 
as final step (applied to all augmented images) + grayscale: + enabled: true + probability: 1.0 # Always apply grayscale as final step # Processing configuration processing: diff --git a/docs/images/yolov8_pipeline.png b/docs/images/yolov8_pipeline.png new file mode 100644 index 0000000..c2ddf63 Binary files /dev/null and b/docs/images/yolov8_pipeline.png differ diff --git a/main.py b/main.py index c544951..f996d34 100644 --- a/main.py +++ b/main.py @@ -12,6 +12,7 @@ sys.path.append(str(Path(__file__).parent / "src")) from src.config_manager import ConfigManager from src.data_augmentation import DataAugmentation from src.image_processor import ImageProcessor +from src.id_card_detector import IDCardDetector from src.utils import setup_logging, get_image_files, print_progress def parse_arguments(): @@ -83,6 +84,38 @@ def parse_arguments(): help="Logging level" ) + # ID Card Detection arguments + parser.add_argument( + "--enable-id-detection", + action="store_true", + help="Enable ID card detection and cropping before augmentation" + ) + + parser.add_argument( + "--model-path", + type=str, + help="Path to YOLO model for ID card detection (overrides config)" + ) + + parser.add_argument( + "--confidence", + type=float, + help="Confidence threshold for ID card detection (overrides config)" + ) + + parser.add_argument( + "--crop-mode", + type=str, + choices=["bbox", "square", "aspect_ratio"], + help="Crop mode for ID cards (overrides config)" + ) + + parser.add_argument( + "--crop-target-size", + type=str, + help="Target size for cropped ID cards (widthxheight) (overrides config)" + ) + return parser.parse_args() def parse_range(range_str: str) -> tuple: @@ -134,7 +167,8 @@ def show_image_info(input_dir: Path): print(f"\nTotal file size: {total_size:.2f} MB") print(f"Average file size: {total_size/len(image_files):.2f} MB") -def preview_augmentation(input_dir: Path, output_dir: Path, config: Dict[str, Any]): +def preview_augmentation(input_dir: Path, output_dir: Path, 
config: Dict[str, Any], + id_detection_config: Dict[str, Any] = None): """Preview augmentation on first image""" image_files = get_image_files(input_dir) @@ -147,7 +181,40 @@ def preview_augmentation(input_dir: Path, output_dir: Path, config: Dict[str, An # Create augmentation instance augmenter = DataAugmentation(config) - # Augment first image + # Process with ID detection if enabled + if id_detection_config and id_detection_config.get('enabled', False): + print("🔍 ID Card Detection enabled - processing with YOLO model...") + + # Initialize ID card detector + detector = IDCardDetector( + model_path=id_detection_config.get('model_path'), + config=config + ) + + if not detector.model: + print("❌ Failed to load YOLO model, proceeding with normal augmentation") + else: + # Process single image with ID detection + result = detector.process_single_image( + image_path=image_files[0], + output_dir=output_dir, + apply_augmentation=True, + save_original=id_detection_config.get('save_original_crops', True), + confidence=id_detection_config.get('confidence_threshold', 0.25), + iou_threshold=id_detection_config.get('iou_threshold', 0.45), + crop_mode=id_detection_config.get('crop_mode', 'bbox'), + target_size=id_detection_config.get('target_size'), + padding=id_detection_config.get('padding', 10) + ) + + if result and result.get('detections'): + print(f"✅ Detected {len(result['detections'])} ID cards") + print(f"💾 Saved {len(result['processed_cards'])} processed cards") + return + else: + print("⚠️ No ID cards detected, proceeding with normal augmentation") + + # Normal augmentation (fallback) augmented_paths = augmenter.augment_image_file( image_files[0], output_dir, @@ -225,9 +292,29 @@ def main(): show_image_info(input_dir) return + # Get ID detection config + id_detection_config = config.get('id_card_detection', {}) + + # Override ID detection config with command line arguments + if args.enable_id_detection: + id_detection_config['enabled'] = True + + if args.model_path: 
+ id_detection_config['model_path'] = args.model_path + + if args.confidence: + id_detection_config['confidence_threshold'] = args.confidence + + if args.crop_mode: + id_detection_config['crop_mode'] = args.crop_mode + + if args.crop_target_size: + target_size = parse_size(args.crop_target_size) + id_detection_config['target_size'] = list(target_size) + # Preview augmentation if requested if args.preview: - preview_augmentation(input_dir, output_dir, augmentation_config) + preview_augmentation(input_dir, output_dir, augmentation_config, id_detection_config) return # Get image files @@ -242,35 +329,56 @@ def main(): logger.info(f"Number of augmentations per image: {processing_config.get('num_augmentations', 3)}") logger.info(f"Target size: {processing_config.get('target_size', [224, 224])}") - # Create augmentation instance with new config - augmenter = DataAugmentation(augmentation_config) + # Process with ID detection if enabled + if id_detection_config.get('enabled', False): + logger.info("ID Card Detection enabled - processing with YOLO model...") + + # Initialize ID card detector + detector = IDCardDetector( + model_path=id_detection_config.get('model_path'), + config=config + ) + + if not detector.model: + logger.error("Failed to load YOLO model") + sys.exit(1) + + logger.info(f"YOLO model loaded: {detector.model_path}") + logger.info(f"Confidence threshold: {id_detection_config.get('confidence_threshold', 0.25)}") + logger.info(f"Crop mode: {id_detection_config.get('crop_mode', 'bbox')}") + + # Bước 1: Detect và crop ID cards vào thư mục processed + processed_dir = output_dir / "processed" + processed_dir.mkdir(parents=True, exist_ok=True) + logger.info("Step 1: Detect and crop ID cards...") + detector.batch_process( + input_dir=input_dir, + output_dir=processed_dir, + confidence=id_detection_config.get('confidence_threshold', 0.25), + iou_threshold=id_detection_config.get('iou_threshold', 0.45), + crop_mode=id_detection_config.get('crop_mode', 'bbox'), + 
target_size=id_detection_config.get('target_size'), + padding=id_detection_config.get('padding', 10) + ) + # Bước 2: Augment các card đã crop + logger.info("Step 2: Augment cropped ID cards...") + augmenter = DataAugmentation(augmentation_config) + augmenter.batch_augment( + processed_dir, + output_dir, + num_augmentations=processing_config.get("num_augmentations", 3) + ) + else: + # Augment trực tiếp ảnh gốc + logger.info("Starting normal batch augmentation (direct augmentation)...") + augmenter = DataAugmentation(augmentation_config) + augmenter.batch_augment( + input_dir, + output_dir, + num_augmentations=processing_config.get("num_augmentations", 3) + ) - # Update target size - target_size = tuple(processing_config.get("target_size", [224, 224])) - augmenter.image_processor.target_size = target_size - - # Perform batch augmentation - logger.info("Starting batch augmentation...") - results = augmenter.batch_augment( - input_dir, - output_dir, - num_augmentations=processing_config.get("num_augmentations", 3) - ) - - # Get and display summary - summary = augmenter.get_augmentation_summary(results) - - print("\n" + "="*50) - print("AUGMENTATION SUMMARY") - print("="*50) - print(f"Original images: {summary['total_original_images']}") - print(f"Augmented images: {summary['total_augmented_images']}") - print(f"Augmentation ratio: {summary['augmentation_ratio']:.2f}") - print(f"Successful augmentations: {summary['successful_augmentations']}") - print(f"Output directory: {output_dir}") - print("="*50) - - logger.info("Data augmentation completed successfully") + logger.info("Data processing completed successfully") if __name__ == "__main__": main() \ No newline at end of file diff --git a/script/id_card_cropper.py b/script/id_card_cropper.py deleted file mode 100644 index 5dc78ef..0000000 --- a/script/id_card_cropper.py +++ /dev/null @@ -1,133 +0,0 @@ -#!/usr/bin/env python3 -""" -Simple ID Card Cropper using Roboflow API -Input: folder containing images -Output: folder 
with cropped ID cards -""" -import sys -import yaml -from pathlib import Path -import logging -import argparse - -# Add src to path -sys.path.append(str(Path(__file__).parent / "src")) - -from model.roboflow_id_detector import RoboflowIDDetector - -def setup_logging(): - """Setup basic logging""" - logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' - ) - -def crop_id_cards(input_folder: str, output_folder: str, api_key: str = "Pkz4puRA0Cy3xMOuNoNr"): - """ - Crop ID cards from all images in input folder - - Args: - input_folder: Path to input folder containing images - output_folder: Path to output folder for cropped ID cards - api_key: Roboflow API key - """ - logger = logging.getLogger(__name__) - - # Convert to Path objects - input_path = Path(input_folder) - output_path = Path(output_folder) - - # Check if input folder exists - if not input_path.exists(): - logger.error(f"Input folder not found: {input_folder}") - return False - - # Create output folder - output_path.mkdir(parents=True, exist_ok=True) - - # Initialize detector - detector = RoboflowIDDetector( - api_key=api_key, - model_id="french-card-id-detect", - version=3, - confidence=0.5 - ) - - # Get all image files - image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'} - image_files = [] - - for file_path in input_path.rglob('*'): - if file_path.is_file() and file_path.suffix.lower() in image_extensions: - image_files.append(file_path) - - if not image_files: - logger.error(f"No images found in {input_folder}") - return False - - logger.info(f"Found {len(image_files)} images to process") - - # Process each image - total_cropped = 0 - - for i, image_path in enumerate(image_files, 1): - logger.info(f"Processing {i}/{len(image_files)}: {image_path.name}") - - # Detect ID cards - detections = detector.detect_id_cards(image_path) - - if not detections: - logger.warning(f"No ID cards detected in {image_path.name}") - continue - - # Crop each detected ID 
card - for j, detection in enumerate(detections): - bbox = detection['bbox'] - - # Create output filename - stem = image_path.stem - suffix = f"_card_{j+1}.jpg" - output_file = output_path / f"{stem}{suffix}" - - # Crop ID card - cropped = detector.crop_id_card(image_path, bbox, output_file) - - if cropped is not None: - total_cropped += 1 - logger.info(f" ✓ Cropped card {j+1} to {output_file.name}") - - # Add delay between requests - if i < len(image_files): - import time - time.sleep(1.0) - - logger.info(f"Processing completed! Total ID cards cropped: {total_cropped}") - return True - -def main(): - """Main function""" - parser = argparse.ArgumentParser(description='Crop ID cards from images using Roboflow API') - parser.add_argument('input_folder', help='Input folder containing images') - parser.add_argument('output_folder', help='Output folder for cropped ID cards') - parser.add_argument('--api-key', default="Pkz4puRA0Cy3xMOuNoNr", - help='Roboflow API key (default: demo key)') - - args = parser.parse_args() - - # Setup logging - setup_logging() - - # Process images - success = crop_id_cards(args.input_folder, args.output_folder, args.api_key) - - if success: - print(f"\n✓ Successfully processed images from '{args.input_folder}'") - print(f"✓ Cropped ID cards saved to '{args.output_folder}'") - else: - print(f"\n✗ Failed to process images") - return 1 - - return 0 - -if __name__ == "__main__": - exit(main()) \ No newline at end of file diff --git a/src/data_augmentation.py b/src/data_augmentation.py index de2627a..3dbc20a 100644 --- a/src/data_augmentation.py +++ b/src/data_augmentation.py @@ -363,8 +363,6 @@ class DataAugmentation: return result - - def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]: """ Apply each augmentation method separately to create independent augmented versions @@ -455,20 +453,7 @@ class DataAugmentation: augmented_images.append(augmented) - # 5. 
Grayscale only - if grayscale_config.get("enabled", False): - for i in range(num_augmentations): - augmented = image.copy() - augmented = self.convert_to_grayscale_preserve_quality(augmented) - - # Resize preserving aspect ratio - target_size = self.image_processor.target_size - if target_size: - augmented = self.resize_preserve_aspect(augmented, target_size) - - augmented_images.append(augmented) - - # 6. Blurring only + # 5. Blurring only if blurring_config.get("enabled", False): for i in range(num_augmentations): augmented = image.copy() @@ -481,7 +466,7 @@ class DataAugmentation: augmented_images.append(augmented) - # 7. Brightness and contrast only + # 6. Brightness/Contrast only if brightness_contrast_config.get("enabled", False): for i in range(num_augmentations): augmented = image.copy() @@ -494,6 +479,11 @@ class DataAugmentation: augmented_images.append(augmented) + # 7. Apply grayscale as final step to ALL augmented images + if grayscale_config.get("enabled", False): + for i in range(len(augmented_images)): + augmented_images[i] = self.convert_to_grayscale_preserve_quality(augmented_images[i]) + return augmented_images def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]: @@ -518,7 +508,7 @@ class DataAugmentation: # Save augmented images with method names saved_paths = [] - method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"] + method_names = ["rotation", "cropping", "noise", "blockage", "blurring", "brightness_contrast", "grayscale"] method_index = 0 for i, aug_image in enumerate(augmented_images): diff --git a/src/id_card_detector.py b/src/id_card_detector.py new file mode 100644 index 0000000..d39bf2c --- /dev/null +++ b/src/id_card_detector.py @@ -0,0 +1,611 @@ +""" +ID Card Detector Module +Sử dụng YOLO để detect và cắt ID cards từ ảnh lớn, kết hợp với data augmentation +Tích hợp với YOLOv8 French ID Card Detection model +""" +import cv2 
class IDCardDetector:
    """Detect ID cards in larger images with a YOLO model, crop and augment them.

    The detector wraps an ultralytics ``YOLO`` model for inference, offers
    several cropping strategies for the detected boxes, can process a whole
    directory of images, and can run the project's ``DataAugmentation`` over
    previously cropped cards.
    """

    # Weights tried when the constructor receives no ``model_path``.
    DEFAULT_MODEL_PATH = "data/weights/id_cards_yolov8n.pt"
    # Python file exposing DEFAULT_TRAINING_CONFIG / DEFAULT_INFERENCE_CONFIG.
    DEFAULT_YOLO_CONFIG_PATH = "src/model/ID_cards_detector/config.py"
    # Image suffixes scanned by ``batch_process`` unless the config overrides them.
    DEFAULT_SUPPORTED_FORMATS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    def __init__(self, model_path: str = None, config: Dict[str, Any] = None):
        """
        Initialize the detector and load the YOLO weights when available.

        Args:
            model_path: Path to trained YOLO weights. When omitted, the
                default weights file is used if it exists on disk.
            config: Configuration dictionary; also handed to DataAugmentation.
        """
        self.config = config or {}
        self.model_path = model_path
        self.model = None
        # The augmenter receives the caller's config object untouched
        # (possibly None) so that it can apply its own defaults.
        self.data_augmentation = DataAugmentation(config)
        self.logger = self._setup_logger()

        # Fall back to the default weights when no path was supplied.
        if not model_path and os.path.exists(self.DEFAULT_MODEL_PATH):
            model_path = self.DEFAULT_MODEL_PATH
            self.model_path = model_path

        if not model_path:
            self.logger.warning("No YOLO model path available; call load_model() before detecting")
        elif not os.path.exists(model_path):
            # Previously this case was skipped silently, which made the later
            # "model not loaded" error hard to trace back to a bad path.
            self.logger.warning(f"Model file not found: {model_path}")
        else:
            self.load_model(model_path)

    def _setup_logger(self) -> logging.Logger:
        """Return the module logger, attaching a stream handler only once."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # Guard against duplicated output when several detectors are created.
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            ))
            logger.addHandler(handler)

        return logger

    def load_model(self, model_path: str) -> bool:
        """
        Load a YOLO model from a weights file.

        Args:
            model_path: Path to the model file.

        Returns:
            True when the model was loaded, False otherwise (the error is logged).
        """
        try:
            self.model = YOLO(model_path)
        except Exception as e:
            self.logger.error(f"Failed to load model: {e}")
            return False

        self.logger.info(f"Loaded YOLO model from: {model_path}")
        return True

    def detect_id_cards(self, image: np.ndarray, confidence: float = 0.5,
                        iou_threshold: float = 0.45) -> List[Dict[str, Any]]:
        """
        Detect ID cards in an image with the loaded YOLO model.

        Args:
            image: Input image.
            confidence: Confidence threshold passed to the model.
            iou_threshold: IoU threshold used for NMS.

        Returns:
            A list of detections, each of the form::

                {
                    'bbox': [x1, y1, x2, y2],
                    'confidence': float,
                    'class_id': int,
                    'class_name': str
                }

            An empty list is returned when no model is loaded or inference fails.
        """
        if self.model is None:
            self.logger.error("Model chưa được load!")
            return []

        try:
            results = self.model(image, conf=confidence, iou=float(iou_threshold), verbose=False)
            names = getattr(self.model, 'names', None)

            detections = []
            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue
                for box in boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    class_id = int(box.cls[0].cpu().numpy())
                    detections.append({
                        'bbox': [int(x1), int(y1), int(x2), int(y2)],
                        'confidence': float(box.conf[0].cpu().numpy()),
                        'class_id': class_id,
                        'class_name': names[class_id] if names is not None else f"class_{class_id}"
                    })

            self.logger.info(f"Detected {len(detections)} ID cards")
            return detections

        except Exception as e:
            self.logger.error(f"Error during detection: {e}")
            return []

    @staticmethod
    def _centered_window(center: int, length: int, limit: int) -> Tuple[int, int]:
        """Return ``(start, stop)`` of a ``length``-wide window around ``center`` kept inside ``[0, limit]``."""
        length = min(length, limit)
        start = max(0, min(center - length // 2, limit - length))
        return start, start + length

    def crop_id_card(self, image: np.ndarray, bbox: List[int], padding: int = 10,
                     crop_mode: str = "bbox", target_size: Optional[Tuple[int, int]] = None,
                     aspect_ratio: float = 3 / 4) -> np.ndarray:
        """
        Crop an ID card out of the source image.

        Args:
            image: Input image.
            bbox: Bounding box [x1, y1, x2, y2].
            padding: Extra pixels added around the bbox (clamped to the image).
            crop_mode: "bbox" (padded box), "square" (square around the box
                centre) or "aspect_ratio" (trim the padded box to
                ``aspect_ratio``). Unknown modes behave like "bbox".
            target_size: Optional (width, height) to resize the crop to.
            aspect_ratio: Width / height ratio used by the "aspect_ratio" mode.

        Returns:
            The cropped (and optionally resized) image. An empty array is
            returned, without resizing, when the box has no area inside the image.

        Raises:
            ValueError: If ``aspect_ratio`` is not positive in "aspect_ratio" mode.
        """
        height, width = image.shape[:2]
        x1, y1, x2, y2 = (int(v) for v in bbox)

        # Apply padding and clamp to the image bounds.
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(width, x2 + padding)
        y2 = min(height, y2 + padding)

        box_w, box_h = x2 - x1, y2 - y1
        if box_w <= 0 or box_h <= 0:
            # An empty slice would make cv2.resize raise and the ratio
            # computation below divide by zero.
            self.logger.warning(f"Empty crop for bbox {list(bbox)}; nothing to cut")
            return image[0:0, 0:0]

        center_x = x1 + box_w // 2
        center_y = y1 + box_h // 2

        if crop_mode == "square":
            # Shift the window into the image instead of clipping it, so the
            # result stays square whenever the image is large enough.
            side = min(max(box_w, box_h), width, height)
            x1, x2 = self._centered_window(center_x, side, width)
            y1, y2 = self._centered_window(center_y, side, height)
        elif crop_mode == "aspect_ratio":
            if aspect_ratio <= 0:
                raise ValueError(f"aspect_ratio must be positive, got {aspect_ratio}")
            if box_w / box_h > aspect_ratio:
                # Box too wide: keep the height, trim the width.
                new_w = max(1, int(box_h * aspect_ratio))
                x1, x2 = self._centered_window(center_x, new_w, width)
            else:
                # Box too tall: keep the width, trim the height.
                new_h = max(1, int(box_w / aspect_ratio))
                y1, y2 = self._centered_window(center_y, new_h, height)
        # "bbox" and any unknown mode: use the padded box as is.

        cropped = image[y1:y2, x1:x2]

        if target_size:
            cropped = cv2.resize(cropped, target_size, interpolation=cv2.INTER_AREA)

        return cropped

    def process_single_image(self, image_path: Union[str, Path], output_dir: Path,
                             confidence: float = 0.5, iou_threshold: float = 0.45,
                             crop_mode: str = "bbox", target_size: Optional[Tuple[int, int]] = None,
                             padding: int = 10, card_counter: int = 0) -> Dict[str, Any]:
        """
        Detect the ID cards of one image and save each crop to ``output_dir``.

        No augmentation is applied here; see ``augment_cropped_cards``.

        Args:
            image_path: Path to the input image.
            output_dir: Directory receiving the ``id_card_NNN.jpg`` crops.
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
            target_size: Target (width, height) or None.
            padding: Extra pixels added around the bbox.
            card_counter: Number of cards already written; file numbering
                continues from ``card_counter + 1``.

        Returns:
            A result dictionary, or an empty dict when the image is missing
            or cannot be loaded.
        """
        image_path = Path(image_path)
        output_dir = Path(output_dir)

        if not image_path.exists():
            self.logger.error(f"Image not found: {image_path}")
            return {}

        image = load_image(str(image_path))
        if image is None:
            self.logger.error(f"Failed to load image: {image_path}")
            return {}

        crop_settings = {
            'mode': crop_mode,
            'target_size': target_size,
            'padding': padding
        }

        detections = self.detect_id_cards(image, confidence, float(iou_threshold))
        if not detections:
            self.logger.warning(f"No ID cards detected in: {image_path}")
            return {
                'image_path': str(image_path),
                'detections': [],
                'processed_cards': [],
                'total_cards': 0,
                'crop_settings': crop_settings
            }

        output_dir.mkdir(parents=True, exist_ok=True)

        processed_cards = []
        current_card_counter = card_counter

        for detection in detections:
            cropped_card = self.crop_id_card(
                image,
                detection['bbox'],
                padding=padding,
                crop_mode=crop_mode,
                target_size=target_size
            )
            if cropped_card.size == 0:
                # Nothing to write for a box without area inside the image.
                self.logger.warning(f"Skipping empty crop in: {image_path}")
                continue

            # Unique, sequential file name for every saved card.
            current_card_counter += 1
            card_path = output_dir / f"id_card_{current_card_counter:03d}.jpg"
            save_image(cropped_card, card_path)

            processed_cards.append({
                'original_path': str(card_path),
                'detection_info': detection,
                'crop_info': dict(crop_settings)
            })

        self.logger.info(f"Processed {len(processed_cards)} cards from {image_path.name}")
        return {
            'image_path': str(image_path),
            'detections': detections,
            'processed_cards': processed_cards,
            'total_cards': len(processed_cards),
            'crop_settings': crop_settings
        }

    def batch_process(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                      confidence: float = 0.5, iou_threshold: float = 0.45,
                      crop_mode: str = "bbox", target_size: Optional[Tuple[int, int]] = None,
                      padding: int = 10) -> Dict[str, Any]:
        """
        Detect and crop the ID cards of every supported image in a directory.

        Args:
            input_dir: Directory containing the input images.
            output_dir: Directory receiving the crops and the JSON summary.
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
            target_size: Target (width, height) or None.
            padding: Extra pixels added around the bbox.

        Returns:
            The summary dictionary (also written to
            ``processing_summary.json``), or an empty dict when the input
            directory is missing or holds no supported image.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        # Compare suffixes case-insensitively. Globbing the lower- and
        # upper-case pattern separately lists every file twice on
        # case-insensitive filesystems and misses mixed-case names.
        supported_formats = self.config.get('supported_formats', self.DEFAULT_SUPPORTED_FORMATS)
        suffixes = {
            (fmt if fmt.startswith('.') else f".{fmt}").lower()
            for fmt in supported_formats
        }
        # Sorted so the numbering of the output files is reproducible.
        image_files = sorted(
            path for path in input_dir.iterdir()
            if path.is_file() and path.suffix.lower() in suffixes
        )

        if not image_files:
            self.logger.warning(f"No supported images found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(image_files)} images to process")

        results = {}
        total_cards = 0
        global_card_counter = 0  # keeps output file names unique across images

        for i, image_path in enumerate(image_files, start=1):
            self.logger.info(f"Processing {i}/{len(image_files)}: {image_path.name}")

            # Detection and cropping only; augmentation is a separate step.
            result = self.process_single_image(
                image_path,
                output_dir,
                confidence,
                iou_threshold,
                crop_mode,
                target_size,
                padding,
                global_card_counter
            )

            # Advance by the number of files actually written.
            global_card_counter += len(result.get('processed_cards', []))

            results[image_path.name] = result
            total_cards += len(result.get('detections', []))

            print_progress(i, len(image_files), f"Processed {image_path.name}")

        images_with_cards = sum(1 for r in results.values() if r.get('detections'))
        summary = {
            'total_images': len(image_files),
            'total_cards_detected': total_cards,
            'images_with_cards': images_with_cards,
            'images_without_cards': len(results) - images_with_cards,
            'output_directory': str(output_dir),
            'crop_settings': {
                'mode': crop_mode,
                'target_size': target_size,
                'padding': padding
            },
            'results': results
        }

        summary_path = output_dir / "processing_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Batch processing completed. Summary saved to: {summary_path}")
        return summary

    def get_detection_statistics(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compute aggregate statistics from a ``batch_process`` summary.

        Args:
            results: Summary returned by ``batch_process``.

        Returns:
            A dictionary of counts, rates and confidence statistics, or an
            empty dict when ``results`` is empty.
        """
        if not results:
            return {}

        total_images = results.get('total_images', 0)
        total_cards = results.get('total_cards_detected', 0)
        images_with_cards = results.get('images_with_cards', 0)

        all_confidences = [
            detection.get('confidence', 0)
            for image_result in results.get('results', {}).values()
            for detection in image_result.get('detections', [])
        ]

        if all_confidences:
            # Plain floats keep the statistics free of NumPy scalar types.
            confidence_statistics = {
                'min': min(all_confidences),
                'max': max(all_confidences),
                'mean': float(np.mean(all_confidences)),
                'std': float(np.std(all_confidences))
            }
        else:
            confidence_statistics = {'min': 0, 'max': 0, 'mean': 0, 'std': 0}

        return {
            'total_images_processed': total_images,
            'total_cards_detected': total_cards,
            'images_with_cards': images_with_cards,
            'images_without_cards': total_images - images_with_cards,
            'average_cards_per_image': total_cards / total_images if total_images > 0 else 0,
            'detection_rate': images_with_cards / total_images if total_images > 0 else 0,
            'confidence_statistics': confidence_statistics
        }

    def augment_cropped_cards(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                              num_augmentations: int = 3) -> Dict[str, Any]:
        """
        Augment every cropped ID card (``id_card_*.jpg``) found in a directory.

        Args:
            input_dir: Directory containing the cropped ID cards.
            output_dir: Directory receiving the augmented images and the JSON summary.
            num_augmentations: Number of augmentations requested per card.

        Returns:
            The summary dictionary (also written to
            ``augmentation_summary.json``), or an empty dict when the input
            directory is missing or holds no card.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        card_files = sorted(input_dir.glob("id_card_*.jpg"))
        if not card_files:
            self.logger.warning(f"No ID card files found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(card_files)} ID cards to augment")
        # Logged once at debug level instead of once per card at info level.
        self.logger.debug(f"DataAugmentation config: {self.data_augmentation.config}")

        results = {}
        total_augmented = 0

        for i, card_path in enumerate(card_files, start=1):
            self.logger.info(f"Augmenting {i}/{len(card_files)}: {card_path.name}")

            card_image = load_image(str(card_path))
            if card_image is None:
                self.logger.error(f"Failed to load card: {card_path}")
                continue

            try:
                augmented_cards = self.data_augmentation.augment_single_image(
                    card_image,
                    num_augmentations=num_augmentations
                )
                self.logger.info(f"Generated {len(augmented_cards)} augmented cards for {card_path.name}")
            except Exception as e:
                # Best effort: a failing card is recorded with zero outputs.
                self.logger.error(f"Error during augmentation: {e}")
                augmented_cards = []

            card_results = []
            for j, aug_card in enumerate(augmented_cards, start=1):
                aug_path = output_dir / f"{card_path.stem}_aug_{j}.jpg"
                save_image(aug_card, aug_path)
                card_results.append({
                    'augmented_path': str(aug_path),
                    'augmentation_index': j
                })

            results[card_path.name] = {
                'original_path': str(card_path),
                'augmented_cards': card_results,
                'total_augmented': len(card_results)
            }
            total_augmented += len(card_results)

            print_progress(i, len(card_files), f"Augmented {card_path.name}")

        summary = {
            'total_cards': len(card_files),
            'total_augmented': total_augmented,
            'output_directory': str(output_dir),
            'results': results
        }

        summary_path = output_dir / "augmentation_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Augmentation completed. Summary saved to: {summary_path}")
        return summary

    @staticmethod
    def _default_pipeline_config() -> Dict[str, Any]:
        """Return a fresh copy of the built-in detection/processing/crop defaults."""
        return {
            'detection': {
                'confidence_threshold': 0.25,
                'iou_threshold': 0.45,
                'padding': 10
            },
            'processing': {
                'apply_augmentation': True,
                'save_original': True,
                'num_augmentations': 3,
                'save_format': "jpg",
                'quality': 95,
                'target_size': [640, 640]
            },
            'crop_options': {
                'crop_mode': 'bbox',  # bbox, square, aspect_ratio
                'target_size': None,  # (width, height) or None
                'padding': 10
            }
        }

    def load_yolo_config(self, config_path: str = None) -> Dict[str, Any]:
        """
        Build the pipeline configuration from the YOLO detector's config module.

        The module must define ``DEFAULT_TRAINING_CONFIG`` and
        ``DEFAULT_INFERENCE_CONFIG``.

        Args:
            config_path: Path to the Python config file; the detector's
                default config location is used when omitted.

        Returns:
            The configuration dictionary. When the module cannot be loaded a
            warning is logged and only the built-in defaults are returned.
        """
        import importlib.util
        import sys

        config_file = Path(config_path) if config_path else Path(self.DEFAULT_YOLO_CONFIG_PATH)
        defaults = self._default_pipeline_config()

        try:
            if not config_file.is_file():
                raise FileNotFoundError(f"config file not found: {config_file}")

            # Make sibling modules of the config importable, without growing
            # sys.path on every call.
            module_dir = str(config_file.parent)
            if module_dir not in sys.path:
                sys.path.append(module_dir)

            # Load the file that was actually requested rather than whatever
            # module named ``config`` happens to be importable.
            spec = importlib.util.spec_from_file_location("_id_card_yolo_config", str(config_file))
            if spec is None or spec.loader is None:
                raise ImportError(f"cannot import config from: {config_file}")
            module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(module)

            training_config = module.DEFAULT_TRAINING_CONFIG
            inference_config = module.DEFAULT_INFERENCE_CONFIG
        except Exception as e:
            self.logger.warning(f"Failed to load YOLO config: {e}")
            return defaults

        detection = defaults['detection']
        detection['confidence_threshold'] = inference_config.get(
            'conf_threshold', detection['confidence_threshold'])
        detection['iou_threshold'] = inference_config.get(
            'iou_threshold', detection['iou_threshold'])

        config = {
            'yolo_training_config': training_config,
            'yolo_inference_config': inference_config,
        }
        config.update(defaults)

        self.logger.info("Loaded YOLO config successfully")
        return config