update augment + YOLO pipeline

2025-08-06 20:52:39 +07:00
parent 4ee14f17d3
commit 51d3a66cc4
9 changed files with 989 additions and 407 deletions
--- a/src/data_augmentation.py
+++ b/src/data_augmentation.py
@@ -363,8 +363,6 @@ class DataAugmentation:
        
        return result
    
-
-    
    def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
        """
        Apply each augmentation method separately to create independent augmented versions
@@ -455,20 +453,7 @@ class DataAugmentation:
                
                augmented_images.append(augmented)
        
-        # 5. Grayscale only
-        if grayscale_config.get("enabled", False):
-            for i in range(num_augmentations):
-                augmented = image.copy()
-                augmented = self.convert_to_grayscale_preserve_quality(augmented)
-                
-                # Resize preserving aspect ratio
-                target_size = self.image_processor.target_size
-                if target_size:
-                    augmented = self.resize_preserve_aspect(augmented, target_size)
-                
-                augmented_images.append(augmented)
-        
-        # 6. Blurring only
+        # 5. Blurring only
        if blurring_config.get("enabled", False):
            for i in range(num_augmentations):
                augmented = image.copy()
@@ -481,7 +466,7 @@ class DataAugmentation:
                
                augmented_images.append(augmented)
        
-        # 7. Brightness and contrast only
+        # 6. Brightness/Contrast only
        if brightness_contrast_config.get("enabled", False):
            for i in range(num_augmentations):
                augmented = image.copy()
@@ -494,6 +479,11 @@ class DataAugmentation:
                
                augmented_images.append(augmented)
        
+        # 7. Apply grayscale as final step to ALL augmented images
+        if grayscale_config.get("enabled", False):
+            for i in range(len(augmented_images)):
+                augmented_images[i] = self.convert_to_grayscale_preserve_quality(augmented_images[i])
+        
        return augmented_images
    
    def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
@@ -518,7 +508,7 @@ class DataAugmentation:
        
        # Save augmented images with method names
        saved_paths = []
-        method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"]
+        method_names = ["rotation", "cropping", "noise", "blockage", "blurring", "brightness_contrast", "grayscale"]
        method_index = 0
        
        for i, aug_image in enumerate(augmented_images):
--- a/src/id_card_detector.py
+++ b/src/id_card_detector.py
@@ -0,0 +1,611 @@
+"""
+ID Card Detector Module
+Sử dụng YOLO để detect và cắt ID cards từ ảnh lớn, kết hợp với data augmentation
+Tích hợp với YOLOv8 French ID Card Detection model
+"""
+import cv2
+import numpy as np
+from pathlib import Path
+from typing import List, Tuple, Optional, Dict, Any, Union
+import torch
+import torch.nn as nn
+from ultralytics import YOLO
+import logging
+from data_augmentation import DataAugmentation
+from utils import load_image, save_image, create_augmented_filename, print_progress
+import os
+import json
+import yaml
+
+class IDCardDetector:
+    """Class để detect và cắt ID cards từ ảnh lớn sử dụng YOLO"""
+    
+    def __init__(self, model_path: str = None, config: Dict[str, Any] = None):
+        """
+        Initialize the ID card detector.
+
+        Args:
+            model_path: Path to trained YOLO weights. When omitted, the
+                default weights file "data/weights/id_cards_yolov8n.pt" is
+                used if it exists.
+            config: Configuration dictionary. Stored as ``self.config``
+                (``{}`` when falsy) and forwarded as-is to DataAugmentation.
+        """
+        self.config = config or {}
+        self.model_path = model_path
+        self.model = None
+        # The caller's ``config`` (possibly None) is forwarded unchanged.
+        self.data_augmentation = DataAugmentation(config)
+        self.logger = self._setup_logger()
+
+        if not model_path:
+            # Fall back to the default weights when no path was provided.
+            default_model_path = "data/weights/id_cards_yolov8n.pt"
+            if os.path.exists(default_model_path):
+                model_path = default_model_path
+                self.model_path = model_path
+        elif not os.path.exists(model_path):
+            # An explicit but missing path used to be ignored silently,
+            # leaving ``self.model`` as None with no hint why.
+            self.logger.warning(f"Model file not found: {model_path}")
+
+        # Load the YOLO model only when a usable weights file exists.
+        if model_path and os.path.exists(model_path):
+            self.load_model(model_path)
+    
+    def _setup_logger(self) -> logging.Logger:
+        """
+        Return the module logger at INFO level.
+
+        A stream handler with a timestamped format is attached only when the
+        logger has no handlers yet.
+        """
+        module_logger = logging.getLogger(__name__)
+        module_logger.setLevel(logging.INFO)
+
+        # The logger object is shared per module name, so an existing handler
+        # means it was configured before; adding another would duplicate output.
+        if module_logger.handlers:
+            return module_logger
+
+        stream_handler = logging.StreamHandler()
+        stream_handler.setFormatter(
+            logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+        )
+        module_logger.addHandler(stream_handler)
+        return module_logger
+    
+    def load_model(self, model_path: str) -> bool:
+        """
+        Load a YOLO model from a weights file into ``self.model``.
+
+        Args:
+            model_path: Path to the model file.
+
+        Returns:
+            True when the model was loaded, False when loading raised
+            (the error is logged, not propagated).
+        """
+        loaded = False
+        try:
+            self.model = YOLO(model_path)
+            self.logger.info(f"Loaded YOLO model from: {model_path}")
+            loaded = True
+        except Exception as load_error:
+            self.logger.error(f"Failed to load model: {load_error}")
+        return loaded
+    
+    def detect_id_cards(self, image: np.ndarray, confidence: float = 0.5, iou_threshold: float = 0.45) -> List[Dict[str, Any]]:
+        """
+        Run the loaded YOLO model on an image and collect its detections.
+
+        Args:
+            image: Input image.
+            confidence: Confidence threshold passed to the model as ``conf``.
+            iou_threshold: IoU threshold passed to the model as ``iou``.
+
+        Returns:
+            One dict per detected box:
+            {
+                'bbox': [x1, y1, x2, y2],   # truncated to int
+                'confidence': float,
+                'class_id': int,
+                'class_name': str
+            }
+            An empty list when no model is loaded or inference raises
+            (the error is logged).
+        """
+        if self.model is None:
+            self.logger.error("Model chưa được load!")
+            return []
+
+        try:
+            predictions = self.model(image, conf=confidence, iou=float(iou_threshold), verbose=False)
+
+            found = []
+            for prediction in predictions:
+                boxes = prediction.boxes
+                if boxes is None:
+                    continue
+
+                for box in boxes:
+                    # Corner coordinates of the box.
+                    left, top, right, bottom = box.xyxy[0].cpu().numpy()
+
+                    score = float(box.conf[0].cpu().numpy())
+                    class_id = int(box.cls[0].cpu().numpy())
+                    # Use a synthetic label when the model exposes no names.
+                    if hasattr(self.model, 'names'):
+                        class_name = self.model.names[class_id]
+                    else:
+                        class_name = f"class_{class_id}"
+
+                    found.append({
+                        'bbox': [int(left), int(top), int(right), int(bottom)],
+                        'confidence': score,
+                        'class_id': class_id,
+                        'class_name': class_name
+                    })
+
+            self.logger.info(f"Detected {len(found)} ID cards")
+            return found
+
+        except Exception as e:
+            self.logger.error(f"Error during detection: {e}")
+            return []
+    
+    def crop_id_card(self, image: np.ndarray, bbox: List[int], padding: int = 10,
+                     crop_mode: str = "bbox", target_size: Tuple[int, int] = None) -> np.ndarray:
+        """
+        Crop an ID card out of the source image from its bounding box.
+
+        Args:
+            image: Input image; its first two dimensions are height and width.
+            bbox: Bounding box [x1, y1, x2, y2].
+            padding: Extra pixels added on every side of the bbox.
+            crop_mode: "bbox" (padded box), "square" (square around the box
+                centre, clipped to the image), or "aspect_ratio" (box shrunk
+                to width:height = 3:4 around its centre). Any other value
+                behaves like "bbox".
+            target_size: (width, height) to resize the crop to, or None.
+
+        Returns:
+            The cropped (and optionally resized) image. A box with no area
+            inside the image yields an empty array, which is not resized.
+        """
+        height, width = image.shape[:2]
+
+        def clamp(value, upper):
+            # Keep a coordinate inside [0, upper]. Clamping both sides matters:
+            # a negative slice bound would index from the end of the array
+            # and silently return the wrong region.
+            return max(0, min(upper, int(value)))
+
+        x1, y1, x2, y2 = bbox
+
+        # Apply padding, clipped to the image.
+        x1 = clamp(x1 - padding, width)
+        y1 = clamp(y1 - padding, height)
+        x2 = clamp(x2 + padding, width)
+        y2 = clamp(y2 + padding, height)
+
+        if crop_mode == "square":
+            # Side length is the longer box side; edges are clipped to the image.
+            center_x = (x1 + x2) // 2
+            center_y = (y1 + y2) // 2
+            half_size = max(x2 - x1, y2 - y1) // 2
+
+            x1 = clamp(center_x - half_size, width)
+            y1 = clamp(center_y - half_size, height)
+            x2 = clamp(center_x + half_size, width)
+            y2 = clamp(center_y + half_size, height)
+        elif crop_mode == "aspect_ratio":
+            bbox_width = x2 - x1
+            bbox_height = y2 - y1
+
+            # A degenerate box has no ratio; skipping avoids a division by zero.
+            if bbox_width > 0 and bbox_height > 0:
+                center_x = (x1 + x2) // 2
+                center_y = (y1 + y2) // 2
+                target_ratio = 3 / 4  # width / height
+
+                if bbox_width / bbox_height > target_ratio:
+                    # Box too wide: keep the height, narrow the width.
+                    half_width = int(bbox_height * target_ratio) // 2
+                    x1 = clamp(center_x - half_width, width)
+                    x2 = clamp(center_x + half_width, width)
+                else:
+                    # Box too tall: keep the width, shorten the height.
+                    half_height = int(bbox_width / target_ratio) // 2
+                    y1 = clamp(center_y - half_height, height)
+                    y2 = clamp(center_y + half_height, height)
+
+        # "bbox" and unknown modes use the padded box unchanged.
+        cropped = image[y1:y2, x1:x2]
+
+        # cv2.resize raises on an empty source, so an empty crop is returned as-is.
+        if target_size and cropped.size > 0:
+            cropped = cv2.resize(cropped, target_size, interpolation=cv2.INTER_AREA)
+
+        return cropped
+    
+    def process_single_image(self, image_path: Union[str, Path], output_dir: Union[str, Path],
+                           confidence: float = 0.5, iou_threshold: float = 0.45,
+                           crop_mode: str = "bbox", target_size: Tuple[int, int] = None,
+                           padding: int = 10, card_counter: int = 0) -> Dict[str, Any]:
+        """
+        Detect the ID cards in one image, crop each one and save the crops.
+
+        No augmentation is applied here.
+
+        Args:
+            image_path: Path to the input image.
+            output_dir: Directory the crops are written to (created on demand).
+            confidence: Confidence threshold.
+            iou_threshold: IoU threshold.
+            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
+            target_size: Target size (width, height) or None.
+            padding: Extra padding around the bbox.
+            card_counter: Number of cards already saved by the caller; crops
+                are named "id_card_{card_counter + k:03d}.jpg" for the k-th
+                detection (k starting at 1).
+
+        Returns:
+            ``{}`` when the image is missing or cannot be loaded; otherwise a
+            dict with 'image_path', 'detections' and 'processed_cards', plus
+            'total_cards' and 'crop_settings' when at least one card was
+            detected.
+        """
+        image_path = Path(image_path)
+        # Accept plain strings too; ``.mkdir`` and ``/`` below need a Path.
+        output_dir = Path(output_dir)
+
+        if not image_path.exists():
+            self.logger.error(f"Image not found: {image_path}")
+            return {}
+
+        image = load_image(str(image_path))
+        if image is None:
+            self.logger.error(f"Failed to load image: {image_path}")
+            return {}
+
+        # utils.load_image converts the decoded image to RGB, whereas
+        # Ultralytics treats numpy inputs as BGR. Convert back for inference
+        # only; the crops are still taken from the loaded image.
+        detection_input = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+        detections = self.detect_id_cards(detection_input, confidence, float(iou_threshold))
+
+        if not detections:
+            self.logger.warning(f"No ID cards detected in: {image_path}")
+            return {
+                'image_path': str(image_path),
+                'detections': [],
+                'processed_cards': []
+            }
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        processed_cards = []
+        current_card_counter = card_counter
+
+        for detection in detections:
+            cropped_card = self.crop_id_card(
+                image,
+                detection['bbox'],
+                padding=padding,
+                crop_mode=crop_mode,
+                target_size=target_size
+            )
+
+            # Advance the counter for every detection so file numbering stays
+            # aligned with the caller, which advances by len(detections).
+            current_card_counter += 1
+
+            if cropped_card.size == 0:
+                # Nothing to write for a box with no area inside the image.
+                self.logger.warning(
+                    f"Skipping empty crop for bbox {detection['bbox']} in {image_path.name}"
+                )
+                continue
+
+            card_path = output_dir / f"id_card_{current_card_counter:03d}.jpg"
+            save_image(cropped_card, card_path)
+            processed_cards.append({
+                'original_path': str(card_path),
+                'detection_info': detection,
+                'crop_info': {
+                    'mode': crop_mode,
+                    'target_size': target_size,
+                    'padding': padding
+                }
+            })
+
+        result = {
+            'image_path': str(image_path),
+            'detections': detections,
+            'processed_cards': processed_cards,
+            'total_cards': len(processed_cards),
+            'crop_settings': {
+                'mode': crop_mode,
+                'target_size': target_size,
+                'padding': padding
+            }
+        }
+
+        self.logger.info(f"Processed {len(processed_cards)} cards from {image_path.name}")
+        return result
+    
+    def batch_process(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
+                     confidence: float = 0.5, iou_threshold: float = 0.45,
+                     crop_mode: str = "bbox", target_size: Tuple[int, int] = None,
+                     padding: int = 10) -> Dict[str, Any]:
+        """
+        Detect and crop ID cards for every supported image in a directory.
+
+        Only detection and cropping are performed; no augmentation.
+
+        Args:
+            input_dir: Directory containing the input images (not recursive).
+            output_dir: Directory for the crops and "processing_summary.json".
+            confidence: Confidence threshold.
+            iou_threshold: IoU threshold.
+            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
+            target_size: Target size (width, height) or None.
+            padding: Extra padding around the bbox.
+
+        Returns:
+            The summary dict that is also written to the JSON file, or ``{}``
+            when the input directory is missing or holds no supported image.
+        """
+        input_dir = Path(input_dir)
+        output_dir = Path(output_dir)
+
+        if not input_dir.exists():
+            self.logger.error(f"Input directory not found: {input_dir}")
+            return {}
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Match extensions case-insensitively in a single pass. Globbing the
+        # lower- and upper-case pattern separately lists every file twice on
+        # case-insensitive filesystems and misses mixed case such as ".Jpg".
+        supported_formats = self.config.get('supported_formats', ['.jpg', '.jpeg', '.png', '.bmp', '.tiff'])
+        suffixes = tuple(fmt.lower() for fmt in supported_formats)
+        # Sorted so file numbering and the summary are reproducible.
+        image_files = sorted(
+            path for path in input_dir.iterdir()
+            if path.is_file() and path.name.lower().endswith(suffixes)
+        )
+
+        if not image_files:
+            self.logger.warning(f"No supported images found in: {input_dir}")
+            return {}
+
+        self.logger.info(f"Found {len(image_files)} images to process")
+
+        results = {}
+        # Running number of detected cards; doubles as the offset that keeps
+        # crop file names unique across images.
+        total_cards = 0
+
+        for index, image_path in enumerate(image_files, start=1):
+            self.logger.info(f"Processing {index}/{len(image_files)}: {image_path.name}")
+
+            result = self.process_single_image(
+                image_path,
+                output_dir,
+                confidence,
+                iou_threshold,
+                crop_mode,
+                target_size,
+                padding,
+                total_cards
+            )
+
+            results[image_path.name] = result
+            total_cards += len(result.get('detections', []))
+
+            print_progress(index, len(image_files), f"Processed {image_path.name}")
+
+        images_with_cards = sum(1 for r in results.values() if r.get('detections'))
+        summary = {
+            'total_images': len(image_files),
+            'total_cards_detected': total_cards,
+            'images_with_cards': images_with_cards,
+            'images_without_cards': len(results) - images_with_cards,
+            'output_directory': str(output_dir),
+            'crop_settings': {
+                'mode': crop_mode,
+                'target_size': target_size,
+                'padding': padding
+            },
+            'results': results
+        }
+
+        summary_path = output_dir / "processing_summary.json"
+        with open(summary_path, 'w', encoding='utf-8') as f:
+            json.dump(summary, f, indent=2, ensure_ascii=False)
+
+        self.logger.info(f"Batch processing completed. Summary saved to: {summary_path}")
+        return summary
+    
+    def get_detection_statistics(self, results: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Derive aggregate statistics from a batch-processing summary.
+
+        Args:
+            results: Summary dict as returned by batch_process.
+
+        Returns:
+            Dict with image/card counts, per-image averages and
+            min/max/mean/std of all detection confidences; ``{}`` when
+            ``results`` is empty.
+        """
+        if not results:
+            return {}
+
+        image_count = results.get('total_images', 0)
+        card_count = results.get('total_cards_detected', 0)
+        hit_count = results.get('images_with_cards', 0)
+
+        # Flatten the confidence of every detection across all images.
+        scores = [
+            det.get('confidence', 0)
+            for per_image in results.get('results', {}).values()
+            for det in per_image.get('detections', [])
+        ]
+
+        if scores:
+            confidence_summary = {
+                'min': min(scores),
+                'max': max(scores),
+                'mean': np.mean(scores),
+                'std': np.std(scores)
+            }
+        else:
+            confidence_summary = {'min': 0, 'max': 0, 'mean': 0, 'std': 0}
+
+        # Ratios fall back to 0 when there are no images to divide by.
+        has_images = image_count > 0
+        return {
+            'total_images_processed': image_count,
+            'total_cards_detected': card_count,
+            'images_with_cards': hit_count,
+            'images_without_cards': image_count - hit_count,
+            'average_cards_per_image': card_count / image_count if has_images else 0,
+            'detection_rate': hit_count / image_count if has_images else 0,
+            'confidence_statistics': confidence_summary
+        }
+    
+    def augment_cropped_cards(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
+                             num_augmentations: int = 3) -> Dict[str, Any]:
+        """
+        Augment every cropped ID card ("id_card_*.jpg") in a directory.
+
+        Args:
+            input_dir: Directory containing the cropped ID cards.
+            output_dir: Directory for the augmented images and
+                "augmentation_summary.json".
+            num_augmentations: Passed to
+                DataAugmentation.augment_single_image for each card.
+
+        Returns:
+            The summary dict that is also written to the JSON file, or ``{}``
+            when the input directory is missing or holds no card files.
+        """
+        input_dir = Path(input_dir)
+        output_dir = Path(output_dir)
+
+        if not input_dir.exists():
+            self.logger.error(f"Input directory not found: {input_dir}")
+            return {}
+
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Outputs are named "<stem>_aug_<n>.jpg" and therefore match the input
+        # pattern too. When writing into the input directory, skip them so a
+        # re-run does not augment its own previous outputs.
+        writes_in_place = input_dir.resolve() == output_dir.resolve()
+        # Sorted so the processing order and summary are reproducible.
+        card_files = sorted(
+            path for path in input_dir.glob("id_card_*.jpg")
+            if not (writes_in_place and "_aug_" in path.stem)
+        )
+
+        if not card_files:
+            self.logger.warning(f"No ID card files found in: {input_dir}")
+            return {}
+
+        self.logger.info(f"Found {len(card_files)} ID cards to augment")
+        # Diagnostic dump: logged once at debug level instead of per card at
+        # INFO, and outside the try block so a missing attribute cannot
+        # discard augmentation results.
+        self.logger.debug(
+            f"DataAugmentation config: {getattr(self.data_augmentation, 'config', None)}"
+        )
+
+        results = {}
+        total_augmented = 0
+
+        for index, card_path in enumerate(card_files, start=1):
+            self.logger.info(f"Augmenting {index}/{len(card_files)}: {card_path.name}")
+
+            card_image = load_image(str(card_path))
+            if card_image is None:
+                self.logger.error(f"Failed to load card: {card_path}")
+                continue
+
+            # Best effort: a failing card is logged and yields no outputs
+            # rather than aborting the whole batch.
+            try:
+                augmented_cards = self.data_augmentation.augment_single_image(
+                    card_image,
+                    num_augmentations=num_augmentations
+                )
+            except Exception as e:
+                self.logger.error(f"Error during augmentation: {e}")
+                augmented_cards = []
+            else:
+                self.logger.info(f"Generated {len(augmented_cards)} augmented cards for {card_path.name}")
+
+            card_results = []
+            for aug_index, aug_card in enumerate(augmented_cards, start=1):
+                aug_path = output_dir / f"{card_path.stem}_aug_{aug_index}.jpg"
+                save_image(aug_card, aug_path)
+                card_results.append({
+                    'augmented_path': str(aug_path),
+                    'augmentation_index': aug_index
+                })
+
+            results[card_path.name] = {
+                'original_path': str(card_path),
+                'augmented_cards': card_results,
+                'total_augmented': len(card_results)
+            }
+            total_augmented += len(card_results)
+
+            print_progress(index, len(card_files), f"Augmented {card_path.name}")
+
+        summary = {
+            'total_cards': len(card_files),
+            'total_augmented': total_augmented,
+            'output_directory': str(output_dir),
+            'results': results
+        }
+
+        summary_path = output_dir / "augmentation_summary.json"
+        with open(summary_path, 'w', encoding='utf-8') as f:
+            json.dump(summary, f, indent=2, ensure_ascii=False)
+
+        self.logger.info(f"Augmentation completed. Summary saved to: {summary_path}")
+        return summary
+    
+    def load_yolo_config(self, config_path: str = None) -> Dict[str, Any]:
+        """
+        Build the detector configuration from the YOLO detector's config module.
+
+        Args:
+            config_path: Path to the YOLO detector's ``config.py``. When
+                None, "src/model/ID_cards_detector/config.py" is used if it
+                exists.
+
+        Returns:
+            Dict with 'detection', 'processing' and 'crop_options' sections.
+            When the config module loads, its DEFAULT_TRAINING_CONFIG and
+            DEFAULT_INFERENCE_CONFIG are added as 'yolo_training_config' and
+            'yolo_inference_config', and the detection thresholds are taken
+            from the inference config. On any failure a warning is logged and
+            the built-in defaults are returned.
+        """
+        import importlib.util
+        import sys
+
+        default_config_dir = "src/model/ID_cards_detector"
+        if config_path is None:
+            default_config_path = "src/model/ID_cards_detector/config.py"
+            if os.path.exists(default_config_path):
+                config_path = default_config_path
+
+        # Built-in defaults; also the result when the YOLO config is unavailable.
+        config = {
+            'detection': {
+                'confidence_threshold': 0.25,
+                'iou_threshold': 0.45,
+                'padding': 10
+            },
+            'processing': {
+                'apply_augmentation': True,
+                'save_original': True,
+                'num_augmentations': 3,
+                'save_format': "jpg",
+                'quality': 95,
+                'target_size': [640, 640]
+            },
+            'crop_options': {
+                'crop_mode': 'bbox',  # bbox, square, aspect_ratio
+                'target_size': None,  # (width, height) or None
+                'padding': 10
+            }
+        }
+
+        try:
+            if config_path is not None:
+                config_dir = str(Path(config_path).resolve().parent)
+            else:
+                config_dir = default_config_dir
+            # Keep the directory importable (the config file may import its
+            # siblings) without growing sys.path on every call.
+            if config_dir not in sys.path:
+                sys.path.append(config_dir)
+
+            if config_path is not None:
+                # Load exactly the requested file under a private name, so an
+                # unrelated or previously cached ``config`` module is not
+                # picked up by accident.
+                spec = importlib.util.spec_from_file_location(
+                    "_yolo_detector_config", str(config_path)
+                )
+                if spec is None or spec.loader is None:
+                    raise ImportError(f"Cannot load config from: {config_path}")
+                config_module = importlib.util.module_from_spec(spec)
+                spec.loader.exec_module(config_module)
+            else:
+                # No file to point at: fall back to a plain import by name.
+                import config as config_module
+
+            training_config = config_module.DEFAULT_TRAINING_CONFIG
+            inference_config = config_module.DEFAULT_INFERENCE_CONFIG
+            # Read both thresholds before mutating ``config`` so a failure
+            # cannot leave it half-updated.
+            conf_threshold = inference_config.get('conf_threshold', 0.25)
+            iou_threshold = inference_config.get('iou_threshold', 0.45)
+
+            config['yolo_training_config'] = training_config
+            config['yolo_inference_config'] = inference_config
+            config['detection']['confidence_threshold'] = conf_threshold
+            config['detection']['iou_threshold'] = iou_threshold
+
+            self.logger.info("Loaded YOLO config successfully")
+
+        except Exception as e:
+            # Best effort: any problem with the external config falls back to
+            # the defaults built above.
+            self.logger.warning(f"Failed to load YOLO config: {e}")
+
+        return config
--- a/src/utils.py
+++ b/src/utils.py
@@ -41,14 +41,11 @@ def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optiona
        image = cv2.imread(str(image_path))
        if image is None:
            return None
-        
        # Convert BGR to RGB
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
-        
        # Resize if target_size is provided
        if target_size:
            image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
-        
        return image
    except Exception as e:
        print(f"Error loading image {image_path}: {e}")