refactor YOLO crop model

2025-08-05 20:53:47 +07:00
parent a4e7573dca
commit 3fd270c8bd
24 changed files with 136 additions and 479 deletions
--- a/src/init.py
+++ b/src/init.py
@@ -1,9 +1,9 @@
 """
-Data Augmentation Package
+Data Augmentation Package for ID Cards
 """

-__version__ = "1.0.0"
-__author__ = "OCR Data Augmentation Tool"
+__version__ = "2.0.0"
+__author__ = "ID Card Data Augmentation Tool"

 from .utils import *
 from .image_processor import ImageProcessor
@@ -12,12 +12,12 @@ from .config_manager import ConfigManager

 __all__ = [
    "ImageProcessor",
-    "DataAugmentation",
+    "DataAugmentation", 
    "ConfigManager",
    "setup_logging",
    "get_image_files",
    "load_image",
    "save_image",
-    "validate_image",
+    "create_augmented_filename",
    "print_progress",
 ] 
--- a/src/pycache/init.cpython-313.pyc
+++ b/src/pycache/init.cpython-313.pyc
--- a/src/pycache/init.cpython-39.pyc
+++ b/src/pycache/init.cpython-39.pyc
--- a/src/pycache/config_manager.cpython-39.pyc
+++ b/src/pycache/config_manager.cpython-39.pyc
--- a/src/pycache/data_augmentation.cpython-39.pyc
+++ b/src/pycache/data_augmentation.cpython-39.pyc
--- a/src/pycache/image_processor.cpython-39.pyc
+++ b/src/pycache/image_processor.cpython-39.pyc
--- a/src/pycache/utils.cpython-313.pyc
+++ b/src/pycache/utils.cpython-313.pyc
--- a/src/pycache/utils.cpython-39.pyc
+++ b/src/pycache/utils.cpython-39.pyc
--- a/src/config.py
+++ b/src/config.py
@@ -1,40 +0,0 @@
-"""
-Configuration file for data augmentation
-"""
-import os
-from pathlib import Path
-
-# Paths
-BASE_DIR = Path(__file__).parent.parent
-DATA_DIR = BASE_DIR / "data"
-INPUT_IMAGES_DIR = DATA_DIR / "dataset" / "training_data" / "images"
-OUTPUT_DIR = DATA_DIR / "augmented_data"
-
-# Data augmentation parameters
-AUGMENTATION_CONFIG = {
-    "rotation_range": 15,  # degrees
-    "width_shift_range": 0.1,  # fraction of total width
-    "height_shift_range": 0.1,  # fraction of total height
-    "brightness_range": [0.8, 1.2],  # brightness factor
-    "zoom_range": [0.9, 1.1],  # zoom factor
-    "horizontal_flip": True,
-    "vertical_flip": False,
-    "fill_mode": "nearest",
-    "cval": 0,
-    "rescale": 1./255,
-}
-
-# Processing parameters
-PROCESSING_CONFIG = {
-    "target_size": (224, 224),  # (width, height)
-    "batch_size": 32,
-    "num_augmentations": 3,  # number of augmented versions per image
-    "save_format": "jpg",
-    "quality": 95,
-}
-
-# Supported image formats
-SUPPORTED_FORMATS = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
-
-# Create output directory if it doesn't exist
-OUTPUT_DIR.mkdir(parents=True, exist_ok=True) 
--- a/src/data_augmentation.py
+++ b/src/data_augmentation.py
@@ -1,5 +1,5 @@
 """
-Data augmentation class for image augmentation - ONLY ROTATION
+Data augmentation class for image augmentation - ONLY ROTATION with quality preservation
 """
 import cv2
 import numpy as np
@@ -23,32 +23,134 @@ class DataAugmentation:
        self.config = config or {}
        self.image_processor = ImageProcessor()
    
-    def rotate_image(self, image: np.ndarray, angle: float) -> np.ndarray:
+    def rotate_image_preserve_quality(self, image: np.ndarray, angle: float) -> np.ndarray:
         """
-        Rotate image by given angle
+        Rotate image by given angle with white background and crop to preserve quality
+        
+        The output is sized to the bounding box of the rotated image so no
+        corner is clipped. Areas not covered by the source are filled with
+        white and then trimmed by ``_crop_white_borders``.
         
         Args:
             image: Input image
             angle: Rotation angle in degrees
             
         Returns:
-            Rotated image
+            Rotated and cropped image
         """
+        # Local import keeps this method self-contained regardless of the
+        # module-level import block.
+        import math
+        
         height, width = image.shape[:2]
-        center = (width // 2, height // 2)
+        
+        # Size of the axis-aligned bounding box of the rotated image
+        angle_rad = math.radians(angle)
+        cos_val = abs(math.cos(angle_rad))
+        sin_val = abs(math.sin(angle_rad))
+        
+        # Round away floating-point noise (e.g. sin(180 deg) ~ 1e-16) before
+        # taking the ceiling, so right-angle rotations keep exact dimensions
+        # while genuinely fractional sizes round up instead of clipping a pixel.
+        new_width = max(1, math.ceil(round(width * cos_val + height * sin_val, 6)))
+        new_height = max(1, math.ceil(round(height * cos_val + width * sin_val, 6)))
+        
+        # Rotate about the centre of the source image (pixel-centre convention)
+        center = ((width - 1) / 2.0, (height - 1) / 2.0)
         
         # Create rotation matrix
         rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
+        
+        # Shift the result so the source centre lands on the centre of the
+        # enlarged output; this replaces pasting the image onto a bigger
+        # canvas before rotating.
+        rotation_matrix[0, 2] += (new_width - 1) / 2.0 - center[0]
+        rotation_matrix[1, 2] += (new_height - 1) / 2.0 - center[1]
         
         # Perform rotation
-        rotated = cv2.warpAffine(image, rotation_matrix, (width, height), 
-                                borderMode=cv2.BORDER_REPLICATE)
+        rotated = cv2.warpAffine(image, rotation_matrix, (new_width, new_height), 
+                                borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))
+        
+        # Crop white borders to get the actual image content
+        rotated = self._crop_white_borders(rotated)
         
         return rotated
    
+    def _crop_white_borders(self, image: np.ndarray, white_threshold: int = 250) -> np.ndarray:
+        """
+        Crop near-white borders from image to get the actual content
+        
+        NOTE: any border region whose grey level is at or above
+        ``white_threshold`` is trimmed, including white margins that belong
+        to the content itself.
+        
+        Args:
+            image: Input image with white borders (single-channel or colour)
+            white_threshold: Grey level at or above which a pixel is treated
+                as white background
+            
+        Returns:
+            Image cropped to the bounding box of all pixels darker than
+            ``white_threshold``, or the input unchanged if there are none
+        """
+        # Single-channel input is already a grey map; cvtColor would reject it
+        if image.ndim == 2 or image.shape[2] == 1:
+            gray = image.reshape(image.shape[:2])
+        else:
+            gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+        
+        # Mask of non-white pixels (content); findNonZero needs 8-bit input
+        non_white = (gray < white_threshold).astype(np.uint8)
+        
+        # Find bounding box of content
+        coords = cv2.findNonZero(non_white)
+        if coords is None:
+            # Entirely white image: nothing to anchor a crop on
+            return image
+        
+        x, y, w, h = cv2.boundingRect(coords)
+        return image[y:y+h, x:x+w]
+    
+    def resize_preserve_aspect(self, image: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
+        """
+        Resize image preserving aspect ratio with white padding
+        
+        Args:
+            image: Input image
+            target_size: Target size (width, height)
+            
+        Returns:
+            Image of exactly ``target_size`` with the resized input centred
+            on a white background
+        """
+        target_width, target_height = target_size
+        img_height, img_width = image.shape[:2]
+        
+        # Calculate aspect ratios
+        target_aspect = target_width / target_height
+        img_aspect = img_width / img_height
+        
+        if img_aspect > target_aspect:
+            # Image is wider than target: width is the limiting side
+            new_width = target_width
+            new_height = int(target_width / img_aspect)
+        else:
+            # Image is taller than target: height is the limiting side
+            new_height = target_height
+            new_width = int(target_height * img_aspect)
+        
+        # Extreme aspect ratios can truncate a side to 0, which cv2.resize rejects
+        new_width = max(1, new_width)
+        new_height = max(1, new_height)
+        
+        # INTER_AREA is meant for shrinking; when enlarging it degrades to
+        # nearest-neighbour, so use cubic interpolation for upscaling.
+        shrinking = new_width < img_width or new_height < img_height
+        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
+        
+        # Resize image
+        resized = cv2.resize(image, (new_width, new_height), interpolation=interpolation)
+        
+        # Create white background with the same channel layout as the resized image
+        result = np.full((target_height, target_width) + resized.shape[2:], 255, dtype=np.uint8)
+        
+        # Calculate offset to center the image
+        offset_x = (target_width - new_width) // 2
+        offset_y = (target_height - new_height) // 2
+        
+        # Place resized image in center
+        result[offset_y:offset_y+new_height, offset_x:offset_x+new_width] = resized
+        
+        return result
+    
+
+    
    def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
        """
-        Apply rotation augmentation to a single image
+        Apply rotation augmentation to a single image with quality preservation
        
        Args:
            image: Input image
@@ -65,12 +167,18 @@ class DataAugmentation:
        angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
        
        for i in range(num_augmentations):
+            # Start with original image
            augmented = image.copy()
            
-            # Apply rotation with random angle from the specified list
+            # Apply rotation with quality preservation
            if rotation_config.get("enabled", False):
                angle = random.choice(angles)
-                augmented = self.rotate_image(augmented, angle)
+                augmented = self.rotate_image_preserve_quality(augmented, angle)
+            
+            # Resize preserving aspect ratio
+            target_size = self.image_processor.target_size
+            if target_size:
+                augmented = self.resize_preserve_aspect(augmented, target_size)
            
            augmented_images.append(augmented)
        
@@ -78,7 +186,7 @@ class DataAugmentation:
    
    def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
        """
-        Augment a single image file and save results
+        Augment a single image file and save results with quality preservation
        
        Args:
            image_path: Path to input image
@@ -88,8 +196,8 @@ class DataAugmentation:
        Returns:
            List of paths to saved augmented images
        """
-        # Load image
-        image = load_image(image_path, self.image_processor.target_size)
+        # Load image without resizing to preserve original quality
+        image = load_image(image_path, None)  # Load original size
        if image is None:
            return []
        
--- a/src/image_processor.py
+++ b/src/image_processor.py
@@ -1,14 +1,14 @@
 """
-Image processing class for basic image operations
+Image processing class for data augmentation
 """
 import cv2
 import numpy as np
 from pathlib import Path
-from typing import Tuple, Optional, List
-from utils import load_image, save_image, validate_image, get_image_files
+from typing import Tuple, Optional
+from utils import load_image

 class ImageProcessor:
-    """Class for basic image processing operations"""
+    """Class for image processing operations used in data augmentation"""
    
    def __init__(self, target_size: Tuple[int, int] = None):
        """
@@ -17,67 +17,7 @@ class ImageProcessor:
        Args:
            target_size: Target size for image resizing (width, height)
        """
-        self.target_size = target_size or (224, 224)  # Default size
-    
-    def load_and_preprocess(self, image_path: Path) -> Optional[np.ndarray]:
-        """
-        Load and preprocess image
-        
-        Args:
-            image_path: Path to image file
-            
-        Returns:
-            Preprocessed image as numpy array or None if failed
-        """
-        if not validate_image(image_path):
-            print(f"Invalid image file: {image_path}")
-            return None
-        
-        image = load_image(image_path, self.target_size)
-        if image is None:
-            return None
-        
-        # Normalize pixel values
-        image = image.astype(np.float32) / 255.0
-        
-        return image
-    
-    def resize_image(self, image: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
-        """
-        Resize image to target size
-        
-        Args:
-            image: Input image as numpy array
-            target_size: Target size (width, height)
-            
-        Returns:
-            Resized image
-        """
-        return cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
-    
-    def normalize_image(self, image: np.ndarray) -> np.ndarray:
-        """
-        Normalize image pixel values to [0, 1]
-        
-        Args:
-            image: Input image
-            
-        Returns:
-            Normalized image
-        """
-        return image.astype(np.float32) / 255.0
-    
-    def denormalize_image(self, image: np.ndarray) -> np.ndarray:
-        """
-        Denormalize image pixel values to [0, 255]
-        
-        Args:
-            image: Input image (normalized)
-            
-        Returns:
-            Denormalized image
-        """
-        return (image * 255).astype(np.uint8)
+        self.target_size = target_size or (640, 640)  # Default size for ID cards
    
    def get_image_info(self, image_path: Path) -> dict:
        """
@@ -107,68 +47,4 @@ class ImageProcessor:
            }
        except Exception as e:
            print(f"Error getting image info for {image_path}: {e}")
-            return {}
-    
-    def batch_process_images(self, input_dir: Path, output_dir: Path) -> List[Path]:
-        """
-        Process all images in a directory
-        
-        Args:
-            input_dir: Input directory containing images
-            output_dir: Output directory for processed images
-            
-        Returns:
-            List of processed image paths
-        """
-        image_files = get_image_files(input_dir)
-        processed_files = []
-        
-        print(f"Found {len(image_files)} images to process")
-        
-        for i, image_path in enumerate(image_files):
-            print_progress(i + 1, len(image_files), "Processing images")
-            
-            # Load and preprocess image
-            image = self.load_and_preprocess(image_path)
-            if image is None:
-                continue
-            
-            # Create output path
-            output_path = output_dir / image_path.name
-            
-            # Denormalize for saving
-            image = self.denormalize_image(image)
-            
-            # Save processed image
-            if save_image(image, output_path):
-                processed_files.append(output_path)
-        
-        print(f"\nProcessed {len(processed_files)} images successfully")
-        return processed_files
-    
-    def create_thumbnail(self, image: np.ndarray, size: Tuple[int, int] = (100, 100)) -> np.ndarray:
-        """
-        Create thumbnail of image
-        
-        Args:
-            image: Input image
-            size: Thumbnail size (width, height)
-            
-        Returns:
-            Thumbnail image
-        """
-        return cv2.resize(image, size, interpolation=cv2.INTER_AREA)
-    
-    def convert_to_grayscale(self, image: np.ndarray) -> np.ndarray:
-        """
-        Convert image to grayscale
-        
-        Args:
-            image: Input image (RGB)
-            
-        Returns:
-            Grayscale image
-        """
-        if len(image.shape) == 3:
-            return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-        return image 
+            return {} 
--- a/src/model/YOLO_processor/init.py
+++ b/src/model/YOLO_processor/init.py
--- a/src/model/YOLO_processor/id_card_processor.py
+++ b/src/model/YOLO_processor/id_card_processor.py
--- a/src/model/YOLO_processor/roboflow_id_detector.py
+++ b/src/model/YOLO_processor/roboflow_id_detector.py
--- a/src/model/pycache/init.cpython-313.pyc
+++ b/src/model/pycache/init.cpython-313.pyc
--- a/src/model/pycache/init.cpython-39.pyc
+++ b/src/model/pycache/init.cpython-39.pyc
--- a/src/model/pycache/id_card_processor.cpython-39.pyc
+++ b/src/model/pycache/id_card_processor.cpython-39.pyc
--- a/src/model/pycache/roboflow_id_detector.cpython-313.pyc
+++ b/src/model/pycache/roboflow_id_detector.cpython-313.pyc
--- a/src/model/pycache/roboflow_id_detector.cpython-39.pyc
+++ b/src/model/pycache/roboflow_id_detector.cpython-39.pyc
--- a/src/model/pycache/yolo_detector.cpython-39.pyc
+++ b/src/model/pycache/yolo_detector.cpython-39.pyc
--- a/src/utils.py
+++ b/src/utils.py
@@ -32,14 +32,7 @@ def get_image_files(directory: Path) -> List[Path]:
            image_files.extend(directory.glob(f"*{ext.upper()}"))
    return sorted(image_files)

-def validate_image(image_path: Path) -> bool:
-    """Validate if file is a valid image"""
-    try:
-        with Image.open(image_path) as img:
-            img.verify()
-        return True
-    except Exception:
-        return False
+

 def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optional[np.ndarray]:
    """Load and resize image"""
@@ -83,9 +76,7 @@ def create_augmented_filename(original_path: Path, index: int, suffix: str = "au
    suffix = f"_{suffix}_{index:02d}"
    return original_path.parent / f"{stem}{suffix}{original_path.suffix}"

-def get_file_size_mb(file_path: Path) -> float:
-    """Get file size in MB"""
-    return file_path.stat().st_size / (1024 * 1024)
+

 def print_progress(current: int, total: int, prefix: str = "Progress"):
    """Print progress bar"""