done 7 transformations

2025-08-05 21:42:23 +07:00
parent 3fd270c8bd
commit 96a1de908f
6 changed files with 706 additions and 30 deletions
@@ -44,7 +44,13 @@ class ConfigManager:
                "log_file": "logs/data_augmentation.log"
            },
            "augmentation": {
-                "rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0}
+                "rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0},
+                "random_cropping": {"enabled": True, "ratio_range": [0.7, 1.0], "probability": 1.0},
+                "random_noise": {"enabled": True, "mean_range": [0.0, 0.7], "variance_range": [0.0, 0.1], "probability": 1.0},
+                "partial_blockage": {"enabled": True, "num_occlusions_range": [1, 100], "coverage_range": [0.0, 0.25], "variance_range": [0.0, 0.1], "probability": 1.0},
+                "grayscale": {"enabled": True, "probability": 1.0},
+                "blurring": {"enabled": True, "kernel_ratio_range": [0.0, 0.0084], "probability": 1.0},
+                "brightness_contrast": {"enabled": True, "alpha_range": [0.4, 3.0], "beta_range": [1, 100], "probability": 1.0}
            },
            "processing": {
                "target_size": [224, 224],
@@ -1,5 +1,5 @@
 """
-Data augmentation class for image augmentation - ONLY ROTATION with quality preservation
+Data augmentation class for image augmentation - ROTATION, RANDOM CROPPING, RANDOM NOISE, PARTIAL BLOCKAGE, GRAYSCALE, BLURRING and BRIGHTNESS/CONTRAST with quality preservation
 """
 import cv2
 import numpy as np
@@ -11,7 +11,7 @@ from image_processor import ImageProcessor
 from utils import load_image, save_image, create_augmented_filename, print_progress

 class DataAugmentation:
-    """Class for image data augmentation - ONLY ROTATION"""
+    """Class for image data augmentation - rotation, random cropping, random noise, partial blockage, grayscale, blurring and brightness/contrast"""
    
    def __init__(self, config: Dict[str, Any] = None):
        """
@@ -23,6 +23,223 @@ class DataAugmentation:
        self.config = config or {}
        self.image_processor = ImageProcessor()
    
+    def random_crop_preserve_quality(self, image: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.7, 1.0)) -> np.ndarray:
+        """
+        Apply random cropping to simulate partially visible ID cards with white background preservation
+        
+        Args:
+            image: Input image
+            crop_ratio_range: Range for crop ratio (min_ratio, max_ratio)
+            
+        Returns:
+            Cropped image with white background
+        """
+        height, width = image.shape[:2]
+        
+        # Random crop ratio between 0.7 and 1.0
+        crop_ratio = random.uniform(crop_ratio_range[0], crop_ratio_range[1])
+        
+        # Calculate new dimensions
+        new_height = int(height * crop_ratio)
+        new_width = int(width * crop_ratio)
+        
+        # Ensure minimum size
+        new_height = max(new_height, int(height * 0.7))
+        new_width = max(new_width, int(width * 0.7))
+        
+        # Random starting position for cropping
+        max_start_y = height - new_height
+        max_start_x = width - new_width
+        
+        # Ensure we don't go out of bounds
+        if max_start_y < 0 or max_start_x < 0:
+            # If crop is larger than image, return original
+            return image
+        
+        start_y = random.randint(0, max_start_y)
+        start_x = random.randint(0, max_start_x)
+        
+        # Crop the image
+        cropped = image[start_y:start_y + new_height, start_x:start_x + new_width]
+        
+        # Create white background canvas with original size
+        canvas = np.ones((height, width, 3), dtype=np.uint8) * 255
+        
+        # Calculate offset to center the cropped image
+        offset_y = (height - new_height) // 2
+        offset_x = (width - new_width) // 2
+        
+        # Place cropped image in center of white canvas
+        canvas[offset_y:offset_y + new_height, offset_x:offset_x + new_width] = cropped
+        
+        return canvas
+    
+    def add_random_noise_preserve_quality(self, image: np.ndarray, mean_range: Tuple[float, float] = (0.0, 0.7), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray:
+        """
+        Add random noise to simulate worn-out ID cards with quality preservation
+        
+        Args:
+            image: Input image
+            mean_range: Range for noise mean (min_mean, max_mean)
+            variance_range: Range for noise variance (min_variance, max_variance)
+            
+        Returns:
+            Image with added noise
+        """
+        # Convert image to float for noise addition
+        image_float = image.astype(np.float32) / 255.0
+        
+        # Generate random noise parameters
+        mean = random.uniform(mean_range[0], mean_range[1])
+        variance = random.uniform(variance_range[0], variance_range[1])
+        
+        # Generate Gaussian noise
+        noise = np.random.normal(mean, np.sqrt(variance), image_float.shape)
+        
+        # Add noise to image
+        noisy_image = image_float + noise
+        
+        # Clip values to [0, 1] range
+        noisy_image = np.clip(noisy_image, 0.0, 1.0)
+        
+        # Convert back to uint8
+        noisy_image = (noisy_image * 255).astype(np.uint8)
+        
+        return noisy_image
+    
+    def add_partial_blockage_preserve_quality(self, image: np.ndarray, num_occlusions_range: Tuple[int, int] = (1, 100), coverage_range: Tuple[float, float] = (0.0, 0.25), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray:
+        """
+        Add partial blockage with horizontal lines of different colors to simulate occluded card details
+        
+        Args:
+            image: Input image
+            num_occlusions_range: Range for number of occlusions (min, max)
+            coverage_range: Range for coverage ratio (min, max)
+            variance_range: Range for line thickness variance (min, max)
+            
+        Returns:
+            Image with partial blockage
+        """
+        height, width = image.shape[:2]
+        result = image.copy()
+        
+        # Generate random parameters
+        num_occlusions = random.randint(num_occlusions_range[0], num_occlusions_range[1])
+        coverage = random.uniform(coverage_range[0], coverage_range[1])
+        variance = random.uniform(variance_range[0], variance_range[1])
+        
+        # Calculate total area to cover
+        total_area = height * width
+        target_coverage_area = int(total_area * coverage)
+        
+        # Colors for occlusion lines (different colors to simulate various objects)
+        occlusion_colors = [
+            (255, 255, 255),  # White
+            (0, 0, 0),        # Black
+            (128, 128, 128),  # Gray
+            (255, 0, 0),      # Red
+            (0, 255, 0),      # Green
+            (0, 0, 255),      # Blue
+            (255, 255, 0),    # Yellow
+            (255, 0, 255),    # Magenta
+            (0, 255, 255),    # Cyan
+        ]
+        
+        current_coverage = 0
+        
+        for _ in range(num_occlusions):
+            if current_coverage >= target_coverage_area:
+                break
+                
+            # Random line parameters
+            color = random.choice(occlusion_colors)
+            thickness = max(1, int(random.gauss(3, variance * 10)))
+            
+            # Random horizontal line coordinates (y1 = y2 for horizontal lines)
+            y = random.randint(0, height)
+            x1 = random.randint(0, width)
+            x2 = random.randint(x1, width)  # Ensure x2 >= x1 for proper line
+            
+            # Draw horizontal line
+            cv2.line(result, (x1, y), (x2, y), color, thickness)
+            
+            # Calculate coverage of this line (approximate)
+            line_length = x2 - x1
+            line_coverage = line_length * thickness
+            current_coverage += line_coverage
+        
+        return result
+    
+    def convert_to_grayscale_preserve_quality(self, image: np.ndarray) -> np.ndarray:
+        """
+        Convert image to grayscale to mimic Xerox/scan copies with quality preservation
+        
+        Args:
+            image: Input image
+            
+        Returns:
+            Grayscale image
+        """
+        # Convert to grayscale using OpenCV
+        grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+        
+        # Convert back to 3-channel image to maintain compatibility
+        grayscale_3channel = cv2.cvtColor(grayscale, cv2.COLOR_GRAY2RGB)
+        
+        return grayscale_3channel
+    
+    def apply_blurring_preserve_quality(self, image: np.ndarray, kernel_ratio_range: Tuple[float, float] = (0.0, 0.0084)) -> np.ndarray:
+        """
+        Apply blurring to simulate blurred card images that are still readable using OpenCV filter2D
+        
+        Args:
+            image: Input image
+            kernel_ratio_range: Range for kernel ratio (min, max)
+            
+        Returns:
+            Blurred image
+        """
+        # Generate random kernel ratio
+        kernel_ratio = random.uniform(kernel_ratio_range[0], kernel_ratio_range[1])
+        
+        # Calculate kernel size based on image dimensions
+        height, width = image.shape[:2]
+        kernel_size = max(3, int(min(height, width) * kernel_ratio))
+        
+        # Ensure kernel size is odd
+        if kernel_size % 2 == 0:
+            kernel_size += 1
+        
+        # Create Gaussian kernel for blurring
+        kernel = cv2.getGaussianKernel(kernel_size, kernel_size / 3.0)
+        kernel_2d = kernel * kernel.T
+        
+        # Apply filter2D for blurring
+        blurred = cv2.filter2D(image, -1, kernel_2d)
+        
+        return blurred
+    
+    def adjust_brightness_contrast_preserve_quality(self, image: np.ndarray, alpha_range: Tuple[float, float] = (0.4, 3.0), beta_range: Tuple[int, int] = (1, 100)) -> np.ndarray:
+        """
+        Adjust brightness and contrast to mimic different environmental lighting conditions using OpenCV convertScaleAbs
+        
+        Args:
+            image: Input image
+            alpha_range: Range for contrast (alpha) (min, max)
+            beta_range: Range for brightness (beta) (min, max)
+            
+        Returns:
+            Image with adjusted brightness and contrast
+        """
+        # Generate random alpha and beta values
+        alpha = random.uniform(alpha_range[0], alpha_range[1])  # Contrast
+        beta = random.randint(beta_range[0], beta_range[1])     # Brightness
+        
+        # Apply brightness and contrast adjustment using convertScaleAbs
+        adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
+        
+        return adjusted
+    
    def rotate_image_preserve_quality(self, image: np.ndarray, angle: float) -> np.ndarray:
        """
        Rotate image by given angle with white background and crop to preserve quality
@@ -150,37 +367,132 @@ class DataAugmentation:
    
    def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
        """
-        Apply rotation augmentation to a single image with quality preservation
+        Apply each augmentation method separately to create independent augmented versions
        
        Args:
            image: Input image
-            num_augmentations: Number of augmented versions to create
+            num_augmentations: Number of augmented versions to create per method
            
        Returns:
-            List of augmented images
+            List of augmented images (each method creates separate versions)
        """
        num_augmentations = num_augmentations or 3  # Default value
        augmented_images = []
        
-        # Get rotation configuration
+        # Get configuration
        rotation_config = self.config.get("rotation", {})
-        angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
+        cropping_config = self.config.get("random_cropping", {})
+        noise_config = self.config.get("random_noise", {})
+        blockage_config = self.config.get("partial_blockage", {})
+        grayscale_config = self.config.get("grayscale", {})
+        blurring_config = self.config.get("blurring", {})
+        brightness_contrast_config = self.config.get("brightness_contrast", {})
        
-        for i in range(num_augmentations):
-            # Start with original image
-            augmented = image.copy()
-            
-            # Apply rotation with quality preservation
-            if rotation_config.get("enabled", False):
+        # Configuration parameters
+        angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
+        crop_ratio_range = cropping_config.get("ratio_range", (0.7, 1.0))
+        mean_range = noise_config.get("mean_range", (0.0, 0.7))
+        variance_range = noise_config.get("variance_range", (0.0, 0.1))
+        num_occlusions_range = blockage_config.get("num_occlusions_range", (1, 100))
+        coverage_range = blockage_config.get("coverage_range", (0.0, 0.25))
+        blockage_variance_range = blockage_config.get("variance_range", (0.0, 0.1))
+        kernel_ratio_range = blurring_config.get("kernel_ratio_range", (0.0, 0.0084))
+        alpha_range = brightness_contrast_config.get("alpha_range", (0.4, 3.0))
+        beta_range = brightness_contrast_config.get("beta_range", (1, 100))
+        
+        # Apply each method separately to create independent versions
+        
+        # 1. Rotation only
+        if rotation_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
                angle = random.choice(angles)
                augmented = self.rotate_image_preserve_quality(augmented, angle)
-            
-            # Resize preserving aspect ratio
-            target_size = self.image_processor.target_size
-            if target_size:
-                augmented = self.resize_preserve_aspect(augmented, target_size)
-            
-            augmented_images.append(augmented)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 2. Random cropping only
+        if cropping_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.random_crop_preserve_quality(augmented, crop_ratio_range)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 3. Random noise only
+        if noise_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.add_random_noise_preserve_quality(augmented, mean_range, variance_range)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 4. Partial blockage only
+        if blockage_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.add_partial_blockage_preserve_quality(augmented, num_occlusions_range, coverage_range, blockage_variance_range)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 5. Grayscale only
+        if grayscale_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.convert_to_grayscale_preserve_quality(augmented)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 6. Blurring only
+        if blurring_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.apply_blurring_preserve_quality(augmented, kernel_ratio_range)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
+        
+        # 7. Brightness and contrast only
+        if brightness_contrast_config.get("enabled", False):
+            for i in range(num_augmentations):
+                augmented = image.copy()
+                augmented = self.adjust_brightness_contrast_preserve_quality(augmented, alpha_range, beta_range)
+                
+                # Resize preserving aspect ratio
+                target_size = self.image_processor.target_size
+                if target_size:
+                    augmented = self.resize_preserve_aspect(augmented, target_size)
+                
+                augmented_images.append(augmented)
        
        return augmented_images
    
@@ -191,7 +503,7 @@ class DataAugmentation:
        Args:
            image_path: Path to input image
            output_dir: Output directory for augmented images
-            num_augmentations: Number of augmented versions to create
+            num_augmentations: Number of augmented versions to create per method
            
        Returns:
            List of paths to saved augmented images
@@ -204,16 +516,24 @@ class DataAugmentation:
        # Apply augmentations
        augmented_images = self.augment_single_image(image, num_augmentations)
        
-        # Save augmented images
+        # Save augmented images with method names
        saved_paths = []
+        method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"]
+        method_index = 0
+        
        for i, aug_image in enumerate(augmented_images):
-            # Create output filename
-            output_filename = create_augmented_filename(image_path, i + 1)
+            # Determine method name based on index
+            method_name = method_names[method_index // num_augmentations] if method_index // num_augmentations < len(method_names) else "aug"
+            
+            # Create output filename with method name
+            output_filename = create_augmented_filename(image_path, (i % num_augmentations) + 1, method_name)
            output_path = output_dir / output_filename.name
            
            # Save image
            if save_image(aug_image, output_path):
                saved_paths.append(output_path)
+            
+            method_index += 1
        
        return saved_paths
    
@@ -70,10 +70,10 @@ def save_image(image: np.ndarray, output_path: Path, quality: int = 95) -> bool:
        print(f"Error saving image {output_path}: {e}")
        return False

def create_augmented_filename(original_path: Path, index: int, method: str = "aug") -> Path:
    """Build the sibling path ``<stem>_<method>_<NN><ext>`` for an augmented image.

    The index is zero-padded to at least two digits and the original file
    extension and parent directory are kept.
    """
    new_name = f"{original_path.stem}_{method}_{index:02d}{original_path.suffix}"
    return original_path.parent / new_name