diff --git a/.gitignore b/.gitignore index 70d391f..4af1b00 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,5 @@ *.json *.pt *.ipynb +*.pyc +*.log \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..6270081 --- /dev/null +++ b/README.md @@ -0,0 +1,309 @@ +# ID Cards Data Augmentation Tool + +A comprehensive data augmentation tool specifically designed for ID card images, implementing 7 different augmentation techniques to simulate real-world scenarios. + +## 🎯 Overview + +This tool provides data augmentation capabilities for ID card images, implementing various transformation techniques that mimic real-world conditions such as worn-out cards, partial occlusion, different lighting conditions, and more. + +## ✨ Features + +### 7 Augmentation Techniques + +1. **Rotation** - Simulates cards at different angles +2. **Random Cropping** - Simulates partially visible cards +3. **Random Noise** - Simulates worn-out cards +4. **Horizontal Blockage** - Simulates occluded card details +5. **Grayscale Transformation** - Simulates Xerox/scan copies +6. **Blurring** - Simulates blurred but readable cards +7. **Brightness & Contrast** - Simulates different lighting conditions + +### Key Features + +- **Separate Methods**: Each augmentation technique is applied independently +- **Quality Preservation**: Maintains image quality with white background preservation +- **OpenCV Integration**: Uses OpenCV functions for reliable image processing +- **Configurable**: Easy configuration through YAML files +- **Progress Tracking**: Real-time progress monitoring +- **Batch Processing**: Process multiple images efficiently + +## 🚀 Installation + +### Prerequisites + +- Python 3.7+ +- OpenCV +- NumPy +- PyYAML +- PIL (Pillow) + +### Setup + +1. **Clone the repository**: +```bash +git clone <repository-url> +cd IDcardsGenerator +``` + +2. **Install dependencies**: +```bash +pip install opencv-python numpy pyyaml pillow +``` + +3. 
**Activate conda environment** (if using GPU): +```bash +conda activate gpu +``` + +## 📁 Project Structure + +``` +IDcardsGenerator/ +├── config/ +│ └── config.yaml # Main configuration file +├── data/ +│ └── IDcards/ +│ └── processed/ # Input images directory +├── src/ +│ ├── data_augmentation.py # Core augmentation logic +│ ├── config_manager.py # Configuration management +│ ├── image_processor.py # Image processing utilities +│ └── utils.py # Utility functions +├── logs/ # Log files +├── out/ # Output directory +└── main.py # Main script +``` + +## ⚙️ Configuration + +### Main Configuration (`config/config.yaml`) + +```yaml +# Data augmentation parameters +augmentation: + # Rotation + rotation: + enabled: true + angles: [30, 60, 120, 150, 180, 210, 240, 300, 330] + probability: 1.0 + + # Random cropping + random_cropping: + enabled: true + ratio_range: [0.7, 1.0] + probability: 1.0 + + # Random noise + random_noise: + enabled: true + mean_range: [0.0, 0.7] + variance_range: [0.0, 0.1] + probability: 1.0 + + # Partial blockage + partial_blockage: + enabled: true + num_occlusions_range: [1, 100] + coverage_range: [0.0, 0.25] + variance_range: [0.0, 0.1] + probability: 1.0 + + # Grayscale transformation + grayscale: + enabled: true + probability: 1.0 + + # Blurring + blurring: + enabled: true + kernel_ratio_range: [0.0, 0.0084] + probability: 1.0 + + # Brightness and contrast + brightness_contrast: + enabled: true + alpha_range: [0.4, 3.0] + beta_range: [1, 100] + probability: 1.0 + +# Processing configuration +processing: + target_size: [640, 640] + num_augmentations: 3 + save_format: "jpg" + quality: 95 +``` + +## 🎮 Usage + +### Basic Usage + +```bash +python main.py --input-dir data/IDcards/processed --output-dir out +``` + +### Command Line Options + +```bash +python main.py [OPTIONS] + +Options: + --config CONFIG Path to configuration file (default: config/config.yaml) + --input-dir INPUT_DIR Input directory containing images + --output-dir OUTPUT_DIR Output 
directory for augmented images + --num-augmentations N Number of augmented versions per image (default: 3) + --target-size SIZE Target size for images (width x height) + --preview Preview augmentation on first image only + --info Show information about images in input directory + --list-presets List available presets and exit + --log-level LEVEL Logging level (DEBUG, INFO, WARNING, ERROR) +``` + +### Examples + +1. **Preview augmentation**: +```bash +python main.py --preview --input-dir data/IDcards/processed --output-dir test_output +``` + +2. **Show image information**: +```bash +python main.py --info --input-dir data/IDcards/processed +``` + +3. **Custom number of augmentations**: +```bash +python main.py --input-dir data/IDcards/processed --output-dir out --num-augmentations 5 +``` + +4. **Custom target size**: +```bash +python main.py --input-dir data/IDcards/processed --output-dir out --target-size 512x512 +``` + +## 📊 Output + +### File Naming Convention + +The tool creates separate files for each augmentation method: + +``` +im1_rotation_01.png # Rotation method +im1_cropping_01.png # Random cropping method +im1_noise_01.png # Random noise method +im1_blockage_01.png # Partial blockage method +im1_grayscale_01.png # Grayscale method +im1_blurring_01.png # Blurring method +im1_brightness_contrast_01.png # Brightness/contrast method +``` + +### Output Summary + +After processing, you'll see a summary like: + +``` +================================================== +AUGMENTATION SUMMARY +================================================== +Original images: 106 +Augmented images: 2226 +Augmentation ratio: 21.00 +Successful augmentations: 106 +Output directory: out +================================================== +``` + +## 🔧 Augmentation Techniques Details + +### 1. 
Rotation +- **Purpose**: Simulates cards at different angles +- **Angles**: 30°, 60°, 120°, 150°, 180°, 210°, 240°, 300°, 330° +- **Method**: OpenCV rotation with white background preservation + +### 2. Random Cropping +- **Purpose**: Simulates partially visible ID cards +- **Ratio Range**: 0.7 to 1.0 (70% to 100% of original size) +- **Method**: Random crop with white background preservation + +### 3. Random Noise +- **Purpose**: Simulates worn-out cards +- **Mean Range**: 0.0 to 0.7 +- **Variance Range**: 0.0 to 0.1 +- **Method**: Gaussian noise addition + +### 4. Horizontal Blockage +- **Purpose**: Simulates occluded card details +- **Lines**: 1 to 100 horizontal lines +- **Coverage**: 0% to 25% of image area +- **Colors**: Multiple colors to simulate various objects + +### 5. Grayscale Transformation +- **Purpose**: Simulates Xerox/scan copies +- **Method**: OpenCV `cv2.cvtColor()` function +- **Output**: 3-channel grayscale image + +### 6. Blurring +- **Purpose**: Simulates blurred but readable cards +- **Kernel Ratio**: 0.0 to 0.0084 +- **Method**: OpenCV `cv2.filter2D()` with Gaussian kernel + +### 7. Brightness & Contrast +- **Purpose**: Simulates different lighting conditions +- **Alpha Range**: 0.4 to 3.0 (contrast) +- **Beta Range**: 1 to 100 (brightness) +- **Method**: OpenCV `cv2.convertScaleAbs()` + +## 🛠️ Development + +### Adding New Augmentation Methods + +1. Add the method to `src/data_augmentation.py` +2. Update configuration in `config/config.yaml` +3. Update default config in `src/config_manager.py` +4. 
Test with preview mode + +### Code Structure + +- **`main.py`**: Entry point and command-line interface +- **`src/data_augmentation.py`**: Core augmentation logic +- **`src/config_manager.py`**: Configuration management +- **`src/image_processor.py`**: Image processing utilities +- **`src/utils.py`**: Utility functions + +## 📝 Logging + +The tool provides comprehensive logging: + +- **File logging**: `logs/data_augmentation.log` +- **Console logging**: Real-time progress updates +- **Log levels**: DEBUG, INFO, WARNING, ERROR + +## 🤝 Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Test thoroughly +5. Submit a pull request + +## 📄 License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## 🙏 Acknowledgments + +- OpenCV for image processing capabilities +- NumPy for numerical operations +- PyYAML for configuration management + +## 📞 Support + +For issues and questions: +1. Check the logs in `logs/data_augmentation.log` +2. Review the configuration in `config/config.yaml` +3. Test with preview mode first +4. Create an issue with detailed information + +--- + +**Note**: This tool is specifically designed for ID card augmentation and may need adjustments for other image types. 
\ No newline at end of file diff --git a/config/config.yaml b/config/config.yaml index cf5299c..85a79ae 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -7,13 +7,52 @@ paths: output_dir: "out" log_file: "logs/data_augmentation.log" -# Data augmentation parameters - ONLY ROTATION +# Data augmentation parameters - all seven augmentation techniques augmentation: - # Geometric transformations - ONLY ROTATION + # Geometric transformations rotation: enabled: true angles: [30, 60, 120, 150, 180, 210, 240, 300, 330] # Specific rotation angles probability: 1.0 # Always apply rotation + + # Random cropping to simulate partially visible ID cards + random_cropping: + enabled: true + ratio_range: [0.7, 1.0] # Crop ratio range (min, max) + probability: 1.0 # Always apply cropping + + # Random noise to simulate worn-out ID cards + random_noise: + enabled: true + mean_range: [0.0, 0.7] # Noise mean range (min, max) + variance_range: [0.0, 0.1] # Noise variance range (min, max) + probability: 1.0 # Always apply noise + + # Partial blockage to simulate occluded card details + partial_blockage: + enabled: true + num_occlusions_range: [1, 100] # Number of occlusion lines (min, max) + coverage_range: [0.0, 0.25] # Coverage ratio (min, max) + variance_range: [0.0, 0.1] # Line thickness variance (min, max) + probability: 1.0 # Always apply blockage + + # Grayscale transformation to mimic Xerox/scan copies + grayscale: + enabled: true + probability: 1.0 # Always apply grayscale + + # Blurring to simulate blurred card images that are still readable + blurring: + enabled: true + kernel_ratio_range: [0.0, 0.0084] # Kernel ratio range (min, max) + probability: 1.0 # Always apply blurring + + # Brightness and contrast adjustment to mimic different environmental lighting conditions + brightness_contrast: + enabled: true + alpha_range: [0.4, 3.0] # Contrast range (min, max) + beta_range: [1, 100] # Brightness range (min, max) + probability: 1.0 # Always apply brightness/contrast adjustment 
# Processing configuration processing: diff --git a/src/config_manager.py b/src/config_manager.py index 62d612f..eba2d7a 100644 --- a/src/config_manager.py +++ b/src/config_manager.py @@ -44,7 +44,13 @@ class ConfigManager: "log_file": "logs/data_augmentation.log" }, "augmentation": { - "rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0} + "rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0}, + "random_cropping": {"enabled": True, "ratio_range": [0.7, 1.0], "probability": 1.0}, + "random_noise": {"enabled": True, "mean_range": [0.0, 0.7], "variance_range": [0.0, 0.1], "probability": 1.0}, + "partial_blockage": {"enabled": True, "num_occlusions_range": [1, 100], "coverage_range": [0.0, 0.25], "variance_range": [0.0, 0.1], "probability": 1.0}, + "grayscale": {"enabled": True, "probability": 1.0}, + "blurring": {"enabled": True, "kernel_ratio_range": [0.0, 0.0084], "probability": 1.0}, + "brightness_contrast": {"enabled": True, "alpha_range": [0.4, 3.0], "beta_range": [1, 100], "probability": 1.0} }, "processing": { "target_size": [224, 224], diff --git a/src/data_augmentation.py b/src/data_augmentation.py index fbc9b63..de2627a 100644 --- a/src/data_augmentation.py +++ b/src/data_augmentation.py @@ -1,5 +1,5 @@ """ -Data augmentation class for image augmentation - ONLY ROTATION with quality preservation +Data augmentation class for image augmentation - ROTATION and RANDOM CROPPING with quality preservation """ import cv2 import numpy as np @@ -11,7 +11,7 @@ from image_processor import ImageProcessor from utils import load_image, save_image, create_augmented_filename, print_progress class DataAugmentation: - """Class for image data augmentation - ONLY ROTATION""" + """Class for image data augmentation - ROTATION and RANDOM CROPPING""" def __init__(self, config: Dict[str, Any] = None): """ @@ -23,6 +23,223 @@ class DataAugmentation: self.config = config or {} 
self.image_processor = ImageProcessor() + def random_crop_preserve_quality(self, image: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.7, 1.0)) -> np.ndarray: + """ + Apply random cropping to simulate partially visible ID cards with white background preservation + + Args: + image: Input image + crop_ratio_range: Range for crop ratio (min_ratio, max_ratio) + + Returns: + Cropped image with white background + """ + height, width = image.shape[:2] + + # Random crop ratio between 0.7 and 1.0 + crop_ratio = random.uniform(crop_ratio_range[0], crop_ratio_range[1]) + + # Calculate new dimensions + new_height = int(height * crop_ratio) + new_width = int(width * crop_ratio) + + # Ensure minimum size + new_height = max(new_height, int(height * 0.7)) + new_width = max(new_width, int(width * 0.7)) + + # Random starting position for cropping + max_start_y = height - new_height + max_start_x = width - new_width + + # Ensure we don't go out of bounds + if max_start_y < 0 or max_start_x < 0: + # If crop is larger than image, return original + return image + + start_y = random.randint(0, max_start_y) + start_x = random.randint(0, max_start_x) + + # Crop the image + cropped = image[start_y:start_y + new_height, start_x:start_x + new_width] + + # Create white background canvas with original size + canvas = np.ones((height, width, 3), dtype=np.uint8) * 255 + + # Calculate offset to center the cropped image + offset_y = (height - new_height) // 2 + offset_x = (width - new_width) // 2 + + # Place cropped image in center of white canvas + canvas[offset_y:offset_y + new_height, offset_x:offset_x + new_width] = cropped + + return canvas + + def add_random_noise_preserve_quality(self, image: np.ndarray, mean_range: Tuple[float, float] = (0.0, 0.7), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray: + """ + Add random noise to simulate worn-out ID cards with quality preservation + + Args: + image: Input image + mean_range: Range for noise mean (min_mean, max_mean) + 
variance_range: Range for noise variance (min_variance, max_variance) + + Returns: + Image with added noise + """ + # Convert image to float for noise addition + image_float = image.astype(np.float32) / 255.0 + + # Generate random noise parameters + mean = random.uniform(mean_range[0], mean_range[1]) + variance = random.uniform(variance_range[0], variance_range[1]) + + # Generate Gaussian noise + noise = np.random.normal(mean, np.sqrt(variance), image_float.shape) + + # Add noise to image + noisy_image = image_float + noise + + # Clip values to [0, 1] range + noisy_image = np.clip(noisy_image, 0.0, 1.0) + + # Convert back to uint8 + noisy_image = (noisy_image * 255).astype(np.uint8) + + return noisy_image + + def add_partial_blockage_preserve_quality(self, image: np.ndarray, num_occlusions_range: Tuple[int, int] = (1, 100), coverage_range: Tuple[float, float] = (0.0, 0.25), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray: + """ + Add partial blockage with horizontal lines of different colors to simulate occluded card details + + Args: + image: Input image + num_occlusions_range: Range for number of occlusions (min, max) + coverage_range: Range for coverage ratio (min, max) + variance_range: Range for line thickness variance (min, max) + + Returns: + Image with partial blockage + """ + height, width = image.shape[:2] + result = image.copy() + + # Generate random parameters + num_occlusions = random.randint(num_occlusions_range[0], num_occlusions_range[1]) + coverage = random.uniform(coverage_range[0], coverage_range[1]) + variance = random.uniform(variance_range[0], variance_range[1]) + + # Calculate total area to cover + total_area = height * width + target_coverage_area = int(total_area * coverage) + + # Colors for occlusion lines (different colors to simulate various objects) + occlusion_colors = [ + (255, 255, 255), # White + (0, 0, 0), # Black + (128, 128, 128), # Gray + (255, 0, 0), # Red + (0, 255, 0), # Green + (0, 0, 255), # Blue + (255, 
255, 0), # Yellow + (255, 0, 255), # Magenta + (0, 255, 255), # Cyan + ] + + current_coverage = 0 + + for _ in range(num_occlusions): + if current_coverage >= target_coverage_area: + break + + # Random line parameters + color = random.choice(occlusion_colors) + thickness = max(1, int(random.gauss(3, variance * 10))) + + # Random horizontal line coordinates (y1 = y2 for horizontal lines) + y = random.randint(0, height) + x1 = random.randint(0, width) + x2 = random.randint(x1, width) # Ensure x2 >= x1 for proper line + + # Draw horizontal line + cv2.line(result, (x1, y), (x2, y), color, thickness) + + # Calculate coverage of this line (approximate) + line_length = x2 - x1 + line_coverage = line_length * thickness + current_coverage += line_coverage + + return result + + def convert_to_grayscale_preserve_quality(self, image: np.ndarray) -> np.ndarray: + """ + Convert image to grayscale to mimic Xerox/scan copies with quality preservation + + Args: + image: Input image + + Returns: + Grayscale image + """ + # Convert to grayscale using OpenCV + grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY) + + # Convert back to 3-channel image to maintain compatibility + grayscale_3channel = cv2.cvtColor(grayscale, cv2.COLOR_GRAY2RGB) + + return grayscale_3channel + + def apply_blurring_preserve_quality(self, image: np.ndarray, kernel_ratio_range: Tuple[float, float] = (0.0, 0.0084)) -> np.ndarray: + """ + Apply blurring to simulate blurred card images that are still readable using OpenCV filter2D + + Args: + image: Input image + kernel_ratio_range: Range for kernel ratio (min, max) + + Returns: + Blurred image + """ + # Generate random kernel ratio + kernel_ratio = random.uniform(kernel_ratio_range[0], kernel_ratio_range[1]) + + # Calculate kernel size based on image dimensions + height, width = image.shape[:2] + kernel_size = max(3, int(min(height, width) * kernel_ratio)) + + # Ensure kernel size is odd + if kernel_size % 2 == 0: + kernel_size += 1 + + # Create Gaussian kernel 
for blurring + kernel = cv2.getGaussianKernel(kernel_size, kernel_size / 3.0) + kernel_2d = kernel * kernel.T + + # Apply filter2D for blurring + blurred = cv2.filter2D(image, -1, kernel_2d) + + return blurred + + def adjust_brightness_contrast_preserve_quality(self, image: np.ndarray, alpha_range: Tuple[float, float] = (0.4, 3.0), beta_range: Tuple[int, int] = (1, 100)) -> np.ndarray: + """ + Adjust brightness and contrast to mimic different environmental lighting conditions using OpenCV convertScaleAbs + + Args: + image: Input image + alpha_range: Range for contrast (alpha) (min, max) + beta_range: Range for brightness (beta) (min, max) + + Returns: + Image with adjusted brightness and contrast + """ + # Generate random alpha and beta values + alpha = random.uniform(alpha_range[0], alpha_range[1]) # Contrast + beta = random.randint(beta_range[0], beta_range[1]) # Brightness + + # Apply brightness and contrast adjustment using convertScaleAbs + adjusted = cv2.convertScaleAbs(image, alpha=alpha, beta=beta) + + return adjusted + def rotate_image_preserve_quality(self, image: np.ndarray, angle: float) -> np.ndarray: """ Rotate image by given angle with white background and crop to preserve quality @@ -150,37 +367,132 @@ class DataAugmentation: def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]: """ - Apply rotation augmentation to a single image with quality preservation + Apply each augmentation method separately to create independent augmented versions Args: image: Input image - num_augmentations: Number of augmented versions to create + num_augmentations: Number of augmented versions to create per method Returns: - List of augmented images + List of augmented images (each method creates separate versions) """ num_augmentations = num_augmentations or 3 # Default value augmented_images = [] - # Get rotation configuration + # Get configuration rotation_config = self.config.get("rotation", {}) - angles = 
rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330]) + cropping_config = self.config.get("random_cropping", {}) + noise_config = self.config.get("random_noise", {}) + blockage_config = self.config.get("partial_blockage", {}) + grayscale_config = self.config.get("grayscale", {}) + blurring_config = self.config.get("blurring", {}) + brightness_contrast_config = self.config.get("brightness_contrast", {}) - for i in range(num_augmentations): - # Start with original image - augmented = image.copy() - - # Apply rotation with quality preservation - if rotation_config.get("enabled", False): + # Configuration parameters + angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330]) + crop_ratio_range = cropping_config.get("ratio_range", (0.7, 1.0)) + mean_range = noise_config.get("mean_range", (0.0, 0.7)) + variance_range = noise_config.get("variance_range", (0.0, 0.1)) + num_occlusions_range = blockage_config.get("num_occlusions_range", (1, 100)) + coverage_range = blockage_config.get("coverage_range", (0.0, 0.25)) + blockage_variance_range = blockage_config.get("variance_range", (0.0, 0.1)) + kernel_ratio_range = blurring_config.get("kernel_ratio_range", (0.0, 0.0084)) + alpha_range = brightness_contrast_config.get("alpha_range", (0.4, 3.0)) + beta_range = brightness_contrast_config.get("beta_range", (1, 100)) + + # Apply each method separately to create independent versions + + # 1. 
Rotation only + if rotation_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() angle = random.choice(angles) augmented = self.rotate_image_preserve_quality(augmented, angle) - - # Resize preserving aspect ratio - target_size = self.image_processor.target_size - if target_size: - augmented = self.resize_preserve_aspect(augmented, target_size) - - augmented_images.append(augmented) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 2. Random cropping only + if cropping_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.random_crop_preserve_quality(augmented, crop_ratio_range) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 3. Random noise only + if noise_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.add_random_noise_preserve_quality(augmented, mean_range, variance_range) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 4. Partial blockage only + if blockage_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.add_partial_blockage_preserve_quality(augmented, num_occlusions_range, coverage_range, blockage_variance_range) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 5. 
Grayscale only + if grayscale_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.convert_to_grayscale_preserve_quality(augmented) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 6. Blurring only + if blurring_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.apply_blurring_preserve_quality(augmented, kernel_ratio_range) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) + + # 7. Brightness and contrast only + if brightness_contrast_config.get("enabled", False): + for i in range(num_augmentations): + augmented = image.copy() + augmented = self.adjust_brightness_contrast_preserve_quality(augmented, alpha_range, beta_range) + + # Resize preserving aspect ratio + target_size = self.image_processor.target_size + if target_size: + augmented = self.resize_preserve_aspect(augmented, target_size) + + augmented_images.append(augmented) return augmented_images @@ -191,7 +503,7 @@ class DataAugmentation: Args: image_path: Path to input image output_dir: Output directory for augmented images - num_augmentations: Number of augmented versions to create + num_augmentations: Number of augmented versions to create per method Returns: List of paths to saved augmented images @@ -204,16 +516,24 @@ class DataAugmentation: # Apply augmentations augmented_images = self.augment_single_image(image, num_augmentations) - # Save augmented images + # Save augmented images with method names saved_paths = [] + method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"] + method_index = 0 + for i, aug_image 
in enumerate(augmented_images): - # Create output filename - output_filename = create_augmented_filename(image_path, i + 1) + # Determine method name based on index + method_name = method_names[method_index // num_augmentations] if method_index // num_augmentations < len(method_names) else "aug" + + # Create output filename with method name + output_filename = create_augmented_filename(image_path, (i % num_augmentations) + 1, method_name) output_path = output_dir / output_filename.name # Save image if save_image(aug_image, output_path): saved_paths.append(output_path) + + method_index += 1 return saved_paths diff --git a/src/utils.py b/src/utils.py index 816baa8..c891d8d 100644 --- a/src/utils.py +++ b/src/utils.py @@ -70,10 +70,10 @@ def save_image(image: np.ndarray, output_path: Path, quality: int = 95) -> bool: print(f"Error saving image {output_path}: {e}") return False -def create_augmented_filename(original_path: Path, index: int, suffix: str = "aug") -> Path: - """Create filename for augmented image""" +def create_augmented_filename(original_path: Path, index: int, method: str = "aug") -> Path: + """Create filename for augmented image with method name""" stem = original_path.stem - suffix = f"_{suffix}_{index:02d}" + suffix = f"_{method}_{index:02d}" return original_path.parent / f"{stem}{suffix}{original_path.suffix}"