done 7 transformations

This commit is contained in:
Nguyễn Phước Thành
2025-08-05 21:42:23 +07:00
parent 3fd270c8bd
commit 96a1de908f
6 changed files with 706 additions and 30 deletions

View File

@@ -44,7 +44,13 @@ class ConfigManager:
"log_file": "logs/data_augmentation.log"
},
"augmentation": {
"rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0}
"rotation": {"enabled": True, "angles": [30, 60, 120, 150, 180, 210, 240, 300, 330], "probability": 1.0},
"random_cropping": {"enabled": True, "ratio_range": [0.7, 1.0], "probability": 1.0},
"random_noise": {"enabled": True, "mean_range": [0.0, 0.7], "variance_range": [0.0, 0.1], "probability": 1.0},
"partial_blockage": {"enabled": True, "num_occlusions_range": [1, 100], "coverage_range": [0.0, 0.25], "variance_range": [0.0, 0.1], "probability": 1.0},
"grayscale": {"enabled": True, "probability": 1.0},
"blurring": {"enabled": True, "kernel_ratio_range": [0.0, 0.0084], "probability": 1.0},
"brightness_contrast": {"enabled": True, "alpha_range": [0.4, 3.0], "beta_range": [1, 100], "probability": 1.0}
},
"processing": {
"target_size": [224, 224],

View File

@@ -1,5 +1,5 @@
"""
Data augmentation class for image augmentation - ONLY ROTATION with quality preservation
Data augmentation class for image augmentation - rotation, random cropping, random noise, partial blockage, grayscale, blurring and brightness/contrast, with quality preservation
"""
import cv2
import numpy as np
@@ -11,7 +11,7 @@ from image_processor import ImageProcessor
from utils import load_image, save_image, create_augmented_filename, print_progress
class DataAugmentation:
"""Class for image data augmentation - ONLY ROTATION"""
"""Class for image data augmentation - rotation, random cropping, random noise, partial blockage, grayscale, blurring and brightness/contrast"""
def __init__(self, config: Dict[str, Any] = None):
"""
@@ -23,6 +23,223 @@ class DataAugmentation:
self.config = config or {}
self.image_processor = ImageProcessor()
def random_crop_preserve_quality(self, image: np.ndarray, crop_ratio_range: Tuple[float, float] = (0.7, 1.0)) -> np.ndarray:
    """
    Randomly crop the image and paste the crop, centered, on a white canvas.

    Simulates a partially visible ID card: a random window is cut out of the
    input and placed in the middle of a canvas filled with 255 that has the
    same shape and dtype as the input.

    Args:
        image: Input image, 2-D (grayscale) or 3-D (height, width, channels)
        crop_ratio_range: (min_ratio, max_ratio); one ratio is drawn uniformly
            from it and applied to both height and width. Each cropped side is
            additionally kept at no less than 70% of the original side.

    Returns:
        The cropped image on a white background, same shape as the input.
        The input itself is returned unchanged when the requested crop would
        be larger than the image (ratio above 1.0).
    """
    height, width = image.shape[:2]

    crop_ratio = random.uniform(crop_ratio_range[0], crop_ratio_range[1])

    # Crop size, floored at 70% of each side so the card stays recognizable.
    crop_height = max(int(height * crop_ratio), int(height * 0.7))
    crop_width = max(int(width * crop_ratio), int(width * 0.7))

    max_start_y = height - crop_height
    max_start_x = width - crop_width
    if max_start_y < 0 or max_start_x < 0:
        # Crop window does not fit inside the image: nothing to do.
        return image

    start_y = random.randint(0, max_start_y)
    start_x = random.randint(0, max_start_x)
    cropped = image[start_y:start_y + crop_height, start_x:start_x + crop_width]

    # Build the canvas from the input's own shape/dtype so grayscale (2-D)
    # and 4-channel images work too, not only 3-channel uint8.
    canvas = np.ones(image.shape, dtype=image.dtype) * 255

    # Center the crop on the canvas.
    offset_y = max_start_y // 2
    offset_x = max_start_x // 2
    canvas[offset_y:offset_y + crop_height, offset_x:offset_x + crop_width] = cropped
    return canvas
def add_random_noise_preserve_quality(self, image: np.ndarray, mean_range: Tuple[float, float] = (0.0, 0.7), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray:
    """
    Add Gaussian noise to simulate worn-out ID cards.

    The image is scaled to [0, 1], noise drawn from N(mean, variance) is
    added to every element, the result is clipped back to [0, 1] and
    converted to uint8.

    Args:
        image: Input image with values on the 0-255 scale
        mean_range: (min_mean, max_mean); the noise mean is drawn uniformly
            from this range (in [0, 1] intensity units)
        variance_range: (min_variance, max_variance); the noise variance is
            drawn uniformly from this range and must be non-negative

    Returns:
        Noisy uint8 image with the same shape as the input
    """
    normalized = image.astype(np.float32) / 255.0

    mean = random.uniform(mean_range[0], mean_range[1])
    variance = random.uniform(variance_range[0], variance_range[1])

    # np.random.normal takes the standard deviation, hence the square root.
    noise = np.random.normal(mean, np.sqrt(variance), normalized.shape)

    noisy = np.clip(normalized + noise, 0.0, 1.0)

    # Round to nearest before casting: a plain astype() truncates, which
    # darkens pixels by one level whenever the float32 round trip lands
    # just below the original integer value.
    return np.rint(noisy * 255.0).astype(np.uint8)
def add_partial_blockage_preserve_quality(self, image: np.ndarray, num_occlusions_range: Tuple[int, int] = (1, 100), coverage_range: Tuple[float, float] = (0.0, 0.25), variance_range: Tuple[float, float] = (0.0, 0.1)) -> np.ndarray:
    """
    Draw random horizontal colored lines over a copy of the image to
    simulate occluded card details.

    Lines are added until either the drawn number of occlusions is reached
    or the approximate covered area reaches the drawn coverage target.

    Args:
        image: Input image (left untouched; a copy is modified)
        num_occlusions_range: (min, max) number of lines, inclusive
        coverage_range: (min, max) fraction of the image area to cover
        variance_range: (min, max) range for a value that controls the
            spread of line thickness (thickness ~ gauss(3, value * 10),
            floored at 1)

    Returns:
        Copy of the image with the occlusion lines drawn on it
    """
    height, width = image.shape[:2]
    result = image.copy()

    num_occlusions = random.randint(num_occlusions_range[0], num_occlusions_range[1])
    coverage = random.uniform(coverage_range[0], coverage_range[1])
    variance = random.uniform(variance_range[0], variance_range[1])

    # Pixel budget for all lines together. For an empty image this is 0,
    # so the loop below exits before any coordinate is drawn.
    target_coverage_area = int(height * width * coverage)

    # Line colors; the names assume RGB channel order - TODO confirm
    # against the image loader.
    occlusion_colors = [
        (255, 255, 255),  # White
        (0, 0, 0),        # Black
        (128, 128, 128),  # Gray
        (255, 0, 0),      # Red
        (0, 255, 0),      # Green
        (0, 0, 255),      # Blue
        (255, 255, 0),    # Yellow
        (255, 0, 255),    # Magenta
        (0, 255, 255),    # Cyan
    ]

    current_coverage = 0
    for _ in range(num_occlusions):
        if current_coverage >= target_coverage_area:
            break

        color = random.choice(occlusion_colors)
        thickness = max(1, int(random.gauss(3, variance * 10)))

        # random.randint is inclusive on both ends, so the upper bounds are
        # the last valid row/column (height - 1, width - 1), not the sizes.
        y = random.randint(0, height - 1)
        x1 = random.randint(0, width - 1)
        x2 = random.randint(x1, width - 1)  # x2 >= x1 keeps the line left-to-right

        cv2.line(result, (x1, y), (x2, y), color, thickness)

        # Approximate covered area; both endpoints are drawn, hence the + 1.
        current_coverage += (x2 - x1 + 1) * thickness

    return result
def convert_to_grayscale_preserve_quality(self, image: np.ndarray) -> np.ndarray:
    """
    Convert the image to grayscale to mimic Xerox/scan copies, keeping a
    3-channel layout so downstream code sees the usual shape.

    Args:
        image: Input image; either multi-channel (treated as RGB by the
            OpenCV conversion) or already single-channel (2-D or (h, w, 1))

    Returns:
        3-channel image whose channels all hold the same gray values
    """
    # Already-gray input: the RGB->GRAY conversion would reject it, so just
    # replicate the single channel three times.
    if image.ndim == 2:
        return np.repeat(image[:, :, np.newaxis], 3, axis=2)
    if image.shape[2] == 1:
        return np.repeat(image, 3, axis=2)

    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # Back to three channels to stay compatible with the color pipeline.
    return cv2.cvtColor(grayscale, cv2.COLOR_GRAY2RGB)
def apply_blurring_preserve_quality(self, image: np.ndarray, kernel_ratio_range: Tuple[float, float] = (0.0, 0.0084)) -> np.ndarray:
    """
    Blur the image with a Gaussian kernel applied through cv2.filter2D, to
    simulate slightly out-of-focus card photos.

    Args:
        image: Input image
        kernel_ratio_range: (min, max) range for the kernel size expressed
            as a fraction of the shorter image side

    Returns:
        Blurred image
    """
    ratio = random.uniform(kernel_ratio_range[0], kernel_ratio_range[1])

    rows, cols = image.shape[:2]
    shorter_side = min(rows, cols)

    # Kernel is at least 3 pixels wide and always odd.
    size = int(shorter_side * ratio)
    if size < 3:
        size = 3
    if size % 2 == 0:
        size = size + 1

    # 1-D Gaussian column vector (sigma = size / 3); its outer product with
    # itself gives the 2-D kernel.
    column = cv2.getGaussianKernel(size, size / 3.0)
    kernel_2d = column @ column.T

    return cv2.filter2D(image, -1, kernel_2d)
def adjust_brightness_contrast_preserve_quality(self, image: np.ndarray, alpha_range: Tuple[float, float] = (0.4, 3.0), beta_range: Tuple[int, int] = (1, 100)) -> np.ndarray:
    """
    Randomly change contrast and brightness with cv2.convertScaleAbs to
    mimic different lighting conditions.

    Args:
        image: Input image
        alpha_range: (min, max) range for the contrast factor (alpha),
            drawn uniformly
        beta_range: (min, max) integer range for the brightness offset
            (beta), drawn uniformly with both ends included

    Returns:
        Image with adjusted brightness and contrast
    """
    alpha_low, alpha_high = alpha_range[0], alpha_range[1]
    beta_low, beta_high = beta_range[0], beta_range[1]

    # Draw contrast first, then brightness.
    contrast = random.uniform(alpha_low, alpha_high)
    brightness = random.randint(beta_low, beta_high)

    return cv2.convertScaleAbs(image, alpha=contrast, beta=brightness)
def rotate_image_preserve_quality(self, image: np.ndarray, angle: float) -> np.ndarray:
"""
Rotate image by given angle with white background and crop to preserve quality
@@ -150,37 +367,132 @@ class DataAugmentation:
def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
    """
    Apply each enabled augmentation method separately to create independent
    augmented versions of one image.

    Methods run in a fixed order: rotation, random cropping, random noise,
    partial blockage, grayscale, blurring, brightness/contrast. Every
    enabled method contributes ``num_augmentations`` images, each produced
    from a fresh copy of the input and then resized to the processor's
    target size when one is set. The per-method "probability" setting is
    not consulted here.

    Args:
        image: Input image
        num_augmentations: Number of augmented versions to create per
            method (defaults to 3 when None or 0)

    Returns:
        List of augmented images, grouped by method in the order above;
        empty when no method is enabled
    """
    num_augmentations = num_augmentations or 3  # Default value

    # Per-method configuration sections
    rotation_config = self.config.get("rotation", {})
    cropping_config = self.config.get("random_cropping", {})
    noise_config = self.config.get("random_noise", {})
    blockage_config = self.config.get("partial_blockage", {})
    grayscale_config = self.config.get("grayscale", {})
    blurring_config = self.config.get("blurring", {})
    brightness_contrast_config = self.config.get("brightness_contrast", {})

    # Method parameters, with the same defaults as the method signatures
    angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
    crop_ratio_range = cropping_config.get("ratio_range", (0.7, 1.0))
    mean_range = noise_config.get("mean_range", (0.0, 0.7))
    variance_range = noise_config.get("variance_range", (0.0, 0.1))
    num_occlusions_range = blockage_config.get("num_occlusions_range", (1, 100))
    coverage_range = blockage_config.get("coverage_range", (0.0, 0.25))
    blockage_variance_range = blockage_config.get("variance_range", (0.0, 0.1))
    kernel_ratio_range = blurring_config.get("kernel_ratio_range", (0.0, 0.0084))
    alpha_range = brightness_contrast_config.get("alpha_range", (0.4, 3.0))
    beta_range = brightness_contrast_config.get("beta_range", (1, 100))

    # (config section, transform) pairs; list order defines output order.
    # The lambdas look the methods up on self only when called, so a
    # disabled method is never touched.
    methods = [
        (rotation_config,
         lambda img: self.rotate_image_preserve_quality(img, random.choice(angles))),
        (cropping_config,
         lambda img: self.random_crop_preserve_quality(img, crop_ratio_range)),
        (noise_config,
         lambda img: self.add_random_noise_preserve_quality(img, mean_range, variance_range)),
        (blockage_config,
         lambda img: self.add_partial_blockage_preserve_quality(
             img, num_occlusions_range, coverage_range, blockage_variance_range)),
        (grayscale_config,
         lambda img: self.convert_to_grayscale_preserve_quality(img)),
        (blurring_config,
         lambda img: self.apply_blurring_preserve_quality(img, kernel_ratio_range)),
        (brightness_contrast_config,
         lambda img: self.adjust_brightness_contrast_preserve_quality(img, alpha_range, beta_range)),
    ]

    augmented_images = []
    for method_config, transform in methods:
        if not method_config.get("enabled", False):
            continue

        for _ in range(num_augmentations):
            # Each version starts from an untouched copy of the input.
            augmented = transform(image.copy())

            # Resize preserving aspect ratio
            target_size = self.image_processor.target_size
            if target_size:
                augmented = self.resize_preserve_aspect(augmented, target_size)

            augmented_images.append(augmented)

    return augmented_images
@@ -191,7 +503,7 @@ class DataAugmentation:
Args:
image_path: Path to input image
output_dir: Output directory for augmented images
num_augmentations: Number of augmented versions to create
num_augmentations: Number of augmented versions to create per method
Returns:
List of paths to saved augmented images
@@ -204,16 +516,24 @@ class DataAugmentation:
# Apply augmentations
augmented_images = self.augment_single_image(image, num_augmentations)
# Save augmented images
# Save augmented images with method names
saved_paths = []
method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"]
method_index = 0
for i, aug_image in enumerate(augmented_images):
# Create output filename
output_filename = create_augmented_filename(image_path, i + 1)
# Determine method name based on index
method_name = method_names[method_index // num_augmentations] if method_index // num_augmentations < len(method_names) else "aug"
# Create output filename with method name
output_filename = create_augmented_filename(image_path, (i % num_augmentations) + 1, method_name)
output_path = output_dir / output_filename.name
# Save image
if save_image(aug_image, output_path):
saved_paths.append(output_path)
method_index += 1
return saved_paths

View File

@@ -70,10 +70,10 @@ def save_image(image: np.ndarray, output_path: Path, quality: int = 95) -> bool:
print(f"Error saving image {output_path}: {e}")
return False
def create_augmented_filename(original_path: Path, index: int, method: str = "aug") -> Path:
    """
    Create the filename for an augmented image, tagged with the method name.

    Args:
        original_path: Path of the source image
        index: Sequence number of the augmented version; zero-padded to at
            least two digits
        method: Name of the augmentation method embedded in the filename

    Returns:
        Path in the same directory as the original, named
        ``<stem>_<method>_<index><extension>``
    """
    tag = f"_{method}_{index:02d}"
    return original_path.parent / f"{original_path.stem}{tag}{original_path.suffix}"