refactor YOLO crop model

This commit is contained in:
Nguyễn Phước Thành
2025-08-05 20:53:47 +07:00
parent a4e7573dca
commit 3fd270c8bd
24 changed files with 136 additions and 479 deletions

View File

@@ -1,9 +1,9 @@
"""
Data Augmentation Package
Data Augmentation Package for ID Cards
"""
__version__ = "1.0.0"
__author__ = "OCR Data Augmentation Tool"
__version__ = "2.0.0"
__author__ = "ID Card Data Augmentation Tool"
from .utils import *
from .image_processor import ImageProcessor
@@ -12,12 +12,12 @@ from .config_manager import ConfigManager
__all__ = [
"ImageProcessor",
"DataAugmentation",
"DataAugmentation",
"ConfigManager",
"setup_logging",
"get_image_files",
"load_image",
"save_image",
"validate_image",
"create_augmented_filename",
"print_progress",
]

Binary file not shown.

View File

@@ -1,40 +0,0 @@
"""
Configuration file for data augmentation
"""
import os
from pathlib import Path
# Directory layout, resolved relative to the directory two levels above this file
BASE_DIR = Path(__file__).parent.parent
DATA_DIR = BASE_DIR.joinpath("data")
INPUT_IMAGES_DIR = DATA_DIR.joinpath("dataset", "training_data", "images")
OUTPUT_DIR = DATA_DIR.joinpath("augmented_data")

# Settings that describe how each image may be transformed
AUGMENTATION_CONFIG = dict(
    rotation_range=15,  # degrees
    width_shift_range=0.1,  # fraction of total width
    height_shift_range=0.1,  # fraction of total height
    brightness_range=[0.8, 1.2],  # brightness factor
    zoom_range=[0.9, 1.1],  # zoom factor
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode="nearest",
    cval=0,
    rescale=1.0 / 255,
)

# Settings that describe how images are sized, batched and written
PROCESSING_CONFIG = dict(
    target_size=(224, 224),  # (width, height)
    batch_size=32,
    num_augmentations=3,  # number of augmented versions per image
    save_format="jpg",
    quality=95,
)

# File extensions accepted as input images
SUPPORTED_FORMATS = [".jpg", ".jpeg", ".png", ".bmp", ".tiff"]

# Importing this module creates the output directory (and parents) when missing
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

View File

@@ -1,5 +1,5 @@
"""
Data augmentation class for image augmentation - ONLY ROTATION
Data augmentation class for image augmentation - ONLY ROTATION with quality preservation
"""
import cv2
import numpy as np
@@ -23,32 +23,134 @@ class DataAugmentation:
self.config = config or {}
self.image_processor = ImageProcessor()
def rotate_image(self, image: np.ndarray, angle: float) -> np.ndarray:
def rotate_image_preserve_quality(self, image: np.ndarray, angle: float) -> np.ndarray:
"""
Rotate image by given angle
Rotate image by given angle with white background and crop to preserve quality
Args:
image: Input image
angle: Rotation angle in degrees
Returns:
Rotated image
Rotated and cropped image
"""
# NOTE(review): this span shows two signatures, paired docstring lines and two
# consecutive warpAffine assignments - it looks like the removed and added lines
# of a diff displayed together; confirm which lines are live before relying on it.
height, width = image.shape[:2]
center = (width // 2, height // 2)
# Calculate new dimensions for rotation
# (axis-aligned bounding box of a width x height rectangle rotated by `angle`)
angle_rad = math.radians(angle)
cos_val = abs(math.cos(angle_rad))
sin_val = abs(math.sin(angle_rad))
# Calculate new width and height
# int() truncates, so each side may come out up to one pixel short of the exact box
new_width = int(width * cos_val + height * sin_val)
new_height = int(height * cos_val + width * sin_val)
# Create larger canvas with white background
# The hard-coded 3 makes the canvas three-channel regardless of the input
canvas = np.ones((new_height, new_width, 3), dtype=np.uint8) * 255
# Calculate offset to center the image
offset_x = (new_width - width) // 2
offset_y = (new_height - height) // 2
# Ensure offsets are valid
# The unrotated image only fits when the bounding box is at least as large as the
# image in both directions; otherwise an offset is negative and the fallback runs
if offset_x >= 0 and offset_y >= 0 and offset_x + width <= new_width and offset_y + height <= new_height:
# Place original image in center of canvas
canvas[offset_y:offset_y+height, offset_x:offset_x+width] = image
else:
# If calculation is wrong, use a simpler approach
# Square white canvas with side 2 * max(height, width); the image is pasted centred
canvas = np.ones((max(height, width) * 2, max(height, width) * 2, 3), dtype=np.uint8) * 255
center_y, center_x = canvas.shape[0] // 2, canvas.shape[1] // 2
start_y = center_y - height // 2
start_x = center_x - width // 2
canvas[start_y:start_y+height, start_x:start_x+width] = image
new_width, new_height = canvas.shape[1], canvas.shape[0]
# Calculate center for rotation
# `center` is reassigned here to the canvas centre (it was first set from the image size)
center = (new_width // 2, new_height // 2)
# Create rotation matrix
# Scale factor 1.0: rotation only, no zoom
rotation_matrix = cv2.getRotationMatrix2D(center, angle, 1.0)
# Perform rotation
# `rotated` is assigned twice; the second call (canvas input, constant white border)
# overwrites the result of the first (image input, replicated border)
rotated = cv2.warpAffine(image, rotation_matrix, (width, height),
borderMode=cv2.BORDER_REPLICATE)
rotated = cv2.warpAffine(canvas, rotation_matrix, (new_width, new_height),
borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))
# Crop white borders to get the actual image content
rotated = self._crop_white_borders(rotated)
return rotated
def _crop_white_borders(self, image: np.ndarray, white_threshold: int = 250) -> np.ndarray:
    """
    Crop near-white borders so that only the actual content remains

    Args:
        image: Input image, either 3-channel RGB or a 2-D grayscale array
        white_threshold: Gray level at or above which a pixel is treated as
            white background (default 250)

    Returns:
        The part of ``image`` inside the bounding box of every pixel darker than
        ``white_threshold``; the input itself when no such pixel exists
    """
    # A 2-D array is already a single gray plane, so no colour conversion is needed
    if image.ndim == 2:
        gray = image
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Boolean mask of content pixels (anything darker than the white threshold)
    content = gray < white_threshold

    # Indices of rows / columns that contain at least one content pixel
    content_rows = np.flatnonzero(content.any(axis=1))
    if content_rows.size == 0:
        # Entirely white: nothing to crop to
        return image
    content_cols = np.flatnonzero(content.any(axis=0))

    # Bounding box is inclusive of the last content row/column, hence the +1
    top, bottom = content_rows[0], content_rows[-1] + 1
    left, right = content_cols[0], content_cols[-1] + 1
    return image[top:bottom, left:right]
def resize_preserve_aspect(self, image: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
    """
    Fit an image inside the target size, keeping its aspect ratio, and pad with white

    Args:
        image: Input image (H x W or H x W x C)
        target_size: Target size (width, height)

    Returns:
        uint8 image of exactly the target size with the scaled input centered
        on a white background

    Raises:
        ValueError: If the image has no pixels or the target size is not positive
    """
    target_width, target_height = target_size
    img_height, img_width = image.shape[:2]

    if img_height <= 0 or img_width <= 0:
        raise ValueError("Cannot resize an empty image")
    if target_width <= 0 or target_height <= 0:
        raise ValueError(f"Invalid target size: {target_size}")

    # Calculate aspect ratios
    target_aspect = target_width / target_height
    img_aspect = img_width / img_height

    # Clamp the derived side to [1, target]: int() truncation would otherwise
    # produce a zero-sized side for very elongated images
    if img_aspect > target_aspect:
        # Image is wider than target: width is the limiting side
        new_width = target_width
        new_height = max(1, min(target_height, int(target_width / img_aspect)))
    else:
        # Image is taller than (or matches) target: height is the limiting side
        new_height = target_height
        new_width = max(1, min(target_width, int(target_height * img_aspect)))

    if (new_width, new_height) == (img_width, img_height):
        # Already the right size; no resampling needed
        resized = image
    else:
        # INTER_AREA is meant for shrinking; use bicubic when enlarging so the
        # upscaled image is not blocky
        shrinking = new_width < img_width or new_height < img_height
        interpolation = cv2.INTER_AREA if shrinking else cv2.INTER_CUBIC
        resized = cv2.resize(image, (new_width, new_height), interpolation=interpolation)
        # Keep the channel layout of the input (a trailing singleton channel
        # axis may be dropped by the resize)
        resized = resized.reshape((new_height, new_width) + image.shape[2:])

    # White background with the same channel layout as the input
    result = np.full((target_height, target_width) + image.shape[2:], 255, dtype=np.uint8)

    # Calculate offset to center the image
    offset_x = (target_width - new_width) // 2
    offset_y = (target_height - new_height) // 2

    # Place resized image in center
    result[offset_y:offset_y + new_height, offset_x:offset_x + new_width] = resized
    return result
def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
"""
Apply rotation augmentation to a single image
Apply rotation augmentation to a single image with quality preservation
Args:
image: Input image
@@ -65,12 +167,18 @@ class DataAugmentation:
angles = rotation_config.get("angles", [30, 60, 120, 150, 180, 210, 240, 300, 330])
for i in range(num_augmentations):
# Start with original image
augmented = image.copy()
# Apply rotation with random angle from the specified list
# Apply rotation with quality preservation
if rotation_config.get("enabled", False):
angle = random.choice(angles)
augmented = self.rotate_image(augmented, angle)
augmented = self.rotate_image_preserve_quality(augmented, angle)
# Resize preserving aspect ratio
target_size = self.image_processor.target_size
if target_size:
augmented = self.resize_preserve_aspect(augmented, target_size)
augmented_images.append(augmented)
@@ -78,7 +186,7 @@ class DataAugmentation:
def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
"""
Augment a single image file and save results
Augment a single image file and save results with quality preservation
Args:
image_path: Path to input image
@@ -88,8 +196,8 @@ class DataAugmentation:
Returns:
List of paths to saved augmented images
"""
# Load image
image = load_image(image_path, self.image_processor.target_size)
# Load image without resizing to preserve original quality
image = load_image(image_path, None) # Load original size
if image is None:
return []

View File

@@ -1,14 +1,14 @@
"""
Image processing class for basic image operations
Image processing class for data augmentation
"""
import cv2
import numpy as np
from pathlib import Path
from typing import Tuple, Optional, List
from utils import load_image, save_image, validate_image, get_image_files
from typing import Tuple, Optional
from utils import load_image
class ImageProcessor:
"""Class for basic image processing operations"""
"""Class for image processing operations used in data augmentation"""
def __init__(self, target_size: Tuple[int, int] = None):
"""
@@ -17,67 +17,7 @@ class ImageProcessor:
Args:
target_size: Target size for image resizing (width, height)
"""
self.target_size = target_size or (224, 224) # Default size
def load_and_preprocess(self, image_path: Path) -> Optional[np.ndarray]:
    """
    Validate, load and scale an image file

    Args:
        image_path: Path to image file

    Returns:
        The image loaded at ``self.target_size`` as float32 with pixel values
        divided by 255, or None when validation or loading fails
    """
    if validate_image(image_path):
        loaded = load_image(image_path, self.target_size)
        # Scale pixel values by 1/255 only when loading succeeded
        return None if loaded is None else loaded.astype(np.float32) / 255.0

    print(f"Invalid image file: {image_path}")
    return None
def resize_image(self, image: np.ndarray, target_size: Tuple[int, int]) -> np.ndarray:
    """
    Scale an image to the requested dimensions

    Args:
        image: Input image as numpy array
        target_size: Target size (width, height)

    Returns:
        Image resized with area interpolation
    """
    resized = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
    return resized
def normalize_image(self, image: np.ndarray) -> np.ndarray:
    """
    Convert an image to float32 and divide its pixel values by 255

    Args:
        image: Input image

    Returns:
        float32 copy of the image with every value divided by 255
    """
    as_float = image.astype(np.float32)
    return np.true_divide(as_float, 255.0)
def denormalize_image(self, image: np.ndarray) -> np.ndarray:
    """
    Convert normalized pixel values back to 8-bit intensities

    Args:
        image: Input image (normalized, nominally in [0, 1])

    Returns:
        uint8 image; values are scaled by 255, rounded to the nearest integer
        and clipped to [0, 255]
    """
    scaled = image * 255
    # Round instead of truncating so floating-point error cannot drop an
    # intensity level, and clip so out-of-range inputs saturate instead of
    # wrapping around when cast to uint8
    return np.clip(np.rint(scaled), 0, 255).astype(np.uint8)
self.target_size = target_size or (640, 640) # Default size for ID cards
def get_image_info(self, image_path: Path) -> dict:
"""
@@ -107,68 +47,4 @@ class ImageProcessor:
}
except Exception as e:
print(f"Error getting image info for {image_path}: {e}")
return {}
def batch_process_images(self, input_dir: Path, output_dir: Path) -> List[Path]:
    """
    Process all images in a directory

    Each image is loaded and preprocessed, converted back to 8-bit and saved
    under its original file name in ``output_dir``. Images that fail to load
    or save are skipped.

    Args:
        input_dir: Input directory containing images
        output_dir: Output directory for processed images

    Returns:
        List of processed image paths
    """
    # print_progress is not among the names this module imports from utils at
    # top level, so bring it into scope here to avoid a NameError
    from utils import print_progress

    image_files = get_image_files(input_dir)
    total = len(image_files)
    processed_files = []

    print(f"Found {total} images to process")

    for position, image_path in enumerate(image_files, start=1):
        print_progress(position, total, "Processing images")

        # Load and preprocess image
        image = self.load_and_preprocess(image_path)
        if image is None:
            continue

        # Keep the original file name inside the output directory
        output_path = output_dir / image_path.name

        # Denormalize for saving; only record files that were actually written
        if save_image(self.denormalize_image(image), output_path):
            processed_files.append(output_path)

    print(f"\nProcessed {len(processed_files)} images successfully")
    return processed_files
def create_thumbnail(self, image: np.ndarray, size: Tuple[int, int] = (100, 100)) -> np.ndarray:
    """
    Resize an image to thumbnail dimensions

    Args:
        image: Input image
        size: Thumbnail size (width, height)

    Returns:
        Image resized to ``size`` with area interpolation
    """
    thumbnail = cv2.resize(image, size, interpolation=cv2.INTER_AREA)
    return thumbnail
def convert_to_grayscale(self, image: np.ndarray) -> np.ndarray:
    """
    Reduce a colour image to a single gray channel

    Args:
        image: Input image (RGB)

    Returns:
        Grayscale image; an input that is not 3-dimensional is returned unchanged
    """
    # Only 3-dimensional arrays are converted; anything else is passed through
    if image.ndim != 3:
        return image
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
return {}

View File

@@ -32,14 +32,7 @@ def get_image_files(directory: Path) -> List[Path]:
image_files.extend(directory.glob(f"*{ext.upper()}"))
return sorted(image_files)
def validate_image(image_path: Path) -> bool:
    """Return True if the file can be opened and verified as an image, else False"""
    try:
        with Image.open(image_path) as candidate:
            candidate.verify()
    except Exception:
        # Any failure while opening or verifying marks the file as invalid
        return False
    else:
        return True
def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optional[np.ndarray]:
"""Load and resize image"""
@@ -83,9 +76,7 @@ def create_augmented_filename(original_path: Path, index: int, suffix: str = "au
suffix = f"_{suffix}_{index:02d}"
return original_path.parent / f"{stem}{suffix}{original_path.suffix}"
def get_file_size_mb(file_path: Path) -> float:
    """Return the size of a file in megabytes (1 MB = 1024 * 1024 bytes)"""
    bytes_per_mb = 1024 * 1024
    size_in_bytes = file_path.stat().st_size
    return size_in_bytes / bytes_per_mb
def print_progress(current: int, total: int, prefix: str = "Progress"):
"""Print progress bar"""