update augment + YOLO pipeline

This commit is contained in:
Nguyễn Phước Thành
2025-08-06 20:52:39 +07:00
parent 4ee14f17d3
commit 51d3a66cc4
9 changed files with 989 additions and 407 deletions

View File

@@ -363,8 +363,6 @@ class DataAugmentation:
return result
def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
"""
Apply each augmentation method separately to create independent augmented versions
@@ -455,20 +453,7 @@ class DataAugmentation:
augmented_images.append(augmented)
# 5. Grayscale only
if grayscale_config.get("enabled", False):
for i in range(num_augmentations):
augmented = image.copy()
augmented = self.convert_to_grayscale_preserve_quality(augmented)
# Resize preserving aspect ratio
target_size = self.image_processor.target_size
if target_size:
augmented = self.resize_preserve_aspect(augmented, target_size)
augmented_images.append(augmented)
# 6. Blurring only
# 5. Blurring only
if blurring_config.get("enabled", False):
for i in range(num_augmentations):
augmented = image.copy()
@@ -481,7 +466,7 @@ class DataAugmentation:
augmented_images.append(augmented)
# 7. Brightness and contrast only
# 6. Brightness/Contrast only
if brightness_contrast_config.get("enabled", False):
for i in range(num_augmentations):
augmented = image.copy()
@@ -494,6 +479,11 @@ class DataAugmentation:
augmented_images.append(augmented)
# 7. Apply grayscale as final step to ALL augmented images
if grayscale_config.get("enabled", False):
for i in range(len(augmented_images)):
augmented_images[i] = self.convert_to_grayscale_preserve_quality(augmented_images[i])
return augmented_images
def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
@@ -518,7 +508,7 @@ class DataAugmentation:
# Save augmented images with method names
saved_paths = []
method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"]
method_names = ["rotation", "cropping", "noise", "blockage", "blurring", "brightness_contrast", "grayscale"]
method_index = 0
for i, aug_image in enumerate(augmented_images):

611
src/id_card_detector.py Normal file
View File

@@ -0,0 +1,611 @@
"""
ID Card Detector Module
Sử dụng YOLO để detect và cắt ID cards từ ảnh lớn, kết hợp với data augmentation
Tích hợp với YOLOv8 French ID Card Detection model
"""
import cv2
import numpy as np
from pathlib import Path
from typing import List, Tuple, Optional, Dict, Any, Union
import torch
import torch.nn as nn
from ultralytics import YOLO
import logging
from data_augmentation import DataAugmentation
from utils import load_image, save_image, create_augmented_filename, print_progress
import os
import json
import yaml
class IDCardDetector:
    """Detect ID cards in larger images with a YOLO model and crop them out.

    The detector wraps an ``ultralytics.YOLO`` model for inference, writes
    the cropped cards to disk, and can run the project's ``DataAugmentation``
    over previously cropped cards.
    """

    def __init__(self, model_path: Optional[str] = None, config: Optional[Dict[str, Any]] = None):
        """Initialize the detector and load the YOLO weights when available.

        Args:
            model_path: Path to trained YOLO weights. When omitted,
                ``data/weights/id_cards_yolov8n.pt`` is used if it exists.
            config: Configuration dictionary; ``None`` is stored as ``{}``.
        """
        self.config = config or {}
        self.model_path = model_path
        self.model = None
        # The caller's config object is forwarded untouched (possibly None);
        # how a missing config is handled is up to DataAugmentation.
        self.data_augmentation = DataAugmentation(config)
        self.logger = self._setup_logger()

        # Fall back to the bundled weights when no path was supplied.
        if not model_path:
            default_model_path = "data/weights/id_cards_yolov8n.pt"
            if os.path.exists(default_model_path):
                model_path = default_model_path
                self.model_path = model_path

        if model_path and os.path.exists(model_path):
            self.load_model(model_path)
        elif model_path:
            # Previously a wrong path was ignored silently, leaving the
            # detector without a model and no hint as to why.
            self.logger.warning(f"Model file not found: {model_path}")

    def _setup_logger(self) -> logging.Logger:
        """Return the module logger, attaching a stream handler only once."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            )
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def load_model(self, model_path: str) -> bool:
        """Load a YOLO model from ``model_path``.

        Args:
            model_path: Path to the model file.

        Returns:
            True when the model was loaded, False when loading raised.
        """
        try:
            self.model = YOLO(model_path)
            self.logger.info(f"Loaded YOLO model from: {model_path}")
            return True
        except Exception as e:
            self.logger.error(f"Failed to load model: {e}")
            return False

    def detect_id_cards(self, image: np.ndarray, confidence: float = 0.5,
                        iou_threshold: float = 0.45) -> List[Dict[str, Any]]:
        """Run YOLO inference on ``image`` and return the detections.

        Args:
            image: Input image.
            confidence: Confidence threshold passed to the model as ``conf``.
            iou_threshold: IoU threshold passed to the model as ``iou``.

        Returns:
            One dict per detected box with the keys ``bbox``
            (``[x1, y1, x2, y2]`` as ints), ``confidence`` (float),
            ``class_id`` (int) and ``class_name`` (str). An empty list is
            returned when no model is loaded or inference raises.
        """
        if self.model is None:
            self.logger.error("Model chưa được load!")
            return []

        try:
            results = self.model(image, conf=confidence, iou=float(iou_threshold), verbose=False)
            detections = []
            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue
                for box in boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    confidence_score = float(box.conf[0].cpu().numpy())
                    class_id = int(box.cls[0].cpu().numpy())
                    if hasattr(self.model, 'names'):
                        class_name = self.model.names[class_id]
                    else:
                        class_name = f"class_{class_id}"
                    detections.append({
                        'bbox': [int(x1), int(y1), int(x2), int(y2)],
                        'confidence': confidence_score,
                        'class_id': class_id,
                        'class_name': class_name
                    })
            self.logger.info(f"Detected {len(detections)} ID cards")
            return detections
        except Exception as e:
            # Best effort: a failed inference is reported as "no detections".
            self.logger.error(f"Error during detection: {e}")
            return []

    def crop_id_card(self, image: np.ndarray, bbox: List[int], padding: int = 10,
                     crop_mode: str = "bbox",
                     target_size: Optional[Tuple[int, int]] = None) -> np.ndarray:
        """Crop the region described by ``bbox`` out of ``image``.

        Args:
            image: Input image; only its first two dimensions (height, width)
                are used for clamping.
            bbox: Bounding box ``[x1, y1, x2, y2]``.
            padding: Pixels added on every side of the box before cropping.
            crop_mode: ``"bbox"`` crops the padded box, ``"square"`` crops a
                square centred on it, ``"aspect_ratio"`` shrinks the padded
                box to a 3:4 width:height ratio. Any other value behaves like
                ``"bbox"``.
            target_size: ``(width, height)`` to resize the crop to, or None.

        Returns:
            The cropped (and optionally resized) image. For a degenerate or
            out-of-frame box the result is an empty array, which is returned
            without resizing.
        """
        x1, y1, x2, y2 = bbox
        height, width = image.shape[:2]

        # Apply the padding and clamp to the frame. The far edges are also
        # clamped at 0 so a box lying outside the image yields an empty crop
        # instead of a negative (wrap-around) slice index.
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = max(0, min(width, x2 + padding))
        y2 = max(0, min(height, y2 + padding))

        box_width = x2 - x1
        box_height = y2 - y1
        center_x = (x1 + x2) // 2
        center_y = (y1 + y2) // 2

        if crop_mode == "square":
            # Square with the longer side of the box; clamping to the frame
            # may still cut it down near the image border.
            half_size = max(box_width, box_height) // 2
            x1 = max(0, center_x - half_size)
            y1 = max(0, center_y - half_size)
            x2 = min(width, center_x + half_size)
            y2 = min(height, center_y + half_size)
        elif crop_mode == "aspect_ratio" and box_width > 0 and box_height > 0:
            # The size guard avoids a ZeroDivisionError on degenerate boxes;
            # those produce an empty crop below anyway.
            target_ratio = 3 / 4
            if box_width / box_height > target_ratio:
                # Box too wide: keep the height, narrow the width.
                half_width = int(box_height * target_ratio) // 2
                x1 = max(0, center_x - half_width)
                x2 = min(width, center_x + half_width)
            else:
                # Box too tall: keep the width, shorten the height.
                half_height = int(box_width / target_ratio) // 2
                y1 = max(0, center_y - half_height)
                y2 = min(height, center_y + half_height)

        cropped = image[y1:y2, x1:x2]

        # cv2.resize rejects empty inputs, so only resize real crops.
        if target_size and cropped.size > 0:
            cropped = cv2.resize(cropped, target_size, interpolation=cv2.INTER_AREA)
        return cropped

    def process_single_image(self, image_path: Union[str, Path], output_dir: Union[str, Path],
                             confidence: float = 0.5, iou_threshold: float = 0.45,
                             crop_mode: str = "bbox",
                             target_size: Optional[Tuple[int, int]] = None,
                             padding: int = 10, card_counter: int = 0) -> Dict[str, Any]:
        """Detect the ID cards in one image and save each crop to disk.

        Args:
            image_path: Path to the input image.
            output_dir: Directory the crops are written to (created on demand).
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode (``"bbox"``, ``"square"``, ``"aspect_ratio"``).
            target_size: Target ``(width, height)`` or None.
            padding: Padding added around each bounding box.
            card_counter: Number of cards already written; the files of this
                image are numbered ``id_card_<card_counter + 1>.jpg`` onwards.

        Returns:
            A result dict with ``image_path``, ``detections`` and
            ``processed_cards`` (plus ``total_cards`` and ``crop_settings``
            when something was detected). ``{}`` is returned when the image
            does not exist or cannot be loaded.
        """
        image_path = Path(image_path)
        # Accept plain strings too; the original required a Path for .mkdir().
        output_dir = Path(output_dir)

        if not image_path.exists():
            self.logger.error(f"Image not found: {image_path}")
            return {}

        image = load_image(str(image_path))
        if image is None:
            self.logger.error(f"Failed to load image: {image_path}")
            return {}

        detections = self.detect_id_cards(image, confidence, float(iou_threshold))
        if not detections:
            self.logger.warning(f"No ID cards detected in: {image_path}")
            return {
                'image_path': str(image_path),
                'detections': [],
                'processed_cards': []
            }

        output_dir.mkdir(parents=True, exist_ok=True)

        processed_cards = []
        current_card_counter = card_counter
        for detection in detections:
            cropped_card = self.crop_id_card(
                image,
                detection['bbox'],
                padding=padding,
                crop_mode=crop_mode,
                target_size=target_size
            )
            if cropped_card.size == 0:
                # Nothing to write for a degenerate box; do not consume a
                # file number for it.
                self.logger.warning(
                    f"Skipping empty crop for bbox {detection['bbox']} in: {image_path}"
                )
                continue

            # Unique, sequential file name for every saved card.
            current_card_counter += 1
            card_filename = f"id_card_{current_card_counter:03d}.jpg"
            card_path = output_dir / card_filename
            save_image(cropped_card, card_path)

            processed_cards.append({
                'original_path': str(card_path),
                'detection_info': detection,
                'crop_info': {
                    'mode': crop_mode,
                    'target_size': target_size,
                    'padding': padding
                }
            })

        result = {
            'image_path': str(image_path),
            'detections': detections,
            'processed_cards': processed_cards,
            'total_cards': len(processed_cards),
            'crop_settings': {
                'mode': crop_mode,
                'target_size': target_size,
                'padding': padding
            }
        }
        self.logger.info(f"Processed {len(processed_cards)} cards from {image_path.name}")
        return result

    def _list_image_files(self, input_dir: Path) -> List[Path]:
        """Return the files in ``input_dir`` with a supported extension, sorted.

        Matching is case-insensitive and every file is listed exactly once.
        Globbing each extension in lower and upper case, as done before,
        returned every file twice on case-insensitive filesystems and missed
        mixed-case suffixes such as ``.Jpg`` on case-sensitive ones.
        """
        supported_formats = self.config.get(
            'supported_formats', ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
        )
        suffixes = tuple({fmt.lower() for fmt in supported_formats})
        return sorted(
            path for path in input_dir.iterdir()
            if path.is_file() and path.name.lower().endswith(suffixes)
        )

    def batch_process(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                      confidence: float = 0.5, iou_threshold: float = 0.45,
                      crop_mode: str = "bbox",
                      target_size: Optional[Tuple[int, int]] = None,
                      padding: int = 10) -> Dict[str, Any]:
        """Detect and crop the ID cards of every supported image in a directory.

        Only detection and cropping happen here; augmentation is a separate
        step (``augment_cropped_cards``).

        Args:
            input_dir: Directory containing the input images.
            output_dir: Directory the crops and the summary are written to.
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode (``"bbox"``, ``"square"``, ``"aspect_ratio"``).
            target_size: Target ``(width, height)`` or None.
            padding: Padding added around each bounding box.

        Returns:
            The summary dict that is also written to
            ``processing_summary.json``, or ``{}`` when the input directory is
            missing or holds no supported image.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        image_files = self._list_image_files(input_dir)
        if not image_files:
            self.logger.warning(f"No supported images found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(image_files)} images to process")

        results = {}
        total_cards = 0
        global_card_counter = 0  # keeps output file names unique across images
        for i, image_path in enumerate(image_files):
            self.logger.info(f"Processing {i+1}/{len(image_files)}: {image_path.name}")

            result = self.process_single_image(
                image_path,
                output_dir,
                confidence,
                iou_threshold,
                crop_mode,
                target_size,
                padding,
                global_card_counter
            )

            # Advance by the number of files actually written so numbering
            # stays contiguous even when a crop was skipped.
            global_card_counter += len(result.get('processed_cards', []))
            results[image_path.name] = result
            total_cards += len(result.get('detections', []))

            print_progress(i + 1, len(image_files), f"Processed {image_path.name}")

        summary = {
            'total_images': len(image_files),
            'total_cards_detected': total_cards,
            'images_with_cards': len([r for r in results.values() if r.get('detections')]),
            'images_without_cards': len([r for r in results.values() if not r.get('detections')]),
            'output_directory': str(output_dir),
            'crop_settings': {
                'mode': crop_mode,
                'target_size': target_size,
                'padding': padding
            },
            'results': results
        }

        summary_path = output_dir / "processing_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Batch processing completed. Summary saved to: {summary_path}")
        return summary

    def get_detection_statistics(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """Compute aggregate statistics from a ``batch_process`` summary.

        Args:
            results: Summary dict as returned by ``batch_process``.

        Returns:
            Dict with image/card counts, average cards per image, detection
            rate and min/max/mean/std of the detection confidences (all 0
            when there are no detections). ``{}`` for an empty ``results``.
        """
        if not results:
            return {}

        total_images = results.get('total_images', 0)
        total_cards = results.get('total_cards_detected', 0)
        images_with_cards = results.get('images_with_cards', 0)

        all_confidences = [
            detection.get('confidence', 0)
            for image_result in results.get('results', {}).values()
            for detection in image_result.get('detections', [])
        ]

        return {
            'total_images_processed': total_images,
            'total_cards_detected': total_cards,
            'images_with_cards': images_with_cards,
            'images_without_cards': total_images - images_with_cards,
            'average_cards_per_image': total_cards / total_images if total_images > 0 else 0,
            'detection_rate': images_with_cards / total_images if total_images > 0 else 0,
            'confidence_statistics': {
                'min': min(all_confidences) if all_confidences else 0,
                'max': max(all_confidences) if all_confidences else 0,
                # Plain floats rather than numpy scalars.
                'mean': float(np.mean(all_confidences)) if all_confidences else 0,
                'std': float(np.std(all_confidences)) if all_confidences else 0
            }
        }

    def augment_cropped_cards(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                              num_augmentations: int = 3) -> Dict[str, Any]:
        """Augment every cropped ID card found in ``input_dir``.

        Args:
            input_dir: Directory holding cropped cards named ``id_card_*.jpg``.
            output_dir: Directory the augmented images and summary go to.
            num_augmentations: Forwarded to
                ``DataAugmentation.augment_single_image``.

        Returns:
            The summary dict that is also written to
            ``augmentation_summary.json``, or ``{}`` when the input directory
            is missing or holds no card.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        # Outputs are named "<stem>_aug_<n>.jpg" and therefore match the input
        # pattern as well; leave them out so that writing into the input
        # directory does not re-augment earlier results on the next run.
        card_files = sorted(
            path for path in input_dir.glob("id_card_*.jpg")
            if "_aug_" not in path.stem
        )
        if not card_files:
            self.logger.warning(f"No ID card files found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(card_files)} ID cards to augment")

        results = {}
        total_augmented = 0
        for i, card_path in enumerate(card_files):
            self.logger.info(f"Augmenting {i+1}/{len(card_files)}: {card_path.name}")

            card_image = load_image(str(card_path))
            if card_image is None:
                self.logger.error(f"Failed to load card: {card_path}")
                continue

            try:
                augmented_cards = self.data_augmentation.augment_single_image(
                    card_image,
                    num_augmentations=num_augmentations
                )
                self.logger.info(f"Generated {len(augmented_cards)} augmented cards for {card_path.name}")
                # Diagnostic only: the full config is too noisy for INFO when
                # repeated for every card.
                self.logger.debug(f"DataAugmentation config: {self.data_augmentation.config}")
            except Exception as e:
                # Best effort: a failing card yields no augmentations.
                self.logger.error(f"Error during augmentation: {e}")
                augmented_cards = []

            card_results = []
            for j, aug_card in enumerate(augmented_cards):
                aug_filename = f"{card_path.stem}_aug_{j+1}.jpg"
                aug_path = output_dir / aug_filename
                save_image(aug_card, aug_path)
                card_results.append({
                    'augmented_path': str(aug_path),
                    'augmentation_index': j+1
                })

            results[card_path.name] = {
                'original_path': str(card_path),
                'augmented_cards': card_results,
                'total_augmented': len(card_results)
            }
            total_augmented += len(card_results)

            print_progress(i + 1, len(card_files), f"Augmented {card_path.name}")

        summary = {
            'total_cards': len(card_files),
            'total_augmented': total_augmented,
            'output_directory': str(output_dir),
            'results': results
        }

        summary_path = output_dir / "augmentation_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Augmentation completed. Summary saved to: {summary_path}")
        return summary

    @staticmethod
    def _default_pipeline_config() -> Dict[str, Any]:
        """Return a fresh copy of the built-in detection/processing defaults."""
        return {
            'detection': {
                'confidence_threshold': 0.25,
                'iou_threshold': 0.45,
                'padding': 10
            },
            'processing': {
                'apply_augmentation': True,
                'save_original': True,
                'num_augmentations': 3,
                'save_format': "jpg",
                'quality': 95,
                'target_size': [640, 640]
            },
            'crop_options': {
                'crop_mode': 'bbox',  # bbox, square, aspect_ratio
                'target_size': None,  # (width, height) or None
                'padding': 10
            }
        }

    def load_yolo_config(self, config_path: Optional[str] = None) -> Dict[str, Any]:
        """Build a pipeline config from the YOLO detector's ``config`` module.

        Args:
            config_path: Path to the detector's config file. Its directory is
                added to ``sys.path`` and a module named ``config`` is
                imported from there, so the file is expected to be called
                ``config.py``. Defaults to
                ``src/model/ID_cards_detector/config.py``.

        Returns:
            Config dictionary. When the detector config cannot be imported a
            warning is logged and the built-in defaults are returned.
        """
        import sys

        default_config_dir = Path("src/model/ID_cards_detector")
        if config_path is None:
            default_config_path = "src/model/ID_cards_detector/config.py"
            if os.path.exists(default_config_path):
                config_path = default_config_path

        # Honour an explicit path (it used to be ignored) by searching the
        # directory that contains it.
        config_dir = str(Path(config_path).parent) if config_path else str(default_config_dir)

        try:
            # Register the directory once; appending on every call made
            # sys.path grow without bound.
            if config_dir not in sys.path:
                sys.path.append(config_dir)
            from config import DEFAULT_TRAINING_CONFIG, DEFAULT_INFERENCE_CONFIG

            config = {
                'yolo_training_config': DEFAULT_TRAINING_CONFIG,
                'yolo_inference_config': DEFAULT_INFERENCE_CONFIG,
            }
            config.update(self._default_pipeline_config())
            # Detection thresholds follow the detector's inference config.
            config['detection']['confidence_threshold'] = DEFAULT_INFERENCE_CONFIG.get('conf_threshold', 0.25)
            config['detection']['iou_threshold'] = DEFAULT_INFERENCE_CONFIG.get('iou_threshold', 0.45)
            self.logger.info("Loaded YOLO config successfully")
        except Exception as e:
            self.logger.warning(f"Failed to load YOLO config: {e}")
            config = self._default_pipeline_config()

        return config

View File

@@ -41,14 +41,11 @@ def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optiona
image = cv2.imread(str(image_path))
if image is None:
return None
# Convert BGR to RGB
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# Resize if target_size is provided
if target_size:
image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
return image
except Exception as e:
print(f"Error loading image {image_path}: {e}")