update augment + YOLO pipeline
This commit is contained in:
@@ -363,8 +363,6 @@ class DataAugmentation:
|
||||
|
||||
return result
|
||||
|
||||
|
||||
|
||||
def augment_single_image(self, image: np.ndarray, num_augmentations: int = None) -> List[np.ndarray]:
|
||||
"""
|
||||
Apply each augmentation method separately to create independent augmented versions
|
||||
@@ -455,20 +453,7 @@ class DataAugmentation:
|
||||
|
||||
augmented_images.append(augmented)
|
||||
|
||||
# 5. Grayscale only
|
||||
if grayscale_config.get("enabled", False):
|
||||
for i in range(num_augmentations):
|
||||
augmented = image.copy()
|
||||
augmented = self.convert_to_grayscale_preserve_quality(augmented)
|
||||
|
||||
# Resize preserving aspect ratio
|
||||
target_size = self.image_processor.target_size
|
||||
if target_size:
|
||||
augmented = self.resize_preserve_aspect(augmented, target_size)
|
||||
|
||||
augmented_images.append(augmented)
|
||||
|
||||
# 6. Blurring only
|
||||
# 5. Blurring only
|
||||
if blurring_config.get("enabled", False):
|
||||
for i in range(num_augmentations):
|
||||
augmented = image.copy()
|
||||
@@ -481,7 +466,7 @@ class DataAugmentation:
|
||||
|
||||
augmented_images.append(augmented)
|
||||
|
||||
# 7. Brightness and contrast only
|
||||
# 6. Brightness/Contrast only
|
||||
if brightness_contrast_config.get("enabled", False):
|
||||
for i in range(num_augmentations):
|
||||
augmented = image.copy()
|
||||
@@ -494,6 +479,11 @@ class DataAugmentation:
|
||||
|
||||
augmented_images.append(augmented)
|
||||
|
||||
# 7. Apply grayscale as final step to ALL augmented images
|
||||
if grayscale_config.get("enabled", False):
|
||||
for i in range(len(augmented_images)):
|
||||
augmented_images[i] = self.convert_to_grayscale_preserve_quality(augmented_images[i])
|
||||
|
||||
return augmented_images
|
||||
|
||||
def augment_image_file(self, image_path: Path, output_dir: Path, num_augmentations: int = None) -> List[Path]:
|
||||
@@ -518,7 +508,7 @@ class DataAugmentation:
|
||||
|
||||
# Save augmented images with method names
|
||||
saved_paths = []
|
||||
method_names = ["rotation", "cropping", "noise", "blockage", "grayscale", "blurring", "brightness_contrast"]
|
||||
method_names = ["rotation", "cropping", "noise", "blockage", "blurring", "brightness_contrast", "grayscale"]
|
||||
method_index = 0
|
||||
|
||||
for i, aug_image in enumerate(augmented_images):
|
||||
|
611
src/id_card_detector.py
Normal file
611
src/id_card_detector.py
Normal file
@@ -0,0 +1,611 @@
|
||||
"""
|
||||
ID Card Detector Module
|
||||
Sử dụng YOLO để detect và cắt ID cards từ ảnh lớn, kết hợp với data augmentation
|
||||
Tích hợp với YOLOv8 French ID Card Detection model
|
||||
"""
|
||||
import cv2
|
||||
import numpy as np
|
||||
from pathlib import Path
|
||||
from typing import List, Tuple, Optional, Dict, Any, Union
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from ultralytics import YOLO
|
||||
import logging
|
||||
from data_augmentation import DataAugmentation
|
||||
from utils import load_image, save_image, create_augmented_filename, print_progress
|
||||
import os
|
||||
import json
|
||||
import yaml
|
||||
|
||||
class IDCardDetector:
    """Detect ID cards in large images with a YOLO model and crop them out.

    The detector wraps an ``ultralytics.YOLO`` model for inference and a
    ``DataAugmentation`` instance that is used to augment the cropped cards.
    """

    def __init__(self, model_path: str = None, config: Dict[str, Any] = None):
        """
        Initialize the ID card detector.

        Args:
            model_path: Path to trained YOLO weights. When omitted, the default
                path ``data/weights/id_cards_yolov8n.pt`` is used if it exists.
            config: Configuration dictionary (also handed to DataAugmentation).
        """
        self.config = config or {}
        self.model = None
        # DataAugmentation receives the caller's config object unchanged
        # (possibly None), exactly as before.
        self.data_augmentation = DataAugmentation(config)
        self.logger = self._setup_logger()

        # Fall back to the default weights path when none was supplied.
        if not model_path:
            default_model_path = "data/weights/id_cards_yolov8n.pt"
            if os.path.exists(default_model_path):
                model_path = default_model_path
        self.model_path = model_path

        # Only attempt to load weights that are actually present on disk.
        if model_path and os.path.exists(model_path):
            self.load_model(model_path)

    def _setup_logger(self) -> logging.Logger:
        """Return the module logger, attaching a stream handler on first use."""
        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        # Guard against stacking duplicate handlers when several detectors
        # are created in the same process.
        if not logger.handlers:
            handler = logging.StreamHandler()
            handler.setFormatter(logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
            ))
            logger.addHandler(handler)

        return logger

    def load_model(self, model_path: str) -> bool:
        """
        Load a YOLO model from a weights file.

        Args:
            model_path: Path to the model file.

        Returns:
            True if the model was loaded, False if loading raised.
        """
        try:
            self.model = YOLO(model_path)
            self.logger.info(f"Loaded YOLO model from: {model_path}")
            return True
        except Exception as e:
            self.logger.error(f"Failed to load model: {e}")
            return False

    def detect_id_cards(self, image: np.ndarray, confidence: float = 0.5, iou_threshold: float = 0.45) -> List[Dict[str, Any]]:
        """
        Detect ID cards in an image using the loaded YOLO model.

        Args:
            image: Input image.
            confidence: Confidence threshold.
            iou_threshold: IoU threshold for NMS.

        Returns:
            A list of detections, each of the form:
            {
                'bbox': [x1, y1, x2, y2],
                'confidence': float,
                'class_id': int,
                'class_name': str
            }
            An empty list is returned when no model is loaded or inference fails.
        """
        if self.model is None:
            self.logger.error("Model chưa được load!")
            return []

        try:
            results = self.model(image, conf=confidence, iou=float(iou_threshold), verbose=False)

            detections = []
            for result in results:
                boxes = result.boxes
                if boxes is None:
                    continue

                for box in boxes:
                    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy()
                    confidence_score = float(box.conf[0].cpu().numpy())
                    class_id = int(box.cls[0].cpu().numpy())
                    class_name = self.model.names[class_id] if hasattr(self.model, 'names') else f"class_{class_id}"

                    detections.append({
                        # Coordinates are truncated to ints so they can be
                        # used directly as slice bounds and serialized to JSON.
                        'bbox': [int(x1), int(y1), int(x2), int(y2)],
                        'confidence': confidence_score,
                        'class_id': class_id,
                        'class_name': class_name
                    })

            self.logger.info(f"Detected {len(detections)} ID cards")
            return detections

        except Exception as e:
            self.logger.error(f"Error during detection: {e}")
            return []

    def crop_id_card(self, image: np.ndarray, bbox: List[int], padding: int = 10,
                     crop_mode: str = "bbox", target_size: Tuple[int, int] = None) -> np.ndarray:
        """
        Crop an ID card out of the source image from its bounding box.

        Args:
            image: Input image.
            bbox: Bounding box [x1, y1, x2, y2].
            padding: Extra pixels added around the bbox (clamped to the image).
            crop_mode: One of "bbox", "square", "aspect_ratio"; any other value
                behaves like "bbox".
            target_size: Optional (width, height) to resize the crop to.

        Returns:
            The cropped image. If the bbox is degenerate or lies outside the
            image, an empty array (``size == 0``) is returned and no resize is
            attempted; callers should check ``size`` before using the crop.
        """
        x1, y1, x2, y2 = bbox

        # Apply padding and clamp to the image bounds.
        height, width = image.shape[:2]
        x1 = max(0, x1 - padding)
        y1 = max(0, y1 - padding)
        x2 = min(width, x2 + padding)
        y2 = min(height, y2 + padding)

        # A degenerate box would otherwise divide by zero in "aspect_ratio"
        # mode, and a negative upper bound would be read by NumPy as an
        # index from the end and silently return the wrong region.
        if x2 <= x1 or y2 <= y1:
            return image[0:0, 0:0]

        if crop_mode == "square":
            # Square window centred on the bbox, sized by its longer side.
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            half_size = max(x2 - x1, y2 - y1) // 2

            x1 = max(0, center_x - half_size)
            y1 = max(0, center_y - half_size)
            x2 = min(width, center_x + half_size)
            y2 = min(height, center_y + half_size)
        elif crop_mode == "aspect_ratio":
            # Force a 3:4 (width:height) window centred on the bbox.
            bbox_width = x2 - x1
            bbox_height = y2 - y1
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2

            target_ratio = 3 / 4
            current_ratio = bbox_width / bbox_height

            if current_ratio > target_ratio:
                # Box is too wide: keep the height and adjust the width.
                half_width = int(bbox_height * target_ratio) // 2
                x1 = max(0, center_x - half_width)
                x2 = min(width, center_x + half_width)
            else:
                # Box is too tall: keep the width and adjust the height.
                half_height = int(bbox_width / target_ratio) // 2
                y1 = max(0, center_y - half_height)
                y2 = min(height, center_y + half_height)
        # "bbox" and unknown modes use the padded box as-is.

        cropped = image[y1:y2, x1:x2]

        # cv2.resize rejects empty inputs, so only resize real crops.
        if target_size and cropped.size > 0:
            cropped = cv2.resize(cropped, target_size, interpolation=cv2.INTER_AREA)

        return cropped

    def process_single_image(self, image_path: Union[str, Path], output_dir: Path,
                             confidence: float = 0.5, iou_threshold: float = 0.45,
                             crop_mode: str = "bbox", target_size: Tuple[int, int] = None,
                             padding: int = 10, card_counter: int = 0) -> Dict[str, Any]:
        """
        Process one image: detect ID cards, crop them and save the crops.

        Args:
            image_path: Path to the input image.
            output_dir: Output directory (created if missing).
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
            target_size: Target (width, height) or None.
            padding: Extra pixels added around each bbox.
            card_counter: Number of cards already written by the caller; the
                crops of this image are numbered from ``card_counter + 1``.

        Returns:
            Dictionary describing the result. It is empty when the image is
            missing or cannot be loaded.
        """
        image_path = Path(image_path)
        if not image_path.exists():
            self.logger.error(f"Image not found: {image_path}")
            return {}

        image = load_image(str(image_path))
        if image is None:
            self.logger.error(f"Failed to load image: {image_path}")
            return {}

        detections = self.detect_id_cards(image, confidence, float(iou_threshold))

        if not detections:
            self.logger.warning(f"No ID cards detected in: {image_path}")
            return {
                'image_path': str(image_path),
                'detections': [],
                'processed_cards': []
            }

        # Accept plain strings as well as Path objects.
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        processed_cards = []
        current_card_counter = card_counter

        for detection in detections:
            # Advance for every detection (even skipped ones) because
            # batch_process advances its global counter by the detection count.
            current_card_counter += 1

            cropped_card = self.crop_id_card(
                image,
                detection['bbox'],
                padding=padding,
                crop_mode=crop_mode,
                target_size=target_size
            )

            if cropped_card.size == 0:
                self.logger.warning(
                    f"Skipping empty crop for bbox {detection['bbox']} in: {image_path}"
                )
                continue

            card_path = output_dir / f"id_card_{current_card_counter:03d}.jpg"
            save_image(cropped_card, card_path)

            processed_cards.append({
                'original_path': str(card_path),
                'detection_info': detection,
                'crop_info': {
                    'mode': crop_mode,
                    'target_size': target_size,
                    'padding': padding
                }
            })

        result = {
            'image_path': str(image_path),
            'detections': detections,
            'processed_cards': processed_cards,
            'total_cards': len(processed_cards),
            'crop_settings': {
                'mode': crop_mode,
                'target_size': target_size,
                'padding': padding
            }
        }

        self.logger.info(f"Processed {len(processed_cards)} cards from {image_path.name}")
        return result

    def _find_image_files(self, input_dir: Path) -> List[Path]:
        """Return the supported image files directly inside ``input_dir``, sorted.

        Extensions are compared case-insensitively, so each file is returned
        exactly once whatever the filesystem's case sensitivity.
        """
        supported_formats = self.config.get('supported_formats', ['.jpg', '.jpeg', '.png', '.bmp', '.tiff'])
        suffixes = {
            fmt.lower() if fmt.startswith('.') else f".{fmt.lower()}"
            for fmt in supported_formats
        }
        return sorted(
            path for path in input_dir.iterdir()
            if path.is_file() and path.suffix.lower() in suffixes
        )

    def batch_process(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                      confidence: float = 0.5, iou_threshold: float = 0.45,
                      crop_mode: str = "bbox", target_size: Tuple[int, int] = None,
                      padding: int = 10) -> Dict[str, Any]:
        """
        Detect and crop ID cards for every supported image in a directory.

        Args:
            input_dir: Directory containing the input images.
            output_dir: Output directory (created if missing).
            confidence: Confidence threshold.
            iou_threshold: IoU threshold.
            crop_mode: Crop mode ("bbox", "square", "aspect_ratio").
            target_size: Target (width, height) or None.
            padding: Extra pixels added around each bbox.

        Returns:
            Summary dictionary, also written to ``processing_summary.json`` in
            the output directory. Empty when the input directory is missing or
            contains no supported images.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        image_files = self._find_image_files(input_dir)
        if not image_files:
            self.logger.warning(f"No supported images found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(image_files)} images to process")

        results = {}
        total_cards = 0
        global_card_counter = 0  # keeps crop file names unique across images

        for i, image_path in enumerate(image_files):
            self.logger.info(f"Processing {i+1}/{len(image_files)}: {image_path.name}")

            # Detection and cropping only; augmentation is a separate step.
            result = self.process_single_image(
                image_path,
                output_dir,
                confidence=confidence,
                iou_threshold=iou_threshold,
                crop_mode=crop_mode,
                target_size=target_size,
                padding=padding,
                card_counter=global_card_counter
            )

            detected = len(result.get('detections', []))
            global_card_counter += detected
            total_cards += detected
            results[image_path.name] = result

            print_progress(i + 1, len(image_files), f"Processed {image_path.name}")

        images_with_cards = sum(1 for r in results.values() if r.get('detections'))
        summary = {
            'total_images': len(image_files),
            'total_cards_detected': total_cards,
            'images_with_cards': images_with_cards,
            'images_without_cards': len(results) - images_with_cards,
            'output_directory': str(output_dir),
            'crop_settings': {
                'mode': crop_mode,
                'target_size': target_size,
                'padding': padding
            },
            'results': results
        }

        summary_path = output_dir / "processing_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Batch processing completed. Summary saved to: {summary_path}")
        return summary

    def get_detection_statistics(self, results: Dict[str, Any]) -> Dict[str, Any]:
        """
        Compute statistics from a batch detection summary.

        Args:
            results: Summary dictionary as returned by batch_process.

        Returns:
            Dictionary of statistics (plain Python numbers), or an empty dict
            when ``results`` is empty.
        """
        if not results:
            return {}

        total_images = results.get('total_images', 0)
        total_cards = results.get('total_cards_detected', 0)
        images_with_cards = results.get('images_with_cards', 0)

        all_confidences = [
            detection.get('confidence', 0)
            for image_result in results.get('results', {}).values()
            for detection in image_result.get('detections', [])
        ]

        return {
            'total_images_processed': total_images,
            'total_cards_detected': total_cards,
            'images_with_cards': images_with_cards,
            'images_without_cards': total_images - images_with_cards,
            'average_cards_per_image': total_cards / total_images if total_images > 0 else 0,
            'detection_rate': images_with_cards / total_images if total_images > 0 else 0,
            'confidence_statistics': {
                'min': min(all_confidences) if all_confidences else 0,
                'max': max(all_confidences) if all_confidences else 0,
                # Cast NumPy scalars to plain floats.
                'mean': float(np.mean(all_confidences)) if all_confidences else 0,
                'std': float(np.std(all_confidences)) if all_confidences else 0
            }
        }

    def augment_cropped_cards(self, input_dir: Union[str, Path], output_dir: Union[str, Path],
                              num_augmentations: int = 3) -> Dict[str, Any]:
        """
        Augment every cropped ID card found in the input directory.

        Args:
            input_dir: Directory containing cropped cards (``id_card_*.jpg``).
            output_dir: Output directory for the augmented images.
            num_augmentations: Number of augmentations per card, forwarded to
                DataAugmentation.augment_single_image.

        Returns:
            Summary dictionary, also written to ``augmentation_summary.json``
            in the output directory. Empty when the input directory is missing
            or contains no card files.
        """
        input_dir = Path(input_dir)
        output_dir = Path(output_dir)

        if not input_dir.exists():
            self.logger.error(f"Input directory not found: {input_dir}")
            return {}

        output_dir.mkdir(parents=True, exist_ok=True)

        # Sorted so that processing order is reproducible.
        card_files = sorted(input_dir.glob("id_card_*.jpg"))

        if not card_files:
            self.logger.warning(f"No ID card files found in: {input_dir}")
            return {}

        self.logger.info(f"Found {len(card_files)} ID cards to augment")

        results = {}
        total_augmented = 0

        for i, card_path in enumerate(card_files):
            self.logger.info(f"Augmenting {i+1}/{len(card_files)}: {card_path.name}")

            card_image = load_image(str(card_path))
            if card_image is None:
                self.logger.error(f"Failed to load card: {card_path}")
                continue

            try:
                augmented_cards = self.data_augmentation.augment_single_image(
                    card_image,
                    num_augmentations=num_augmentations
                )
                self.logger.info(f"Generated {len(augmented_cards)} augmented cards for {card_path.name}")
                # The full config dump is diagnostic output, so it is logged
                # at DEBUG rather than once per card at INFO.
                self.logger.debug(f"DataAugmentation config: {self.data_augmentation.config}")
            except Exception as e:
                # Best effort: one failing card must not abort the batch.
                self.logger.error(f"Error during augmentation: {e}")
                augmented_cards = []

            card_results = []
            for j, aug_card in enumerate(augmented_cards, start=1):
                aug_path = output_dir / f"{card_path.stem}_aug_{j}.jpg"
                save_image(aug_card, aug_path)
                card_results.append({
                    'augmented_path': str(aug_path),
                    'augmentation_index': j
                })

            results[card_path.name] = {
                'original_path': str(card_path),
                'augmented_cards': card_results,
                'total_augmented': len(card_results)
            }
            total_augmented += len(card_results)

            print_progress(i + 1, len(card_files), f"Augmented {card_path.name}")

        summary = {
            'total_cards': len(card_files),
            'total_augmented': total_augmented,
            'output_directory': str(output_dir),
            'results': results
        }

        summary_path = output_dir / "augmentation_summary.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2, ensure_ascii=False)

        self.logger.info(f"Augmentation completed. Summary saved to: {summary_path}")
        return summary

    @staticmethod
    def _base_pipeline_config(conf_threshold: float = 0.25, iou_threshold: float = 0.45) -> Dict[str, Any]:
        """Build the detection/processing/crop sections shared by all configs."""
        return {
            'detection': {
                'confidence_threshold': conf_threshold,
                'iou_threshold': iou_threshold,
                'padding': 10
            },
            'processing': {
                'apply_augmentation': True,
                'save_original': True,
                'num_augmentations': 3,
                'save_format': "jpg",
                'quality': 95,
                'target_size': [640, 640]
            },
            'crop_options': {
                'crop_mode': 'bbox',  # bbox, square, aspect_ratio
                'target_size': None,  # (width, height) or None
                'padding': 10
            }
        }

    def load_yolo_config(self, config_path: str = None) -> Dict[str, Any]:
        """
        Load the pipeline config from the YOLO detector's Python config file.

        Args:
            config_path: Path to a Python file defining DEFAULT_TRAINING_CONFIG
                and DEFAULT_INFERENCE_CONFIG. When omitted, the default
                ``src/model/ID_cards_detector/config.py`` is used if it exists.

        Returns:
            Config dictionary. If the YOLO config cannot be loaded, a fallback
            config with built-in thresholds is returned instead.
        """
        import importlib.util
        import sys

        if config_path is None:
            default_config_path = "src/model/ID_cards_detector/config.py"
            if os.path.exists(default_config_path):
                config_path = default_config_path

        try:
            # Keep the config's directory importable (the config file may
            # import sibling modules) without adding a duplicate on every call.
            if config_path:
                module_dir = str(Path(config_path).parent)
            else:
                module_dir = str(Path("src/model/ID_cards_detector"))
            if module_dir not in sys.path:
                sys.path.append(module_dir)

            if config_path:
                # Load exactly the requested file instead of whichever
                # ``config`` module happens to come first on sys.path.
                spec = importlib.util.spec_from_file_location("_yolo_detector_config", str(config_path))
                if spec is None or spec.loader is None:
                    raise ImportError(f"Cannot load config from: {config_path}")
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                training_config = module.DEFAULT_TRAINING_CONFIG
                inference_config = module.DEFAULT_INFERENCE_CONFIG
            else:
                from config import DEFAULT_TRAINING_CONFIG as training_config
                from config import DEFAULT_INFERENCE_CONFIG as inference_config

            config = {
                'yolo_training_config': training_config,
                'yolo_inference_config': inference_config,
            }
            config.update(self._base_pipeline_config(
                inference_config.get('conf_threshold', 0.25),
                inference_config.get('iou_threshold', 0.45),
            ))

            self.logger.info("Loaded YOLO config successfully")

        except Exception as e:
            self.logger.warning(f"Failed to load YOLO config: {e}")
            # Fallback config with built-in defaults.
            config = self._base_pipeline_config()

        return config
|
@@ -41,14 +41,11 @@ def load_image(image_path: Path, target_size: Tuple[int, int] = None) -> Optiona
|
||||
image = cv2.imread(str(image_path))
|
||||
if image is None:
|
||||
return None
|
||||
|
||||
# Convert BGR to RGB
|
||||
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
||||
|
||||
# Resize if target_size is provided
|
||||
if target_size:
|
||||
image = cv2.resize(image, target_size, interpolation=cv2.INTER_AREA)
|
||||
|
||||
return image
|
||||
except Exception as e:
|
||||
print(f"Error loading image {image_path}: {e}")
|
||||
|
Reference in New Issue
Block a user