combine augment

This commit is contained in:
Nguyễn Phước Thành
2025-08-06 21:44:39 +07:00
parent 51d3a66cc4
commit f63589a10a
4 changed files with 851 additions and 355 deletions

61
main.py
View File

@@ -214,11 +214,11 @@ def preview_augmentation(input_dir: Path, output_dir: Path, config: Dict[str, An
else:
print("⚠️ No ID cards detected, proceeding with normal augmentation")
# Normal augmentation (fallback)
# Normal augmentation (fallback) with new logic
augmented_paths = augmenter.augment_image_file(
image_files[0],
output_dir,
num_augmentations=3
num_target_images=3
)
if augmented_paths:
@@ -270,6 +270,7 @@ def main():
processing_config = config_manager.get_processing_config()
augmentation_config = config_manager.get_augmentation_config()
logging_config = config_manager.get_logging_config()
data_strategy_config = config.get("data_strategy", {})
# Setup logging
logger = setup_logging(logging_config.get("level", "INFO"))
@@ -324,10 +325,20 @@ def main():
logger.error(f"No images found in {input_dir}")
sys.exit(1)
# Get data strategy parameters
multiplication_factor = data_strategy_config.get("multiplication_factor", 3.0)
random_seed = data_strategy_config.get("random_seed")
logger.info(f"Found {len(image_files)} images to process")
logger.info(f"Output directory: {output_dir}")
logger.info(f"Number of augmentations per image: {processing_config.get('num_augmentations', 3)}")
logger.info(f"Data strategy: multiplication_factor = {multiplication_factor}")
if multiplication_factor < 1.0:
logger.info(f"SAMPLING MODE: Will process {multiplication_factor*100:.1f}% of input images")
else:
logger.info(f"MULTIPLICATION MODE: Target {multiplication_factor}x dataset size")
logger.info(f"Target size: {processing_config.get('target_size', [224, 224])}")
if random_seed:
logger.info(f"Random seed: {random_seed}")
# Process with ID detection if enabled
if id_detection_config.get('enabled', False):
@@ -360,23 +371,51 @@ def main():
target_size=id_detection_config.get('target_size'),
padding=id_detection_config.get('padding', 10)
)
# Bước 2: Augment các card đã crop
logger.info("Step 2: Augment cropped ID cards...")
# Bước 2: Augment các card đã crop với strategy mới
logger.info("Step 2: Augment cropped ID cards with smart strategy...")
augmenter = DataAugmentation(augmentation_config)
augmenter.batch_augment(
# Truyền full config để augmenter có thể access data_strategy
augmenter.config.update({"data_strategy": data_strategy_config})
augment_results = augmenter.batch_augment(
processed_dir,
output_dir,
num_augmentations=processing_config.get("num_augmentations", 3)
multiplication_factor=multiplication_factor,
random_seed=random_seed
)
# Log results
if augment_results:
logger.info(f"Augmentation Summary:")
logger.info(f" Input images: {augment_results.get('input_images', 0)}")
logger.info(f" Selected for processing: {augment_results.get('selected_images', 0)}")
logger.info(f" Target total: {augment_results.get('target_total', 0)}")
logger.info(f" Actually generated: {augment_results.get('actual_generated', 0)}")
logger.info(f" Efficiency: {augment_results.get('efficiency', 0):.1%}")
else:
# Augment trực tiếp ảnh gốc
logger.info("Starting normal batch augmentation (direct augmentation)...")
# Augment trực tiếp ảnh gốc với strategy mới
logger.info("Starting smart batch augmentation (direct augmentation)...")
augmenter = DataAugmentation(augmentation_config)
augmenter.batch_augment(
# Truyền full config để augmenter có thể access data_strategy
augmenter.config.update({"data_strategy": data_strategy_config})
augment_results = augmenter.batch_augment(
input_dir,
output_dir,
num_augmentations=processing_config.get("num_augmentations", 3)
multiplication_factor=multiplication_factor,
random_seed=random_seed
)
# Log results
if augment_results:
logger.info(f"Augmentation Summary:")
logger.info(f" Input images: {augment_results.get('input_images', 0)}")
logger.info(f" Selected for processing: {augment_results.get('selected_images', 0)}")
logger.info(f" Target total: {augment_results.get('target_total', 0)}")
logger.info(f" Actually generated: {augment_results.get('actual_generated', 0)}")
logger.info(f" Efficiency: {augment_results.get('efficiency', 0):.1%}")
logger.info("Data processing completed successfully")