mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-29 17:15:31 +00:00
162 lines
5.3 KiB
Python
Executable File
162 lines
5.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Batch Processing Script for Historical Newspaper Images
|
|
|
|
Simple script to process multiple images with the newspaper cleaning pipeline.
|
|
Includes progress tracking and error handling.
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import json
|
|
from pathlib import Path
|
|
from image_cleaner import NewspaperImageCleaner, create_comparison_image
|
|
|
|
|
|
def process_batch(input_dir=".", output_dir="cleaned", config_file=None,
|
|
create_comparisons=True, file_pattern="*.jpg"):
|
|
"""
|
|
Process all newspaper images in a directory.
|
|
|
|
Args:
|
|
input_dir: Directory containing input images
|
|
output_dir: Directory for cleaned images
|
|
config_file: JSON file with custom parameters
|
|
create_comparisons: Whether to create before/after comparisons
|
|
file_pattern: Glob pattern for files to process
|
|
"""
|
|
|
|
# Load custom config if provided
|
|
config = None
|
|
if config_file and os.path.exists(config_file):
|
|
with open(config_file, 'r') as f:
|
|
config = json.load(f)
|
|
print(f"Loaded custom config from {config_file}")
|
|
|
|
# Initialize cleaner
|
|
cleaner = NewspaperImageCleaner(config)
|
|
|
|
# Setup paths
|
|
input_path = Path(input_dir)
|
|
output_path = Path(output_dir)
|
|
output_path.mkdir(exist_ok=True)
|
|
|
|
if create_comparisons:
|
|
comparison_path = output_path / "comparisons"
|
|
comparison_path.mkdir(exist_ok=True)
|
|
|
|
# Find all image files
|
|
image_files = list(input_path.glob(file_pattern))
|
|
image_files.extend(input_path.glob("*.jpeg"))
|
|
image_files.extend(input_path.glob("*.JPG"))
|
|
image_files.extend(input_path.glob("*.JPEG"))
|
|
|
|
if not image_files:
|
|
print(f"No image files found in {input_dir}")
|
|
return
|
|
|
|
print(f"Found {len(image_files)} images to process")
|
|
print(f"Output directory: {output_path.absolute()}")
|
|
|
|
# Process each image
|
|
success_count = 0
|
|
error_count = 0
|
|
start_time = time.time()
|
|
|
|
for i, img_file in enumerate(image_files, 1):
|
|
print(f"\n[{i}/{len(image_files)}] Processing: {img_file.name}")
|
|
|
|
try:
|
|
# Process image
|
|
output_file = output_path / f"cleaned_{img_file.name}"
|
|
processed, original = cleaner.process_image(img_file, output_file)
|
|
|
|
# Create comparison if requested
|
|
if create_comparisons:
|
|
comp_file = comparison_path / f"comparison_{img_file.name}"
|
|
create_comparison_image(original, processed, comp_file)
|
|
|
|
success_count += 1
|
|
print(f"✓ Completed: {img_file.name}")
|
|
|
|
except Exception as e:
|
|
error_count += 1
|
|
print(f"✗ Error processing {img_file.name}: {str(e)}")
|
|
|
|
# Summary
|
|
elapsed_time = time.time() - start_time
|
|
print(f"\n" + "="*50)
|
|
print(f"Batch Processing Complete")
|
|
print(f"{"="*50}")
|
|
print(f"Successfully processed: {success_count}")
|
|
print(f"Errors: {error_count}")
|
|
print(f"Total time: {elapsed_time:.1f} seconds")
|
|
print(f"Average time per image: {elapsed_time/len(image_files):.1f} seconds")
|
|
print(f"Output directory: {output_path.absolute()}")
|
|
|
|
|
|
def create_sample_config():
|
|
"""Create a sample configuration file for customization."""
|
|
config = {
|
|
"bilateral_d": 9,
|
|
"bilateral_sigma_color": 75,
|
|
"bilateral_sigma_space": 75,
|
|
"clahe_clip_limit": 2.0,
|
|
"clahe_grid_size": [8, 8],
|
|
"gamma": 1.2,
|
|
"denoise_h": 10,
|
|
"morph_kernel_size": 2,
|
|
"unsharp_amount": 1.5,
|
|
"unsharp_radius": 1.0,
|
|
"unsharp_threshold": 0
|
|
}
|
|
|
|
with open("config.json", "w") as f:
|
|
json.dump(config, f, indent=4)
|
|
|
|
print("Created config.json with default parameters.")
|
|
print("Edit this file to customize processing settings.")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
import argparse
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="Batch process historical newspaper images",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
python batch_process.py # Process current directory
|
|
python batch_process.py -i scans -o clean # Process 'scans' folder
|
|
python batch_process.py --no-comparisons # Skip comparison images
|
|
python batch_process.py --config custom.json # Use custom settings
|
|
"""
|
|
)
|
|
|
|
parser.add_argument("-i", "--input", default=".",
|
|
help="Input directory (default: current directory)")
|
|
parser.add_argument("-o", "--output", default="cleaned",
|
|
help="Output directory (default: cleaned)")
|
|
parser.add_argument("-c", "--config",
|
|
help="JSON config file with custom parameters")
|
|
parser.add_argument("--no-comparisons", action="store_true",
|
|
help="Skip creating before/after comparison images")
|
|
parser.add_argument("--pattern", default="*.jpg",
|
|
help="File pattern to match (default: *.jpg)")
|
|
parser.add_argument("--create-config", action="store_true",
|
|
help="Create sample config file and exit")
|
|
|
|
args = parser.parse_args()
|
|
|
|
if args.create_config:
|
|
create_sample_config()
|
|
sys.exit(0)
|
|
|
|
process_batch(
|
|
input_dir=args.input,
|
|
output_dir=args.output,
|
|
config_file=args.config,
|
|
create_comparisons=not args.no_comparisons,
|
|
file_pattern=args.pattern
|
|
) |