mirror of
https://github.com/Theodor-Springmann-Stiftung/KGPZ.git
synced 2025-10-28 16:45:31 +00:00
42 lines
1.1 KiB
Bash
42 lines
1.1 KiB
Bash
#!/bin/bash
#
# Extract the embedded images of every PDF in the current directory.
#
# For each *.pdf whose name contains a date of the form YYYY-MM-DD, the
# images are extracted with pdfimages(1) and stored as
#   images/<date>_<n>.<ext>
# where <n> starts at 1 for every PDF. PDFs without a date in their name
# are skipped with a message.
#
# NOTE: <n> restarts for each PDF, so two PDFs carrying the same date write
# to the same target names and the later one overwrites the earlier one.
#
# Requires: pdfimages.
# Exit status: 0 on success, 1 if the output directory could not be created
# or any PDF could not be processed completely.

# Failures are checked explicitly below so that one bad PDF does not abort
# the whole run; -u still catches typos in variable names.
set -u

readonly OUTPUT_DIR="images"

# Temporary directory of the PDF currently being processed; empty if none.
scratch_dir=""

# Remove the temporary directory of an unfinished extraction, if any.
cleanup() {
  if [[ -n "$scratch_dir" ]]; then
    rm -rf -- "$scratch_dir"
    scratch_dir=""
  fi
}
trap cleanup EXIT
# Turn signals into a regular exit so the EXIT trap gets to run.
trap 'exit 130' INT
trap 'exit 143' TERM

#######################################
# Extract all images of one PDF into OUTPUT_DIR.
# Globals:   OUTPUT_DIR (read), scratch_dir (written)
# Arguments: $1 - name of a PDF file in the current directory
# Outputs:   progress message on stdout, warnings on stderr
# Returns:   0 if the PDF was processed or skipped, 1 on any failure
#######################################
process_pdf() {
  local pdf=$1
  # Kept in a variable so the braces are not subject to shell parsing.
  local date_re='[0-9]{4}-[0-9]{2}-[0-9]{2}'
  local date pdf_arg img file_name ext target
  local counter=0
  local status=0

  # Only the first date in the name is used, so a name holding several
  # dates still yields a single, well-formed prefix.
  if [[ "$pdf" =~ $date_re ]]; then
    date=${BASH_REMATCH[0]}
  else
    printf '%s\n' "Skipping $pdf: No date found in filename"
    return 0
  fi

  if ! scratch_dir=$(mktemp -d); then
    scratch_dir=""
    printf 'Error: cannot create temporary directory for %s\n' "$pdf" >&2
    return 1
  fi

  # A leading "./" keeps a name that starts with "-" from being parsed as
  # an option by pdfimages.
  pdf_arg=$pdf
  if [[ "$pdf_arg" == -* ]]; then
    pdf_arg="./$pdf_arg"
  fi

  if ! pdfimages -all "$pdf_arg" "$scratch_dir/img"; then
    printf 'Warning: pdfimages failed for %s\n' "$pdf" >&2
    status=1
  fi

  # Whatever was extracted is still moved, even after a pdfimages failure.
  for img in "$scratch_dir"/*; do
    # An unmatched glob stays literal; without this guard a PDF with no
    # images would be reported as having one.
    [[ -e "$img" ]] || continue

    # Take the extension from the file name only: the temporary directory
    # path may itself contain a dot.
    file_name=${img##*/}
    ext=${file_name##*.}
    target="$OUTPUT_DIR/${date}_$((counter + 1)).${ext}"

    if mv -- "$img" "$target"; then
      counter=$((counter + 1))
    else
      printf 'Warning: cannot move %s to %s\n' "$img" "$target" >&2
      status=1
    fi
  done

  cleanup

  printf '%s\n' "Processed $pdf: Extracted $counter images"
  return "$status"
}

#######################################
# Process every PDF in the current directory.
# Globals:   OUTPUT_DIR (read)
# Arguments: none
# Outputs:   progress messages on stdout, warnings on stderr
# Returns:   0 on success, 1 if anything failed
#######################################
main() {
  local pdf
  local failed=0

  if ! mkdir -p -- "$OUTPUT_DIR"; then
    printf 'Error: cannot create directory %s\n' "$OUTPUT_DIR" >&2
    return 1
  fi

  for pdf in *.pdf; do
    # Without any PDF the pattern is left unexpanded; ignore it.
    [[ -e "$pdf" ]] || continue
    process_pdf "$pdf" || failed=1
  done

  printf '%s\n' \
    "Image extraction complete. All images are in the 'images' directory."
  return "$failed"
}

main "$@"