diff --git a/scripts/surgical_fix.py b/scripts/surgical_fix.py new file mode 100644 index 0000000..faab74b --- /dev/null +++ b/scripts/surgical_fix.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python3 +""" +Surgical fix using exact string patterns found in the file. +""" + +import sys + +def fix_align_patterns(content): + """Fix known problematic align patterns.""" + + transformations = 0 + + # List of exact patterns to replace + patterns = [ + # Pattern 1: Simple case with line break in middle + ( + 'gehorsamsten Diener\nJacob Michael Reinhold Lenz', + 'gehorsamsten Diener\nJacob Michael Reinhold Lenz' + ), + + # Pattern 2: Multi-line signature block + ( + 'Hoch Edelgeborner Hochgelahrter Herr Secretair\nVerehrungswürdigster Gönner\nEw. HochEdelgebh:', + 'Hoch Edelgeborner Hochgelahrter Herr Secretair\nVerehrungswürdigster Gönner\nEw. HochEdelgebh:' + ), + + # Pattern 3: With tag containing line break + ( + '\n Interfusa nitentes\nVites aequora Cycladas.\n', + '\n Interfusa nitentes\nVites aequora Cycladas.\n' + ), + + # Pattern 4: Signature with aq + ( + 'Sie ewig liebender Alcibiades\nJ. M. R. L.', + 'Sie ewig liebender Alcibiades\nJ. M. R. L.' + ), + ] + + for old_pattern, new_pattern in patterns: + if old_pattern in content: + content = content.replace(old_pattern, new_pattern) + transformations += 1 + print(f"Applied transformation {transformations}") + + # Now handle the more complex multi-line patterns using targeted replacements + # Let's find and fix the complex ones one by one + + # Find patterns like "texttext" within align tags + import re + + # Pattern for align elements containing line breaks + align_pattern = r']*>(.*?)' + + def fix_align_content(match): + pos = match.group(1) + content_part = match.group(2) + + # Check if this content contains line elements + if ']*(?:/>|>))', content_part) + + result = [] + current_text = "" + + for part in parts: + if part.startswith('{current_text}') + current_text = "" + result.append(part) + else: + current_text += part + + # Add remaining text + if current_text.strip(): + result.append(f'{current_text}') + + return ''.join(result) + + # Apply the pattern replacement + new_content = re.sub(align_pattern, fix_align_content, content, flags=re.DOTALL) + + if new_content != content: + additional_transforms = len(re.findall(r'\s*\s*