#!/usr/bin/env python3 """ Surgical fix using exact string patterns found in the file. """ import sys def fix_align_patterns(content): """Fix known problematic align patterns.""" transformations = 0 # List of exact patterns to replace patterns = [ # Pattern 1: Simple case with line break in middle ( 'gehorsamsten Diener\nJacob Michael Reinhold Lenz', 'gehorsamsten Diener\nJacob Michael Reinhold Lenz' ), # Pattern 2: Multi-line signature block ( 'Hoch Edelgeborner Hochgelahrter Herr Secretair\nVerehrungswürdigster Gönner\nEw. HochEdelgebh:', 'Hoch Edelgeborner Hochgelahrter Herr Secretair\nVerehrungswürdigster Gönner\nEw. HochEdelgebh:' ), # Pattern 3: With tag containing line break ( '\n Interfusa nitentes\nVites aequora Cycladas.\n', '\n Interfusa nitentes\nVites aequora Cycladas.\n' ), # Pattern 4: Signature with aq ( 'Sie ewig liebender Alcibiades\nJ. M. R. L.', 'Sie ewig liebender Alcibiades\nJ. M. R. L.' ), ] for old_pattern, new_pattern in patterns: if old_pattern in content: content = content.replace(old_pattern, new_pattern) transformations += 1 print(f"Applied transformation {transformations}") # Now handle the more complex multi-line patterns using targeted replacements # Let's find and fix the complex ones one by one # Find patterns like "texttext" within align tags import re # Pattern for align elements containing line breaks align_pattern = r']*>(.*?)' def fix_align_content(match): pos = match.group(1) content_part = match.group(2) # Check if this content contains line elements if ']*(?:/>|>))', content_part) result = [] current_text = "" for part in parts: if part.startswith('{current_text}') current_text = "" result.append(part) else: current_text += part # Add remaining text if current_text.strip(): result.append(f'{current_text}') return ''.join(result) # Apply the pattern replacement new_content = re.sub(align_pattern, fix_align_content, content, flags=re.DOTALL) if new_content != content: additional_transforms = len(re.findall(r'\s*\s*