From d213b16555a68d6caae8c72c191fa097994c2d94 Mon Sep 17 00:00:00 2001 From: Simon Martens Date: Fri, 25 Jul 2025 15:56:31 +0200 Subject: [PATCH] linter start: briefe, references, meta --- scripts/lint.py | 93 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100755 scripts/lint.py diff --git a/scripts/lint.py b/scripts/lint.py new file mode 100755 index 0000000..e176be5 --- /dev/null +++ b/scripts/lint.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 +import sys +import re +from lxml import etree + +def lint_briefe(filename): + """ + Lint briefe.xml file by: + 1. Removing all whitespace before and after page, line, letterText, opus, and document tags + 2. Inserting newlines before each of these elements + 3. Inserting newline after self-closing page tags + 4. Inserting extra newline before letterText elements + """ + with open(filename, 'r', encoding='utf-8') as f: + content = f.read() + + # Target tags to process + tags = ['page', 'line', 'letterText', 'opus', 'document'] + + # Remove whitespace around target tags + for tag in tags: + # Remove whitespace before opening tags + content = re.sub(r'\s*(<' + tag + r'[^>]*>)', r'\1', content) + # Remove whitespace before closing tags + content = re.sub(r'\s*()', r'\1', content) + # Remove whitespace after opening tags (but not self-closing) + content = re.sub(r'(<' + tag + r'[^/>]*>)\s*', r'\1', content) + # Remove whitespace after closing tags + content = re.sub(r'()\s*', r'\1', content) + # Remove whitespace after self-closing tags + content = re.sub(r'(<' + tag + r'[^>]*/>\s*)', lambda m: m.group(1).rstrip(), content) + + # Insert newlines before target elements + for tag in tags: + # Before opening tags (not preceded by newline) + content = re.sub(r'([^\n])(<' + tag + r'[^>]*>)', r'\1\n\2', content) + # Before self-closing tags (not preceded by newline) + content = re.sub(r'([^\n])(<' + tag + r'[^>]*/>)', r'\1\n\2', content) + # Before closing tags (not preceded by newline) + content = re.sub(r'([^\n])()', r'\1\n\2', content) + + # Insert newline after self-closing page tags + content = re.sub(r'(]*/>)([^\n])', r'\1\n\2', content) + + # Insert extra newline before letterText elements (empty line) + content = re.sub(r'(\n)(]*>)', r'\1\n\2', content) + + # Write the linted content back + with open(filename, 'w', encoding='utf-8') as f: + f.write(content) + + print(f"Linted {filename}") + +def lint_meta_or_references(filename): + """ + Lint meta.xml or references.xml using lxml pretty print. + Preserves comments, empty tags, and all content while formatting. + """ + # Parse with lxml preserving comments + parser = etree.XMLParser(strip_cdata=False, remove_blank_text=False, remove_comments=False) + tree = etree.parse(filename, parser) + + # Pretty print with proper formatting + etree.indent(tree, space=" ") + + # Write back to file with XML declaration + with open(filename, 'wb') as f: + tree.write(f, encoding='utf-8', xml_declaration=True, pretty_print=True) + + print(f"Linted {filename}") + +def main(): + if len(sys.argv) != 3: + print("Usage: lint.py ") + print("Available types: briefe, meta, references") + sys.exit(1) + + lint_type = sys.argv[1] + filename = sys.argv[2] + + if lint_type == "briefe": + lint_briefe(filename) + elif lint_type == "meta": + lint_meta_or_references(filename) + elif lint_type == "references": + lint_meta_or_references(filename) + else: + print(f"Unknown lint type: {lint_type}") + print("Available types: briefe, meta, references") + sys.exit(1) + +if __name__ == "__main__": + main() \ No newline at end of file