From ad109c724ae848199528b78ea63ec98d0e30468f Mon Sep 17 00:00:00 2001 From: Simon Martens Date: Wed, 17 Jul 2024 18:49:58 +0200 Subject: [PATCH] Added custom linter files --- Scripts/custom_linter.py | 63 ++++++++++++++++++++++++++++++++ Scripts/process_linter_output.py | 16 ++++++++ 2 files changed, 79 insertions(+) create mode 100644 Scripts/custom_linter.py create mode 100644 Scripts/process_linter_output.py diff --git a/Scripts/custom_linter.py b/Scripts/custom_linter.py new file mode 100644 index 0000000..ca90d96 --- /dev/null +++ b/Scripts/custom_linter.py @@ -0,0 +1,63 @@ +import os +from lxml import etree + +NAMESPACE = {'kgpz': 'https://www.koenigsberger-zeitungen.de'} +SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) +XML_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..', 'XML')) + +def parse_xml_file(filepath): + try: + parser = etree.XMLParser(remove_blank_text=True) + tree = etree.parse(filepath, parser) + return tree.getroot() + except etree.ParseError as e: + print(f"Error parsing {filepath}: {e}") + return None + +def get_all_ids(root, tag): + return set(elem.get('id') for elem in root.xpath(f'.//kgpz:{tag}', namespaces=NAMESPACE)) + +def check_references(beitrag_root, reference_data, filename): + errors = [] + for ref_type, ref_tag in [('akteur', 'akteur'), ('kategorie', 'kategorie'), + ('ort', 'ort'), ('werk', 'werk')]: + for ref in beitrag_root.xpath(f'//kgpz:{ref_tag}', namespaces=NAMESPACE): + ref_id = ref.get('ref') + if ref_id not in reference_data[ref_type]: + line_number = ref.sourceline + errors.append((filename, line_number, f"INVALID REFERENCE ({ref_type}:{ref_id})")) + return errors + +def main(): + reference_data = { + 'akteur': get_all_ids(parse_xml_file(os.path.join(XML_DIR, 'akteure.xml')), 'akteur'), + 'kategorie': get_all_ids(parse_xml_file(os.path.join(XML_DIR, 'kategorien.xml')), 'kategorie'), + 'ort': get_all_ids(parse_xml_file(os.path.join(XML_DIR, 'orte.xml')), 'ort'), + 'werk': get_all_ids(parse_xml_file(os.path.join(XML_DIR, 'werke.xml')), 'werk'), + } + + all_errors = [] + + beitraege_dir = os.path.join(XML_DIR, 'beitraege') + for filename in os.listdir(beitraege_dir): + if filename.endswith('-beitraege.xml'): + beitrag_root = parse_xml_file(os.path.join(beitraege_dir, filename)) + if beitrag_root is not None: + errors = check_references(beitrag_root, reference_data, filename) + all_errors.extend(errors) + + all_errors.sort(key=lambda x: (x[0], x[1])) + + with open('linter_results.txt', 'w') as f: + for filename, line_number, error_message in all_errors: + f.write(f"{filename}:{line_number}:{error_message}\n") + + if all_errors: + for filename, line_number, error_message in all_errors: + print(f"{filename}, Line {line_number}: {error_message}") + exit(1) # Exit with error code if there are any errors + else: + print("No errors found.") + +if __name__ == "__main__": + main() diff --git a/Scripts/process_linter_output.py b/Scripts/process_linter_output.py new file mode 100644 index 0000000..5dd0b67 --- /dev/null +++ b/Scripts/process_linter_output.py @@ -0,0 +1,16 @@ +import os + +def main(): + if not os.path.exists('linter_results.txt'): + print("No linter results found.") + return + + with open('linter_results.txt', 'r') as f: + for line in f: + parts = line.strip().split(':', 2) + if len(parts) == 3: + filename, line_number, error_message = parts + print(f"::error file={filename},line={line_number}::{error_message}") + +if __name__ == "__main__": + main()