Changed output of custom linter

This commit is contained in:
Simon Martens
2024-07-17 19:04:01 +02:00
parent 35cc933362
commit 87d64e214b

View File

@@ -2,8 +2,8 @@ import os
from lxml import etree from lxml import etree
NAMESPACE = {'kgpz': 'https://www.koenigsberger-zeitungen.de'} NAMESPACE = {'kgpz': 'https://www.koenigsberger-zeitungen.de'}
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
XML_DIR = os.path.abspath(os.path.join(SCRIPT_DIR, '..', 'XML')) XML_DIR = os.path.join(REPO_ROOT, 'XML')
def parse_xml_file(filepath): def parse_xml_file(filepath):
try: try:
@@ -17,15 +17,16 @@ def parse_xml_file(filepath):
def get_all_ids(root, tag):
    """Return the set of ``id`` attribute values of all ``kgpz:<tag>`` elements below *root*.

    Args:
        root: Parsed XML node exposing an ``xpath(expr, namespaces=...)``
            method (presumably an lxml element, since the file imports
            ``lxml.etree`` -- confirm against the caller).
        tag: Local element name to look up inside the ``kgpz`` namespace.

    Returns:
        A set of the ``id`` values found. Elements that carry no ``id``
        attribute are skipped, so the set never contains ``None``; otherwise
        a membership test against ``None`` could succeed by accident.
    """
    elements = root.xpath(f'.//kgpz:{tag}', namespaces=NAMESPACE)
    return {elem.get('id') for elem in elements if elem.get('id') is not None}
def check_references(beitrag_root, reference_data, filepath):
    """Collect every reference in one document whose target ID is unknown.

    For each of the reference kinds ``akteur``, ``kategorie``, ``ort`` and
    ``werk``, all ``kgpz:<kind>`` elements are looked up via XPath and their
    ``ref`` attribute is checked against ``reference_data[<kind>]``.

    Args:
        beitrag_root: Parsed XML node exposing ``xpath(expr, namespaces=...)``
            whose matches provide ``get()`` and ``sourceline`` (presumably
            lxml elements -- confirm against the caller).
        reference_data: Mapping from reference kind to a container of valid
            IDs; it is indexed with the four kind names listed above.
        filepath: Path of the file *beitrag_root* was parsed from. It is
            reported relative to ``REPO_ROOT``.

    Returns:
        A list of ``(relative_path, line_number, message)`` tuples, one per
        invalid reference, in the order they were encountered.
    """
    relative_path = os.path.relpath(filepath, REPO_ROOT)
    errors = []
    for ref_type in ('akteur', 'kategorie', 'ort', 'werk'):
        for ref in beitrag_root.xpath(f'//kgpz:{ref_type}', namespaces=NAMESPACE):
            ref_id = ref.get('ref')
            # A missing ``ref`` attribute is always an error, even if the
            # reference container happens to contain ``None``.
            if ref_id is None or ref_id not in reference_data[ref_type]:
                errors.append(
                    (relative_path, ref.sourceline, f"INVALID REFERENCE ({ref_type}:{ref_id})")
                )
    return errors
def main(): def main():
@@ -41,20 +42,21 @@ def main():
beitraege_dir = os.path.join(XML_DIR, 'beitraege') beitraege_dir = os.path.join(XML_DIR, 'beitraege')
for filename in os.listdir(beitraege_dir): for filename in os.listdir(beitraege_dir):
if filename.endswith('-beitraege.xml'): if filename.endswith('-beitraege.xml'):
beitrag_root = parse_xml_file(os.path.join(beitraege_dir, filename)) filepath = os.path.join(beitraege_dir, filename)
beitrag_root = parse_xml_file(filepath)
if beitrag_root is not None: if beitrag_root is not None:
errors = check_references(beitrag_root, reference_data, filename) errors = check_references(beitrag_root, reference_data, filepath)
all_errors.extend(errors) all_errors.extend(errors)
all_errors.sort(key=lambda x: (x[0], x[1])) all_errors.sort(key=lambda x: (x[0], x[1]))
with open('linter_results.txt', 'w') as f: with open('linter_results.txt', 'w') as f:
for filename, line_number, error_message in all_errors: for filepath, line_number, error_message in all_errors:
f.write(f"{filename}:{line_number}:{error_message}\n") f.write(f"{filepath}:{line_number}:{error_message}\n")
if all_errors: if all_errors:
for filename, line_number, error_message in all_errors: for filepath, line_number, error_message in all_errors:
print(f"{filename}, Line {line_number}: {error_message}") print(f"{filepath}, Line {line_number}: {error_message}")
exit(1) # Exit with error code if there are any errors exit(1) # Exit with error code if there are any errors
else: else:
print("No errors found.") print("No errors found.")