mirror of
https://github.com/Theodor-Springmann-Stiftung/lenz-briefe.git
synced 2025-10-29 17:15:31 +00:00
Verweise linter
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -1,11 +1,17 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import sys
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||||
XML_DIR = os.path.join(REPO_ROOT, 'data', 'xml')
|
XML_DIR = os.path.join(REPO_ROOT, 'data', 'xml')
|
||||||
|
|
||||||
|
# Namespace map for your "lenz" default namespace
|
||||||
|
NAMESPACE_MAP = {"lenz": "https://lenz-archiv.de"}
|
||||||
|
|
||||||
def parse_xml_file(filepath):
|
def parse_xml_file(filepath):
|
||||||
|
"""
|
||||||
|
Parse an XML file using lxml and return the root element, or None on parse error.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
parser = etree.XMLParser(remove_blank_text=True)
|
parser = etree.XMLParser(remove_blank_text=True)
|
||||||
tree = etree.parse(filepath, parser)
|
tree = etree.parse(filepath, parser)
|
||||||
@@ -14,74 +20,118 @@ def parse_xml_file(filepath):
|
|||||||
print(f"Error parsing {filepath}: {e}")
|
print(f"Error parsing {filepath}: {e}")
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_all_ids(root, tag):
|
def get_letter_desc_ids(meta_root):
|
||||||
return set(elem.get('letter') for elem in root.xpath(f'.//{tag}'))
|
"""
|
||||||
|
Retrieve all 'letter' attribute values from <letterDesc letter="...">
|
||||||
|
in the meta.xml file, which uses the default namespace https://lenz-archiv.de.
|
||||||
|
Returns a set of string IDs.
|
||||||
|
"""
|
||||||
|
letter_ids = set()
|
||||||
|
# Find all <letterDesc> in the lenz namespace
|
||||||
|
letter_descs = meta_root.xpath("//lenz:letterDesc", namespaces=NAMESPACE_MAP)
|
||||||
|
for desc in letter_descs:
|
||||||
|
val = desc.get("letter")
|
||||||
|
if val:
|
||||||
|
letter_ids.add(val)
|
||||||
|
return letter_ids
|
||||||
|
|
||||||
def get_all_refs(root, tag, attr):
|
def get_person_def_ids(ref_root):
|
||||||
return set(elem.get(attr) for elem in root.xpath(f'.//{tag}'))
|
"""
|
||||||
|
Retrieve all 'index' attributes from <personDef index="..."> in references.xml.
|
||||||
|
If references.xml is un-namespaced, we can use a non-namespace XPath: //personDef
|
||||||
|
Adjust if references.xml also has a namespace.
|
||||||
|
"""
|
||||||
|
return set(elem.get("index") for elem in ref_root.xpath("//personDef"))
|
||||||
|
|
||||||
|
def get_location_def_ids(ref_root):
|
||||||
|
"""
|
||||||
|
Retrieve all 'index' attributes from <locationDef index="..."> in references.xml.
|
||||||
|
"""
|
||||||
|
return set(elem.get("index") for elem in ref_root.xpath("//locationDef"))
|
||||||
|
|
||||||
|
def get_app_def_ids(ref_root):
|
||||||
|
"""
|
||||||
|
Retrieve all 'index' attributes from <appDef index="..."> in references.xml.
|
||||||
|
"""
|
||||||
|
return set(elem.get("index") for elem in ref_root.xpath("//appDef"))
|
||||||
|
|
||||||
def check_references(root, reference_data, filepath):
|
def check_references(root, reference_data, filepath):
|
||||||
|
"""
|
||||||
|
Check various references across briefe.xml, meta.xml, and traditions.xml
|
||||||
|
against known IDs from meta.xml (letterDesc) and references.xml (personDef, locationDef, appDef).
|
||||||
|
"""
|
||||||
errors = []
|
errors = []
|
||||||
relative_path = os.path.relpath(filepath, REPO_ROOT)
|
relative_path = os.path.relpath(filepath, REPO_ROOT)
|
||||||
|
|
||||||
def add_error(element, ref_type, ref_id):
|
def add_error(element, ref_type, ref_id):
|
||||||
line_number = element.sourceline
|
line_number = element.sourceline
|
||||||
error_message = f"Invalid reference ({ref_type}:{ref_id})"
|
msg = f"Invalid reference ({ref_type}:{ref_id})"
|
||||||
errors.append({
|
errors.append({
|
||||||
"file": relative_path,
|
"file": relative_path,
|
||||||
"line": line_number,
|
"line": line_number,
|
||||||
"message": error_message
|
"message": msg
|
||||||
})
|
})
|
||||||
|
|
||||||
# Check letterText in briefe to letterDesc in meta
|
# 1) Check <letterText letter="..."> references to meta.xml's letterDesc
|
||||||
for letter_text in root.xpath('//letterText'):
|
# (Assumes <letterText> is un-namespaced, in briefe.xml or wherever.)
|
||||||
letter_id = letter_text.get('letter')
|
for letter_text in root.xpath("//letterText"):
|
||||||
if letter_id not in reference_data['letterDesc']:
|
letter_id = letter_text.get("letter")
|
||||||
add_error(letter_text, 'letterText', letter_id)
|
if letter_id not in reference_data["letterDesc"]:
|
||||||
|
add_error(letter_text, "letterText", letter_id)
|
||||||
|
|
||||||
# Check letterTradition in traditions to letterDesc in meta
|
# 2) Check <letterTradition letter="..."> references to meta.xml's letterDesc
|
||||||
for letter_tradition in root.xpath('//letterTradition'):
|
for letter_tradition in root.xpath("//letterTradition"):
|
||||||
letter_id = letter_tradition.get('letter')
|
letter_id = letter_tradition.get("letter")
|
||||||
if letter_id not in reference_data['letterDesc']:
|
if letter_id not in reference_data["letterDesc"]:
|
||||||
add_error(letter_tradition, 'letterTradition', letter_id)
|
add_error(letter_tradition, "letterTradition", letter_id)
|
||||||
|
|
||||||
# Check hand in briefe with personDef in references
|
# 3) Check <location ref="..."> in the lenz namespace, referencing locationDef
|
||||||
for hand in root.xpath('//hand'):
|
# e.g. <location ref="3" />
|
||||||
ref = hand.get('ref')
|
for location_elem in root.xpath("//lenz:location", namespaces=NAMESPACE_MAP):
|
||||||
if ref not in reference_data['personDef']:
|
ref = location_elem.get("ref")
|
||||||
add_error(hand, 'hand', ref)
|
if ref not in reference_data["locationDef"]:
|
||||||
|
add_error(location_elem, "location", ref)
|
||||||
|
|
||||||
# Check sender and receiver in meta with personDef in references
|
# 4) Check <person ref="..."> in the lenz namespace, referencing personDef
|
||||||
for person in root.xpath('//sender | //receiver'):
|
# e.g. <person ref="1" />
|
||||||
ref = person.get('ref')
|
for person_elem in root.xpath("//lenz:person", namespaces=NAMESPACE_MAP):
|
||||||
if ref not in reference_data['personDef']:
|
ref = person_elem.get("ref")
|
||||||
add_error(person, 'sender/receiver', ref)
|
if ref not in reference_data["personDef"]:
|
||||||
|
add_error(person_elem, "person", ref)
|
||||||
|
|
||||||
# Check location in meta with locationDef in references
|
# 5) Check <app ref="..."> (un-namespaced?), referencing appDef
|
||||||
for location in root.xpath('//location'):
|
for app_elem in root.xpath("//app"):
|
||||||
ref = location.get('ref')
|
ref = app_elem.get("ref")
|
||||||
if ref not in reference_data['locationDef']:
|
if ref not in reference_data["appDef"]:
|
||||||
add_error(location, 'location', ref)
|
add_error(app_elem, "app", ref)
|
||||||
|
|
||||||
# Check app in traditions with appDef in references
|
|
||||||
for app in root.xpath('//app'):
|
|
||||||
ref = app.get('ref')
|
|
||||||
if ref not in reference_data['appDef']:
|
|
||||||
add_error(app, 'app', ref)
|
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
# Parse meta.xml (has letterDesc in default "lenz" namespace)
|
||||||
|
meta_root = parse_xml_file(os.path.join(XML_DIR, "meta.xml"))
|
||||||
|
if meta_root is None:
|
||||||
|
print("Could not parse meta.xml; aborting.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Parse references.xml for personDef, locationDef, appDef
|
||||||
|
ref_root = parse_xml_file(os.path.join(XML_DIR, "references.xml"))
|
||||||
|
if ref_root is None:
|
||||||
|
print("Could not parse references.xml; aborting.")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
# Build our cross-file reference data
|
||||||
reference_data = {
|
reference_data = {
|
||||||
'letterDesc': get_all_ids(parse_xml_file(os.path.join(XML_DIR, 'meta.xml')), 'letterDesc'),
|
"letterDesc": get_letter_desc_ids(meta_root),
|
||||||
'personDef': get_all_refs(parse_xml_file(os.path.join(XML_DIR, 'references.xml')), 'personDef', 'index'),
|
"personDef": get_person_def_ids(ref_root),
|
||||||
'locationDef': get_all_refs(parse_xml_file(os.path.join(XML_DIR, 'references.xml')), 'locationDef', 'index'),
|
"locationDef": get_location_def_ids(ref_root),
|
||||||
'appDef': get_all_refs(parse_xml_file(os.path.join(XML_DIR, 'references.xml')), 'appDef', 'index'),
|
"appDef": get_app_def_ids(ref_root),
|
||||||
}
|
}
|
||||||
|
|
||||||
all_errors = []
|
all_errors = []
|
||||||
|
|
||||||
files_to_check = ['briefe.xml', 'meta.xml', 'traditions.xml']
|
# Check references in the following files
|
||||||
|
files_to_check = ["briefe.xml", "meta.xml", "traditions.xml"]
|
||||||
for filename in files_to_check:
|
for filename in files_to_check:
|
||||||
filepath = os.path.join(XML_DIR, filename)
|
filepath = os.path.join(XML_DIR, filename)
|
||||||
root = parse_xml_file(filepath)
|
root = parse_xml_file(filepath)
|
||||||
@@ -89,16 +139,17 @@ def main():
|
|||||||
errors = check_references(root, reference_data, filepath)
|
errors = check_references(root, reference_data, filepath)
|
||||||
all_errors.extend(errors)
|
all_errors.extend(errors)
|
||||||
|
|
||||||
|
# Report any errors
|
||||||
if all_errors:
|
if all_errors:
|
||||||
print("The linter found the following errors:")
|
print("The linter found the following errors:")
|
||||||
for error in all_errors:
|
for error in all_errors:
|
||||||
print(f"{error['file']}, Line {error['line']}: {error['message']}")
|
print(f"{error['file']}, Line {error['line']}: {error['message']}")
|
||||||
|
|
||||||
# GitHub Actions output
|
# Print GitHub Actions compatible error lines
|
||||||
for error in all_errors:
|
for error in all_errors:
|
||||||
print(f"::error file={error['file']},line={error['line']}::{error['message']}")
|
print(f"::error file={error['file']},line={error['line']}::{error['message']}")
|
||||||
|
|
||||||
exit(1) # Exit with error code if errors were found
|
sys.exit(1)
|
||||||
else:
|
else:
|
||||||
print("No errors found.")
|
print("No errors found.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user