mirror of
https://github.com/Theodor-Springmann-Stiftung/KGPZ.git
synced 2025-10-29 09:05:30 +00:00
+Page break checks
This commit is contained in:
@@ -1,9 +1,35 @@
|
|||||||
import os
|
import os
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
|
||||||
XML_DIR = os.path.join(REPO_ROOT, 'XML')
|
XML_DIR = os.path.join(REPO_ROOT, 'XML')
|
||||||
|
|
||||||
|
def check_order_attribute_conditionally(tree):
    """Report <stueck> elements that need an 'order' attribute but lack one.

    Every ``kgpz:stueck`` child of a ``kgpz:beitrag`` that has no ``bis``
    attribute is grouped by its ``(when, nr, von)`` attribute values. When a
    group holds more than one element, each member of that group must carry
    an ``order`` attribute; one message is produced per member without it.

    Args:
        tree: Parsed document exposing ``getroot()``; the root must support
            ``xpath(..., namespaces=...)`` and the matched elements must
            expose ``get()`` and ``sourceline`` (as used below).

    Returns:
        A list of error message strings, empty when nothing is missing.
        Messages follow the order in which groups were first encountered
        and, inside a group, the order of the matched elements.
    """
    namespaces = {'kgpz': 'https://www.koenigsberger-zeitungen.de'}
    candidates = tree.getroot().xpath(
        '//kgpz:beitrag/kgpz:stueck[not(@bis)]', namespaces=namespaces
    )

    # Bucket the elements by the attribute triple that identifies a group.
    buckets = defaultdict(list)
    for element in candidates:
        triple = (element.get('when'), element.get('nr'), element.get('von'))
        buckets[triple].append(element)

    # Only buckets with several members are ambiguous; a single element
    # does not need an explicit 'order'.
    return [
        f"Custom Validation Error: <stueck> on line {member.sourceline} is part of an ambiguous group "
        f"(when='{when}', nr='{nr}', von='{von}') but is missing the 'order' attribute."
        for (when, nr, von), members in buckets.items()
        if len(members) > 1
        for member in members
        if member.get('order') is None
    ]
|
||||||
|
|
||||||
def validate_xml(xml_file):
|
def validate_xml(xml_file):
|
||||||
errors = []
|
errors = []
|
||||||
try:
|
try:
|
||||||
@@ -20,21 +46,29 @@ def validate_xml(xml_file):
|
|||||||
xsd_doc = etree.parse(xsd_path)
|
xsd_doc = etree.parse(xsd_path)
|
||||||
schema = etree.XMLSchema(xsd_doc)
|
schema = etree.XMLSchema(xsd_doc)
|
||||||
schema.assertValid(tree)
|
schema.assertValid(tree)
|
||||||
print(f"Validation erfolgreich: {xml_file}")
|
print(f"XSD Validation successful: {xml_file}")
|
||||||
|
|
||||||
|
# Perform custom validation after XSD check
|
||||||
|
custom_errors = check_order_attribute_conditionally(tree)
|
||||||
|
if custom_errors:
|
||||||
|
errors.extend(custom_errors)
|
||||||
|
else:
|
||||||
|
print(f"Custom Validation successful: {xml_file}")
|
||||||
|
|
||||||
else:
|
else:
|
||||||
errors.append(f"Schema-Datei nicht gefunden: {xsd_path} für {xml_file}")
|
errors.append(f"Schema file not found: {xsd_path} for {xml_file}")
|
||||||
else:
|
else:
|
||||||
errors.append(f"Keine Schema-Location gefunden in {xml_file}")
|
errors.append(f"No schemaLocation found in {xml_file}")
|
||||||
|
|
||||||
except etree.DocumentInvalid as e:
|
except etree.DocumentInvalid as e:
|
||||||
errors.append(f"Validierungsfehler in {xml_file}:")
|
errors.append(f"Validation error in {xml_file}:")
|
||||||
for error in e.error_log:
|
for error in e.error_log:
|
||||||
errors.append(f" Zeile {error.line}, Spalte {error.column}: {error.message}")
|
errors.append(f" Line {error.line}, Column {error.column}: {error.message}")
|
||||||
except etree.XMLSyntaxError as e:
|
except etree.XMLSyntaxError as e:
|
||||||
errors.append(f"XML-Syntaxfehler in {xml_file}:")
|
errors.append(f"XML syntax error in {xml_file}:")
|
||||||
errors.append(f" Zeile {e.lineno}, Spalte {e.offset}: {e.msg}")
|
errors.append(f" Line {e.lineno}, Column {e.offset}: {e.msg}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
errors.append(f"Fehler bei der Verarbeitung von {xml_file}: {str(e)}")
|
errors.append(f"Error processing {xml_file}: {str(e)}")
|
||||||
|
|
||||||
return errors
|
return errors
|
||||||
|
|
||||||
@@ -48,14 +82,15 @@ def main():
|
|||||||
all_errors.extend(errors)
|
all_errors.extend(errors)
|
||||||
|
|
||||||
if all_errors:
|
if all_errors:
|
||||||
print("Validierung fehlgeschlagen. Bitte korrigieren Sie die folgenden Fehler:")
|
print("Validation failed. Please correct the following errors:")
|
||||||
with open('schema_validation_errors.txt', 'w') as f:
|
with open('schema_validation_errors.txt', 'w') as f:
|
||||||
for error in all_errors:
|
for error in all_errors:
|
||||||
print(error)
|
print(error)
|
||||||
f.write(f"{error}\n")
|
f.write(f"{error}\n")
|
||||||
exit(1)
|
exit(1)
|
||||||
else:
|
else:
|
||||||
print("Alle XML-Dateien wurden erfolgreich validiert.")
|
print("All XML files were successfully validated.")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|
||||||
|
|||||||
@@ -104,5 +104,15 @@
|
|||||||
<xsd:selector xpath="kgpz:beitrag" />
|
<xsd:selector xpath="kgpz:beitrag" />
|
||||||
<xsd:field xpath="@id" />
|
<xsd:field xpath="@id" />
|
||||||
</xsd:unique>
|
</xsd:unique>
|
||||||
|
<!-- Uniqueness constraint over kgpz:beitrag/kgpz:stueck elements: the combination of the attributes when, nr, von and bis must not repeat. NOTE(review): under XSD identity-constraint rules an element lacking one of these fields (e.g. no @bis) is presumably not covered by this constraint - confirm. -->
<xsd:unique name="uniqueStueckJump">
|
||||||
|
<xsd:annotation>
|
||||||
|
<xsd:documentation>Ein Beitrag kann nur einmal von einer Seite auf eine andere springen (Kombination aus when, nr, von und bis muss eindeutig sein).</xsd:documentation>
|
||||||
|
</xsd:annotation>
|
||||||
|
<xsd:selector xpath="kgpz:beitrag/kgpz:stueck"/>
|
||||||
|
<xsd:field xpath="@when"/>
|
||||||
|
<xsd:field xpath="@nr"/>
|
||||||
|
<xsd:field xpath="@von"/>
|
||||||
|
<xsd:field xpath="@bis"/>
|
||||||
|
</xsd:unique>
|
||||||
</xsd:element>
|
</xsd:element>
|
||||||
</xsd:schema>
|
</xsd:schema>
|
||||||
Reference in New Issue
Block a user