From ff78eb7e6d2f5e5dfe70ccf31724394b106b9d1b Mon Sep 17 00:00:00 2001 From: Simon Martens Date: Wed, 5 Feb 2025 20:12:43 +0100 Subject: [PATCH] Bugfix: Kommentare in meta.xml erhalten --- data/xml/meta.xml | 48 +++++++++++++++--------------- data/xsd/meta.xsd | 10 +++++++ scripts/transform-meta-sort.py | 54 ++++++++++++++++++++++++---------- 3 files changed, 73 insertions(+), 39 deletions(-) diff --git a/data/xml/meta.xml b/data/xml/meta.xml index 94207de..fdf8ee7 100644 --- a/data/xml/meta.xml +++ b/data/xml/meta.xml @@ -16,7 +16,6 @@ - Dorpat (Tartu), 11. Oktober 1767 @@ -63,7 +62,7 @@ - Tarwast (Tarvastu), nach 24. Januar 1768 + Tarwast (Tarvastu), nach 24. Januar 1768 @@ -93,7 +92,7 @@ - Dorpat (Tartu), + Dorpat (Tartu), wahrscheinlich Mitte 1770 @@ -109,7 +108,7 @@ - Fort Louis, Ende Mai + Fort Louis, Ende Mai 1772 @@ -373,7 +372,7 @@ - + Landau, 10. Dezember 1772 @@ -727,7 +726,7 @@ - + @@ -758,7 +757,7 @@ - + Straßburg, Wahrscheinlich Juni 1775 @@ -1418,7 +1417,7 @@ - + @@ -1451,7 +1450,7 @@ - Straßburg, 21./22. + Straßburg, 21./22. Januar 1776 @@ -1552,7 +1551,7 @@ Auf dem Weg von Hannover nach Kassel?, 9. Februar 1776 - + @@ -1567,7 +1566,7 @@ Auf dem Weg von Hannover nach Kassel?, Februar 1776 - + @@ -1826,7 +1825,7 @@ - + @@ -2176,7 +2175,7 @@ - + @@ -2767,7 +2766,8 @@ - + @@ -3165,7 +3165,7 @@ - + @@ -3298,12 +3298,12 @@ Weimar [tatsächlich wohl aus Kochberg], 20. September 1776 - + - + @@ -3468,7 +3468,7 @@ Wohl Berka oder Kochberg, Oktober 1776 - + @@ -3483,7 +3483,7 @@ Wohl Berka oder Kochberg, Oktober/November 1776 - + @@ -3595,19 +3595,19 @@ Weimar [wohl Berka], 23. November 1776 - + - + - + @@ -3822,6 +3822,7 @@ + Januar/Februar 1777 @@ -3863,6 +3864,7 @@ + Emmendingen, [vor 22. April 1777], warum nicht 9. April 1777 @@ -3936,7 +3938,7 @@ Schinznach/Zürich, 12.-15. Mai 1777 - + diff --git a/data/xsd/meta.xsd b/data/xsd/meta.xsd index f788f69..540bb84 100644 --- a/data/xsd/meta.xsd +++ b/data/xsd/meta.xsd @@ -58,9 +58,19 @@ + + + + Pflicht: Jeder Akteur hat eine eindeutige ID. + + + + + + diff --git a/scripts/transform-meta-sort.py b/scripts/transform-meta-sort.py index e3b9eed..499f94f 100644 --- a/scripts/transform-meta-sort.py +++ b/scripts/transform-meta-sort.py @@ -1,32 +1,54 @@ -import xml.etree.ElementTree as ET +#!/usr/bin/env python3 -def transform_dates_to_sort_text(input_file, output_file): - tree = ET.parse(input_file) +import sys +from lxml import etree + +def transform_sort_to_date(input_file, output_file): + """ + Transform an XML file: + - Move 's "value" attr as text to + - Remove that element + - Rename to and rename its @value to @when + - Preserve comments + """ + # Use a parser that keeps comments + parser = etree.XMLParser(remove_comments=False) + tree = etree.parse(input_file, parser) root = tree.getroot() - # Find all elements under + # If your XML has namespaces, you'll need namespace-aware searches. + # This example assumes no default namespace is used for 'descriptions' or 'letterDesc'. descriptions = root.find("descriptions") if descriptions is not None: for letter_desc in descriptions.findall("letterDesc"): sent = letter_desc.find("sent") if sent is not None: - # Locate the and elements - date_el = sent.find("date") + # Locate and inside + old_date_el = sent.find("date") sort_el = sent.find("sort") - if date_el is not None and sort_el is not None: - # Move the date attribute's value into sort_el's text - date_value = date_el.get("value") + # 1) Move the old @value into .text and remove + if old_date_el is not None and sort_el is not None: + date_value = old_date_el.get("value") if date_value: sort_el.text = date_value + sent.remove(old_date_el) - # Remove the element from - sent.remove(date_el) + # 2) Rename to , and rename @value to @when + if sort_el is not None: + sort_el.tag = "date" # rename element + old_value = sort_el.attrib.pop("value", None) + if old_value is not None: + sort_el.set("when", old_value) - # Write the modified tree to a new file - tree.write(output_file, encoding="UTF-8", xml_declaration=True) + # Write the modified tree, preserving comments + tree.write(output_file, encoding="UTF-8", xml_declaration=True, pretty_print=True) if __name__ == "__main__": - # Usage example: - # transform_dates_to_sort_text("input.xml", "output.xml") - transform_dates_to_sort_text("../data/xml/meta.xml", "output.xml") + if len(sys.argv) != 3: + print("Usage: {} INPUT.xml OUTPUT.xml".format(sys.argv[0])) + sys.exit(1) + + input_file = sys.argv[1] + output_file = sys.argv[2] + transform_sort_to_date(input_file, output_file)