Bugfix: Kommentare in meta.xml erhalten

This commit is contained in:
Simon Martens
2025-02-05 20:12:43 +01:00
parent 630a1eae69
commit ff78eb7e6d
3 changed files with 73 additions and 39 deletions

View File

@@ -1,32 +1,54 @@
import xml.etree.ElementTree as ET
#!/usr/bin/env python3
def transform_dates_to_sort_text(input_file, output_file):
tree = ET.parse(input_file)
import sys
from lxml import etree
def transform_sort_to_date(input_file, output_file):
    """Rewrite the dating markup inside <sent> elements, keeping comments.

    For every <letterDesc> that is a direct child of the root's
    <descriptions> element, the first <sent> child is processed:

    1. If it holds both a <date> and a <sort> child, the "value"
       attribute of <date> (when non-empty) becomes the text of <sort>,
       and the <date> element is removed.
    2. The <sort> element, if present, is renamed to <date>, and its
       "value" attribute, if present, is renamed to "when".

    Args:
        input_file: Source XML, handed to ``etree.parse``.
        output_file: Destination handed to ``tree.write``; the result is
            serialized as UTF-8 with an XML declaration.
    """
    # Ask the parser explicitly to keep comment nodes so they are written
    # back out unchanged.
    parser = etree.XMLParser(remove_comments=False)
    tree = etree.parse(input_file, parser)
    root = tree.getroot()

    # All lookups use plain tag names, i.e. they assume the document does
    # not put these elements in a namespace. A namespaced document would
    # need namespace-aware searches instead.
    descriptions = root.find("descriptions")
    if descriptions is not None:
        for letter_desc in descriptions.findall("letterDesc"):
            sent = letter_desc.find("sent")
            if sent is None:
                continue

            old_date_el = sent.find("date")
            sort_el = sent.find("sort")
            if sort_el is None:
                # Neither step applies without a <sort> element.
                continue

            # 1) Move the old <date> @value into <sort>.text, then drop
            #    the old <date> so only one <date> remains after step 2.
            if old_date_el is not None:
                date_value = old_date_el.get("value")
                if date_value:
                    sort_el.text = date_value
                sent.remove(old_date_el)

            # 2) Rename <sort> to <date> and its @value to @when.
            sort_el.tag = "date"
            old_value = sort_el.attrib.pop("value", None)
            if old_value is not None:
                sort_el.set("when", old_value)

    # Write the modified tree exactly once.
    tree.write(
        output_file,
        encoding="UTF-8",
        xml_declaration=True,
        pretty_print=True,
    )
if __name__ == "__main__":
    # Command-line entry point: expects exactly two arguments, the input
    # XML path and the output XML path.
    if len(sys.argv) != 3:
        print("Usage: {} INPUT.xml OUTPUT.xml".format(sys.argv[0]))
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2]
    transform_sort_to_date(input_file, output_file)