diff --git a/xmlmodels/letter.go b/xmlmodels/letter.go
index ffb7d72..860a957 100644
--- a/xmlmodels/letter.go
+++ b/xmlmodels/letter.go
@@ -3,17 +3,10 @@ package xmlmodels
import (
"encoding/json"
"encoding/xml"
+ "io"
+ "strconv"
)
-type Letter struct {
- XMLName xml.Name `xml:"letterText"`
- Letter int `xml:"letter,attr"`
- Pages []Page `xml:"page"`
- Hands []RefElement `xml:"hand"`
- Content string `xml:",innerxml"`
- Chardata string `xml:",chardata"`
-}
-
+// Keys returns a one-element slice containing the letter number (l.Letter).
func (l Letter) Keys() []any {
return []any{l.Letter}
}
@@ -30,7 +23,220 @@ func (l Letter) String() string {
return string(json)
}
-type Page struct {
- XMLName xml.Name `xml:"page"`
- Index int `xml:"index,attr"`
+// SidenotePosition enumerates the placements that a sidenote's position
+// attribute can name.
+type SidenotePosition uint8
+
+// Known sidenote positions. SidenotePositionLeft is the zero value.
+const (
+	SidenotePositionLeft SidenotePosition = iota
+	SidenotePositionRight
+	SidenotePositionTop
+	SidenotePositionTopLeft
+	SidenotePositionTopRight
+	SidenotePositionBottom
+	SidenotePositionBottomLeft
+	SidenotePositionBottomRight
+)
+
+// UnmarshalXMLAttr sets sp from the attribute value, which is one of "left",
+// "right", "top", "top left", "top right", "bottom", "bottom left" or
+// "bottom right". Any other value selects SidenotePositionLeft. The returned
+// error is always nil.
+func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error {
+	// Start from the fallback; it also covers the explicit "left" value.
+	pos := SidenotePositionLeft
+
+	switch attr.Value {
+	case "right":
+		pos = SidenotePositionRight
+	case "top":
+		pos = SidenotePositionTop
+	case "top left":
+		pos = SidenotePositionTopLeft
+	case "top right":
+		pos = SidenotePositionTopRight
+	case "bottom":
+		pos = SidenotePositionBottom
+	case "bottom left":
+		pos = SidenotePositionBottomLeft
+	case "bottom right":
+		pos = SidenotePositionBottomRight
+	}
+
+	*sp = pos
+	return nil
+}
+
+// Letter is the decoded form of a letterText element. It is filled by the
+// custom UnmarshalXML method rather than by struct tags on its fields.
+type Letter struct {
+ XMLName xml.Name `xml:"letterText"`
+ Letter int // integer value of the "letter" attribute; left unchanged if that attribute is missing or not an integer
+ Pages []Page // pages collected by parseTokens, in the order their <page> tags were read
+}
+
+// Page holds what parseTokens collected between one <page> tag and the next
+// (or the end of the letter).
+type Page struct {
+ No int // integer value of the <page> tag's "index" attribute
+ Letter int // NOTE(review): not assigned by the parsing code in this file; presumably the owning letter's number — confirm
+ Sidenotes []Sidenote // sidenotes read while this page was the current one
+ Hands []int // integer "ref" attributes of the <hand> tags read on this page
+ Tokens []xml.Token // copied tokens collected by parseTokens for this page
+ CharData string // set by parseTokens from the page's character data tokens
+}
+
+// Sidenote is the decoded form of a <sidenote> element.
+type Sidenote struct {
+ XMLName xml.Name // name of the start element passed to UnmarshalXML
+ Position SidenotePosition // decoded from the "pos" attribute
+ Page int // integer value of the "page" attribute
+ Annotation string // value of the "annotation" attribute
+ Anchor int // len(page.Tokens) of the current page when parseTokens reached this sidenote
+ Tokens []xml.Token // intended to hold the sidenote's content tokens — TODO confirm against UnmarshalXML
+ CharData string // NOTE(review): not assigned by the code visible in this change — confirm intended use
+}
+
+// Char pairs a string value with a slice of XML tokens.
+// NOTE(review): Char is not referenced by the parsing code visible in this
+// change; presumably Stack is the element context Value was found in — confirm.
+type Char struct {
+ Stack []xml.Token
+ Value string
+}
+
+// String returns c.Value.
+func (c Char) String() string {
+ return c.Value
+}
+
+// UnmarshalXML decodes a letterText element into lt. It records the element
+// name, reads the letter number from the "letter" attribute (a value that is
+// not an integer is ignored) and hands the element's content to parseTokens,
+// whose error, if any, is returned.
+func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	lt.XMLName = start.Name
+
+	for _, a := range start.Attr {
+		if a.Name.Local != "letter" {
+			continue
+		}
+		n, convErr := strconv.Atoi(a.Value)
+		if convErr != nil {
+			// Best effort: keep whatever lt.Letter already holds.
+			continue
+		}
+		lt.Letter = n
+	}
+
+	return lt.parseTokens(d)
+}
+
+// parseTokens reads tokens from d until the closing </letterText> tag and
+// splits them into pages stored in lt.Pages.
+//
+//   - A <page> start tag stores the page collected so far (if any) and begins
+//     a new one whose No is the tag's integer "index" attribute; the rest of
+//     the <page> element is skipped.
+//   - A <sidenote> element is decoded by Sidenote.UnmarshalXML and attached to
+//     the current page. Its Anchor is the number of tokens the page held when
+//     the sidenote started; the sidenote's own tokens are not added to the page.
+//   - A <hand> start tag adds its integer "ref" attribute to the page's Hands.
+//   - Every other token is copied into the current page's Tokens exactly once,
+//     and character data is also concatenated into the page's CharData.
+//
+// Tokens that appear before the first <page> tag belong to no page and are
+// dropped; a sidenote there is still consumed so that its content cannot leak
+// into a later page. The first error reported by the decoder is returned
+// unchanged.
+func (lt *Letter) parseTokens(d *xml.Decoder) error {
+	var cur *Page // page currently being filled; nil before the first <page>
+
+	for {
+		token, err := d.Token()
+		if err == io.EOF {
+			// Clean end of input without a closing tag: keep what was collected.
+			if cur != nil {
+				lt.Pages = append(lt.Pages, *cur)
+			}
+			return nil
+		}
+		if err != nil {
+			return err
+		}
+
+		// Token() may reuse its underlying buffers, so keep a private copy.
+		tok := xml.CopyToken(token)
+
+		switch t := tok.(type) {
+		case xml.StartElement:
+			switch t.Name.Local {
+			case "page":
+				if cur != nil {
+					lt.Pages = append(lt.Pages, *cur)
+				}
+				cur = &Page{}
+
+				for _, attr := range t.Attr {
+					if attr.Name.Local != "index" {
+						continue
+					}
+					if idx, err := strconv.Atoi(attr.Value); err == nil {
+						cur.No = idx
+					}
+				}
+
+				// The page marker carries no content of its own; consume the
+				// remainder of the element, including its end tag.
+				if err := d.Skip(); err != nil {
+					return err
+				}
+
+			case "sidenote":
+				// UnmarshalXML advances the decoder past the sidenote's end tag.
+				var sidenote Sidenote
+				if cur != nil {
+					sidenote.Anchor = len(cur.Tokens)
+				}
+				if err := sidenote.UnmarshalXML(d, t); err != nil {
+					return err
+				}
+				if cur != nil {
+					cur.Sidenotes = append(cur.Sidenotes, sidenote)
+				}
+
+			case "hand":
+				// Record which hands occur on the page; the tag itself is
+				// still kept as part of the page content below.
+				if cur != nil {
+					for _, attr := range t.Attr {
+						if attr.Name.Local != "ref" {
+							continue
+						}
+						if ref, err := strconv.Atoi(attr.Value); err == nil {
+							cur.Hands = append(cur.Hands, ref)
+						}
+					}
+				}
+				fallthrough
+
+			default:
+				if cur != nil {
+					cur.Tokens = append(cur.Tokens, tok)
+				}
+			}
+
+		case xml.EndElement:
+			if t.Name.Local == "letterText" {
+				if cur != nil {
+					lt.Pages = append(lt.Pages, *cur)
+				}
+				return nil
+			}
+			if cur != nil {
+				cur.Tokens = append(cur.Tokens, tok)
+			}
+
+		case xml.CharData:
+			if cur != nil {
+				// Accumulate: a page usually consists of many text tokens.
+				cur.CharData += string(t)
+				cur.Tokens = append(cur.Tokens, tok)
+			}
+
+		default:
+			// Comments, processing instructions and directives.
+			if cur != nil {
+				cur.Tokens = append(cur.Tokens, tok)
+			}
+		}
+	}
+}
+
+// UnmarshalXML decodes a sidenote element into s.
+//
+// The element name is stored in XMLName. The "pos" attribute is decoded into
+// Position, "page" into Page (a value that is not an integer is ignored) and
+// "annotation" into Annotation. Everything between the start tag and its
+// matching end tag — start and end elements, character data, comments and
+// processing instructions — is copied into s.Tokens in document order.
+//
+// The matching end tag is found by nesting depth, so a nested element that
+// happens to share the sidenote's name does not end it early. Any error from
+// the decoder, including an unexpected end of input, is returned unchanged.
+func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
+	s.XMLName = start.Name
+
+	for _, attr := range start.Attr {
+		switch attr.Name.Local {
+		case "pos":
+			if err := s.Position.UnmarshalXMLAttr(attr); err != nil {
+				return err
+			}
+		case "page":
+			if page, err := strconv.Atoi(attr.Value); err == nil {
+				s.Page = page
+			}
+		case "annotation":
+			s.Annotation = attr.Value
+		}
+	}
+
+	depth := 0 // number of child elements currently open inside the sidenote
+	for {
+		token, err := d.Token()
+		if err != nil {
+			return err
+		}
+
+		// Token() may reuse its underlying buffers, so keep a private copy.
+		tok := xml.CopyToken(token)
+
+		switch tok.(type) {
+		case xml.StartElement:
+			depth++
+		case xml.EndElement:
+			if depth == 0 {
+				// This is the sidenote's own end tag; it is not content.
+				return nil
+			}
+			depth--
+		case xml.CharData, xml.Comment, xml.ProcInst:
+			// Kept as content below.
+		default:
+			// Other token kinds (directives) are not part of the content.
+			continue
+		}
+
+		s.Tokens = append(s.Tokens, tok)
+	}
}
diff --git a/xmlmodels/letter_test.go b/xmlmodels/letter_test.go
new file mode 100644
index 0000000..f18b0f7
--- /dev/null
+++ b/xmlmodels/letter_test.go
@@ -0,0 +1,446 @@
+package xmlmodels
+
+import (
+ "encoding/xml"
+ "strings"
+ "testing"
+)
+
+// tokensToString renders tokens back into XML-like text so that tests can
+// search it for substrings. Start tags are written with their attributes, end
+// tags as </name>, comments as <!--...--> and processing instructions as
+// <?target inst?>. Only local names are used and nothing is escaped, so the
+// result is meant for substring checks, not for re-parsing.
+func tokensToString(tokens []xml.Token) string {
+	var sb strings.Builder
+	for _, token := range tokens {
+		switch t := token.(type) {
+		case xml.StartElement:
+			sb.WriteString("<")
+			sb.WriteString(t.Name.Local)
+			for _, attr := range t.Attr {
+				sb.WriteString(" ")
+				sb.WriteString(attr.Name.Local)
+				sb.WriteString(`="`)
+				sb.WriteString(attr.Value)
+				sb.WriteString(`"`)
+			}
+			sb.WriteString(">")
+		case xml.EndElement:
+			sb.WriteString("</")
+			sb.WriteString(t.Name.Local)
+			sb.WriteString(">")
+		case xml.CharData:
+			sb.Write(t)
+		case xml.Comment:
+			sb.WriteString("<!--")
+			sb.Write(t)
+			sb.WriteString("-->")
+		case xml.ProcInst:
+			sb.WriteString("<?")
+			sb.WriteString(t.Target)
+			if len(t.Inst) > 0 {
+				sb.WriteString(" ")
+				sb.Write(t.Inst)
+			}
+			sb.WriteString("?>")
+		}
+	}
+	return sb.String()
+}
+
+// TestLetterTextUnmarshal_SimpleCase unmarshals a small fixture and checks the
+// number of pages, page breaks and sidenotes, the hand reference, the page
+// break indices, the sidenote's attributes and content, and that no page
+// content contains the sidenote text.
+//
+// Length checks that guard a later index expression are fatal so that a
+// wrong count fails the test instead of panicking.
+//
+// NOTE(review): LetterText, PageBreaks, Sidenotes, Hands.Reference,
+// Sidenote.Content, Page.Content and Page.Page are not declared in the
+// letter.go part of this change — confirm they exist elsewhere in the package.
+func TestLetterTextUnmarshal_SimpleCase(t *testing.T) {
+	// Simple test case with basic structure.
+	// NOTE(review): the fixture as shown contains no element markup although
+	// the assertions below expect pages, a sidenote and a hand reference —
+	// confirm the XML tags were not lost.
+	testXML := `
+ Some content before first page break.
+
+ Content on page 1 with some markup and more text.
+ This is a sidenote
+ More content on page 1.
+
+ Content on page 2 with bold text.
+ Hand reference content
+ Final content on page 2.
+ `
+
+	var letterText LetterText
+	err := xml.Unmarshal([]byte(testXML), &letterText)
+	if err != nil {
+		t.Fatalf("Error unmarshaling XML: %v", err)
+	}
+
+	// Verify basic structure
+	if len(letterText.Pages) != 3 {
+		t.Errorf("Expected 3 pages, got %d", len(letterText.Pages))
+	}
+	if len(letterText.PageBreaks) != 2 {
+		// Fatal: PageBreaks[0] and PageBreaks[1] are indexed below.
+		t.Fatalf("Expected 2 page breaks, got %d", len(letterText.PageBreaks))
+	}
+	if len(letterText.Sidenotes) != 1 {
+		// Fatal: Sidenotes[0] is indexed below.
+		t.Fatalf("Expected 1 sidenote, got %d", len(letterText.Sidenotes))
+	}
+	if letterText.Hands.Reference != 42 {
+		t.Errorf("Expected hand reference 42, got %d", letterText.Hands.Reference)
+	}
+
+	// Verify page breaks
+	if letterText.PageBreaks[0].Index != 1 {
+		t.Errorf("Expected page break index 1, got %d", letterText.PageBreaks[0].Index)
+	}
+	if letterText.PageBreaks[1].Index != 2 {
+		t.Errorf("Expected page break index 2, got %d", letterText.PageBreaks[1].Index)
+	}
+
+	// Verify sidenote
+	sidenote := letterText.Sidenotes[0]
+	if sidenote.Page != 1 {
+		t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page)
+	}
+	if sidenote.Position != SidenotePositionRight {
+		t.Errorf("Expected sidenote position right, got %d", sidenote.Position)
+	}
+	if sidenote.Annotation != "test" {
+		t.Errorf("Expected sidenote annotation 'test', got '%s'", sidenote.Annotation)
+	}
+	sidenoteContent := tokensToString(sidenote.Content)
+	if !strings.Contains(sidenoteContent, "This is a sidenote") {
+		t.Errorf("Expected sidenote content to contain 'This is a sidenote', got '%s'", sidenoteContent)
+	}
+
+	// Verify page content doesn't contain sidenote text
+	for _, page := range letterText.Pages {
+		content := tokensToString(page.Content)
+		if strings.Contains(content, "This is a sidenote") {
+			t.Errorf("Page content should not contain sidenote text, but page %d does: %s", page.Page, content)
+		}
+	}
+}
+
+// TestLetterTextUnmarshal_RealExample_Letter1 unmarshals a longer fixture and
+// checks the page and page-break counts and that page 1 and page 2 each
+// contain an expected phrase.
+//
+// NOTE(review): LetterText, PageBreaks, Page.Content and Page.Page are not
+// declared in the letter.go part of this change — confirm they exist
+// elsewhere in the package.
+func TestLetterTextUnmarshal_RealExample_Letter1(t *testing.T) {
+	// Real example from briefe.xml - Letter 1 (simplified).
+	// NOTE(review): the fixture as shown contains no element markup —
+	// confirm the XML tags were not lost.
+	testXML := `
+HochEdelgeborner Hochgelahrter Herr Secretair
+Verehrungswürdigster Gönner!
+
+Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. Meine Feder ist zu schwach, Denenselben die regen Empfindungen meines Herzens darüber zu schildern.
+lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen, und mich mit dem erkenntlichsten Herzen nennen zu dürfen
+
+Hoch Edelgeborner Hochgelahrter Herr Secretair
+Verehrungswürdigster Gönner
+Ew. HochEdelgebh:
+Von Hause, d. 2 Jenner, 1765.
+gehorsamsten Diener
+Jacob Michael Reinhold Lenz
+`
+
+	var letterText LetterText
+	if err := xml.Unmarshal([]byte(testXML), &letterText); err != nil {
+		t.Fatalf("Error unmarshaling real XML: %v", err)
+	}
+
+	// Two pages and two page breaks are expected.
+	if n := len(letterText.Pages); n != 2 {
+		t.Errorf("Expected 2 pages, got %d", n)
+	}
+	if n := len(letterText.PageBreaks); n != 2 {
+		t.Errorf("Expected 2 page breaks, got %d", n)
+	}
+
+	// Each page must carry its characteristic phrase.
+	var page1Found, page2Found bool
+	for _, page := range letterText.Pages {
+		content := tokensToString(page.Content)
+		switch {
+		case page.Page == 1 && strings.Contains(content, "HochEdelgeborner"):
+			page1Found = true
+		case page.Page == 2 && strings.Contains(content, "Jacob Michael Reinhold Lenz"):
+			page2Found = true
+		}
+	}
+
+	if !page1Found {
+		t.Error("Page 1 content not found correctly")
+	}
+	if !page2Found {
+		t.Error("Page 2 content not found correctly")
+	}
+}
+
+// TestLetterTextUnmarshal_WithSidenotes unmarshals a fixture with one sidenote
+// and checks the sidenote's position, page, annotation and content, and that
+// no page content contains the sidenote text.
+//
+// The sidenote count check is fatal because Sidenotes[0] is indexed right
+// after it; a wrong count then fails the test instead of panicking.
+//
+// NOTE(review): LetterText, Sidenotes, Sidenote.Content, Page.Content and
+// Page.Page are not declared in the letter.go part of this change — confirm
+// they exist elsewhere in the package.
+func TestLetterTextUnmarshal_WithSidenotes(t *testing.T) {
+	// Real example with sidenotes from briefe.xml.
+	// NOTE(review): the fixture as shown contains no element markup —
+	// confirm the XML tags were not lost.
+	testXML := `
+Some text before sidenote.
+Ich umarme Dich und küsse Dich 1000mahl als Dein
+allergetreuester Bruder
+Jacob Michael Reinhold Lenz.
+Dorpat den 11ten October 1767.
+More text after sidenote.
+`
+
+	var letterText LetterText
+	err := xml.Unmarshal([]byte(testXML), &letterText)
+	if err != nil {
+		t.Fatalf("Error unmarshaling sidenote XML: %v", err)
+	}
+
+	// Should have 1 sidenote
+	if len(letterText.Sidenotes) != 1 {
+		t.Fatalf("Expected 1 sidenote, got %d", len(letterText.Sidenotes))
+	}
+
+	// Verify sidenote details
+	sidenote := letterText.Sidenotes[0]
+	if sidenote.Position != SidenotePositionLeft {
+		t.Errorf("Expected sidenote position left, got %d", sidenote.Position)
+	}
+	if sidenote.Page != 1 {
+		t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page)
+	}
+	if !strings.Contains(sidenote.Annotation, "am linken Rand") {
+		t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation)
+	}
+	sidenoteContent := tokensToString(sidenote.Content)
+	if !strings.Contains(sidenoteContent, "Jacob Michael Reinhold Lenz") {
+		t.Errorf("Expected sidenote content to contain author name, got '%s'", sidenoteContent)
+	}
+
+	// Verify page content doesn't contain sidenote
+	for _, page := range letterText.Pages {
+		content := tokensToString(page.Content)
+		if strings.Contains(content, "allergetreuester Bruder") {
+			t.Errorf("Page content should not contain sidenote text, but page %d does", page.Page)
+		}
+	}
+}
+
+// TestLetterTextUnmarshal_ComplexSidenotePositions unmarshals a fixture with
+// three sidenotes and checks that the top-right, bottom-left and top
+// positions each occur among them.
+//
+// NOTE(review): LetterText and its Sidenotes field are not declared in the
+// letter.go part of this change — confirm they exist elsewhere in the package.
+func TestLetterTextUnmarshal_ComplexSidenotePositions(t *testing.T) {
+	// Test different sidenote positions.
+	// NOTE(review): the fixture as shown contains no element markup —
+	// confirm the XML tags were not lost.
+	testXML := `
+
+Top right sidenote
+Bottom left sidenote
+Top sidenote
+Some content.
+`
+
+	var letterText LetterText
+	if err := xml.Unmarshal([]byte(testXML), &letterText); err != nil {
+		t.Fatalf("Error unmarshaling complex sidenotes XML: %v", err)
+	}
+
+	if n := len(letterText.Sidenotes); n != 3 {
+		t.Fatalf("Expected 3 sidenotes, got %d", n)
+	}
+
+	// Collect the set of positions that were parsed.
+	seen := make(map[SidenotePosition]bool)
+	for _, sn := range letterText.Sidenotes {
+		seen[sn.Position] = true
+	}
+
+	for _, want := range []SidenotePosition{
+		SidenotePositionTopRight,
+		SidenotePositionBottomLeft,
+		SidenotePositionTop,
+	} {
+		if !seen[want] {
+			t.Errorf("Expected to find sidenote position %d, but didn't", want)
+		}
+	}
+}
+
+// TestLetterTextUnmarshal_NoPageBreaks unmarshals a fixture without page
+// breaks and checks that exactly one page numbered 1 results, that no page
+// break is recorded, and that the page content keeps the markup text but not
+// the sidenote text.
+//
+// The page count check is fatal because Pages[0] is indexed right after it;
+// a wrong count then fails the test instead of panicking.
+//
+// NOTE(review): LetterText, PageBreaks, Page.Page and Page.Content are not
+// declared in the letter.go part of this change — confirm they exist
+// elsewhere in the package.
+func TestLetterTextUnmarshal_NoPageBreaks(t *testing.T) {
+	// Test letter without explicit page breaks.
+	// NOTE(review): the fixture as shown contains no element markup —
+	// confirm the XML tags were not lost.
+	testXML := `
+This is all content on the default page.
+Some markup and more text.
+Note on single page
+Final text.
+`
+
+	var letterText LetterText
+	err := xml.Unmarshal([]byte(testXML), &letterText)
+	if err != nil {
+		t.Fatalf("Error unmarshaling no-page-break XML: %v", err)
+	}
+
+	// Should have 1 page (default page 1)
+	if len(letterText.Pages) != 1 {
+		t.Fatalf("Expected 1 page, got %d", len(letterText.Pages))
+	}
+	if len(letterText.PageBreaks) != 0 {
+		t.Errorf("Expected 0 page breaks, got %d", len(letterText.PageBreaks))
+	}
+
+	// Page should be page 1
+	if letterText.Pages[0].Page != 1 {
+		t.Errorf("Expected page 1, got page %d", letterText.Pages[0].Page)
+	}
+
+	// Content should contain markup but not sidenote
+	content := tokensToString(letterText.Pages[0].Content)
+	if !strings.Contains(content, "Some markup") {
+		t.Error("Expected page content to contain markup")
+	}
+	if strings.Contains(content, "Note on single page") {
+		t.Error("Page content should not contain sidenote text")
+	}
+}
+
+// TestLetterTextUnmarshal_EmptyContent unmarshals a fixture without text and
+// checks that no page is produced while one page break is recorded.
+//
+// NOTE(review): LetterText and its PageBreaks field are not declared in the
+// letter.go part of this change — confirm they exist elsewhere in the package.
+func TestLetterTextUnmarshal_EmptyContent(t *testing.T) {
+	// Edge case: empty content.
+	// NOTE(review): the fixture as shown is whitespace only — confirm the
+	// XML tags were not lost.
+	testXML := `
+
+`
+
+	var letterText LetterText
+	if err := xml.Unmarshal([]byte(testXML), &letterText); err != nil {
+		t.Fatalf("Error unmarshaling empty XML: %v", err)
+	}
+
+	// No page carries content, but the page break itself is counted.
+	if n := len(letterText.Pages); n != 0 {
+		t.Errorf("Expected 0 pages with content, got %d", n)
+	}
+	if n := len(letterText.PageBreaks); n != 1 {
+		t.Errorf("Expected 1 page break, got %d", n)
+	}
+}
+
+// TestSidenotePosition_UnmarshalXMLAttr checks that every supported position
+// string decodes to its constant and that an unknown string falls back to
+// SidenotePositionLeft without an error.
+func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) {
+	type positionCase struct {
+		input    string
+		expected SidenotePosition
+	}
+
+	cases := []positionCase{
+		{input: "left", expected: SidenotePositionLeft},
+		{input: "right", expected: SidenotePositionRight},
+		{input: "top", expected: SidenotePositionTop},
+		{input: "top left", expected: SidenotePositionTopLeft},
+		{input: "top right", expected: SidenotePositionTopRight},
+		{input: "bottom", expected: SidenotePositionBottom},
+		{input: "bottom left", expected: SidenotePositionBottomLeft},
+		{input: "bottom right", expected: SidenotePositionBottomRight},
+		{input: "unknown", expected: SidenotePositionLeft}, // fallback for unrecognized values
+	}
+
+	for _, c := range cases {
+		var got SidenotePosition
+		if err := got.UnmarshalXMLAttr(xml.Attr{Value: c.input}); err != nil {
+			t.Errorf("Error unmarshaling position '%s': %v", c.input, err)
+		}
+		if got != c.expected {
+			t.Errorf("Expected position %d for input '%s', got %d", c.expected, c.input, got)
+		}
+	}
+}
+
+// TestLetterTextUnmarshal_PreserveMarkup unmarshals a fixture with inline
+// markup and checks that the single resulting page's content contains each
+// expected fragment.
+//
+// NOTE(review): LetterText and Page.Content are not declared in the letter.go
+// part of this change — confirm they exist elsewhere in the package.
+func TestLetterTextUnmarshal_PreserveMarkup(t *testing.T) {
+	// Various markup elements must survive in the page content.
+	// NOTE(review): the fixture as shown contains no element markup —
+	// confirm the XML tags were not lost.
+	testXML := `
+
+Text with antiqua and bold and italic.
+
+Centered text
+
+Deleted text
+More content with person reference.
+`
+
+	var letterText LetterText
+	if err := xml.Unmarshal([]byte(testXML), &letterText); err != nil {
+		t.Fatalf("Error unmarshaling markup XML: %v", err)
+	}
+
+	if n := len(letterText.Pages); n != 1 {
+		t.Fatalf("Expected 1 page, got %d", n)
+	}
+
+	content := tokensToString(letterText.Pages[0].Content)
+
+	// NOTE(review): the three empty strings below match any content, so they
+	// assert nothing — presumably they once held markup; confirm.
+	wanted := []string{
+		"antiqua",
+		"bold",
+		"italic",
+		"",
+		"",
+		"",
+		"Deleted text",
+		"person reference",
+	}
+
+	for _, fragment := range wanted {
+		if strings.Contains(content, fragment) {
+			continue
+		}
+		t.Errorf("Expected page content to contain '%s', but it doesn't. Content: %s", fragment, content)
+	}
+}
+
+// TestLetterTextUnmarshal_LetterAttribute checks that unmarshalling the
+// fixture yields the letter number 42.
+//
+// NOTE(review): LetterText is not declared in the letter.go part of this
+// change — confirm it exists elsewhere in the package.
+func TestLetterTextUnmarshal_LetterAttribute(t *testing.T) {
+	// The letter attribute must be parsed correctly.
+	// NOTE(review): the fixture as shown contains no element that could
+	// carry a letter attribute — confirm the XML tags were not lost.
+	testXML := `
+
+Some content.
+`
+
+	var letterText LetterText
+	if err := xml.Unmarshal([]byte(testXML), &letterText); err != nil {
+		t.Fatalf("Error unmarshaling letter attribute XML: %v", err)
+	}
+
+	if got := letterText.Letter; got != 42 {
+		t.Errorf("Expected letter attribute 42, got %d", got)
+	}
+}
+
+// TestLetterTextUnmarshal_LetterAttribute_AllExistingTests runs one subtest
+// per fixture and checks that each yields the expected letter number.
+//
+// NOTE(review): LetterText is not declared in the letter.go part of this
+// change — confirm it exists elsewhere in the package.
+// NOTE(review): the fixtures as shown contain no element that could carry a
+// letter attribute — confirm the XML tags were not lost.
+func TestLetterTextUnmarshal_LetterAttribute_AllExistingTests(t *testing.T) {
+	type letterCase struct {
+		name           string
+		xml            string
+		expectedLetter int
+	}
+
+	cases := []letterCase{
+		{
+			name: "Simple case",
+			xml: `
+ Some content.
+ `,
+			expectedLetter: 123,
+		},
+		{
+			name: "Real example letter 1",
+			xml: `
+ Some content.
+ `,
+			expectedLetter: 1,
+		},
+		{
+			name: "Letter with sidenotes",
+			xml: `
+
+ Note
+ Content.
+ `,
+			expectedLetter: 999,
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			var letterText LetterText
+			if err := xml.Unmarshal([]byte(c.xml), &letterText); err != nil {
+				t.Fatalf("Error unmarshaling XML: %v", err)
+			}
+
+			if got := letterText.Letter; got != c.expectedLetter {
+				t.Errorf("Expected letter attribute %d, got %d", c.expectedLetter, got)
+			}
+		})
+	}
+}
\ No newline at end of file