package xmlmodels import ( "encoding/xml" "strings" "testing" ) // Helper function to convert []xml.Token back to string for testing func tokensToString(tokens []xml.Token) string { var sb strings.Builder for _, token := range tokens { switch t := token.(type) { case xml.StartElement: sb.WriteString("<") sb.WriteString(t.Name.Local) for _, attr := range t.Attr { sb.WriteString(" ") sb.WriteString(attr.Name.Local) sb.WriteString(`="`) sb.WriteString(attr.Value) sb.WriteString(`"`) } sb.WriteString(">") case xml.EndElement: sb.WriteString("") case xml.CharData: sb.Write(t) case xml.Comment: sb.WriteString("") case xml.ProcInst: sb.WriteString(" 0 { sb.WriteString(" ") sb.Write(t.Inst) } sb.WriteString("?>") } } return sb.String() } func TestLetterTextUnmarshal_SimpleCase(t *testing.T) { // Simple test case with basic structure testXML := ` Some content before first page break. Content on page 1 with some markup and more text. This is a sidenote More content on page 1. Content on page 2 with bold text. Hand reference content Final content on page 2. ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling XML: %v", err) } // Verify basic structure if len(letterText.Pages) != 3 { t.Errorf("Expected 3 pages, got %d", len(letterText.Pages)) } if len(letterText.PageBreaks) != 2 { t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) } if len(letterText.Sidenotes) != 1 { t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) } if letterText.Hands.Reference != 42 { t.Errorf("Expected hand reference 42, got %d", letterText.Hands.Reference) } // Verify page breaks if letterText.PageBreaks[0].Index != 1 { t.Errorf("Expected page break index 1, got %d", letterText.PageBreaks[0].Index) } if letterText.PageBreaks[1].Index != 2 { t.Errorf("Expected page break index 2, got %d", letterText.PageBreaks[1].Index) } // Verify sidenote sidenote := letterText.Sidenotes[0] if sidenote.Page != 1 { t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page) } if sidenote.Position != SidenotePositionRight { t.Errorf("Expected sidenote position right, got %d", sidenote.Position) } if sidenote.Annotation != "test" { t.Errorf("Expected sidenote annotation 'test', got '%s'", sidenote.Annotation) } sidenoteContent := tokensToString(sidenote.Content) if !strings.Contains(sidenoteContent, "This is a sidenote") { t.Errorf("Expected sidenote content to contain 'This is a sidenote', got '%s'", sidenoteContent) } // Verify page content doesn't contain sidenote text for _, page := range letterText.Pages { content := tokensToString(page.Content) if strings.Contains(content, "This is a sidenote") { t.Errorf("Page content should not contain sidenote text, but page %d does: %s", page.Page, content) } } } func TestLetterTextUnmarshal_RealExample_Letter1(t *testing.T) { // Real example from briefe.xml - Letter 1 (simplified) testXML := ` HochEdelgeborner Hochgelahrter Herr Secretair Verehrungswürdigster Gönner! Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. Meine Feder ist zu schwach, Denenselben die regen Empfindungen meines Herzens darüber zu schildern. lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen, und mich mit dem erkenntlichsten Herzen nennen zu dürfen Hoch Edelgeborner Hochgelahrter Herr Secretair Verehrungswürdigster Gönner Ew. HochEdelgebh: Von Hause, d. 2 Jenner, 1765. gehorsamsten Diener Jacob Michael Reinhold Lenz ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling real XML: %v", err) } // Should have 2 pages if len(letterText.Pages) != 2 { t.Errorf("Expected 2 pages, got %d", len(letterText.Pages)) } if len(letterText.PageBreaks) != 2 { t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) } // Verify page content contains expected elements page1Found := false page2Found := false for _, page := range letterText.Pages { content := tokensToString(page.Content) if page.Page == 1 && strings.Contains(content, "HochEdelgeborner") { page1Found = true } if page.Page == 2 && strings.Contains(content, "Jacob Michael Reinhold Lenz") { page2Found = true } } if !page1Found { t.Error("Page 1 content not found correctly") } if !page2Found { t.Error("Page 2 content not found correctly") } } func TestLetterTextUnmarshal_WithSidenotes(t *testing.T) { // Real example with sidenotes from briefe.xml testXML := ` Some text before sidenote. Ich umarme Dich und küsse Dich 1000mahl als Dein allergetreuester Bruder Jacob Michael Reinhold Lenz. Dorpat den 11ten October 1767. More text after sidenote. ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling sidenote XML: %v", err) } // Should have 1 sidenote if len(letterText.Sidenotes) != 1 { t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) } // Verify sidenote details sidenote := letterText.Sidenotes[0] if sidenote.Position != SidenotePositionLeft { t.Errorf("Expected sidenote position left, got %d", sidenote.Position) } if sidenote.Page != 1 { t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page) } if !strings.Contains(sidenote.Annotation, "am linken Rand") { t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation) } sidenoteContent := tokensToString(sidenote.Content) if !strings.Contains(sidenoteContent, "Jacob Michael Reinhold Lenz") { t.Errorf("Expected sidenote content to contain author name, got '%s'", sidenoteContent) } // Verify page content doesn't contain sidenote for _, page := range letterText.Pages { content := tokensToString(page.Content) if strings.Contains(content, "allergetreuester Bruder") { t.Errorf("Page content should not contain sidenote text, but page %d does", page.Page) } } } func TestLetterTextUnmarshal_ComplexSidenotePositions(t *testing.T) { // Test different sidenote positions testXML := ` Top right sidenote Bottom left sidenote Top sidenote Some content. ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling complex sidenotes XML: %v", err) } if len(letterText.Sidenotes) != 3 { t.Fatalf("Expected 3 sidenotes, got %d", len(letterText.Sidenotes)) } // Check position parsing positions := make(map[SidenotePosition]bool) for _, sidenote := range letterText.Sidenotes { positions[sidenote.Position] = true } expectedPositions := []SidenotePosition{ SidenotePositionTopRight, SidenotePositionBottomLeft, SidenotePositionTop, } for _, expected := range expectedPositions { if !positions[expected] { t.Errorf("Expected to find sidenote position %d, but didn't", expected) } } } func TestLetterTextUnmarshal_NoPageBreaks(t *testing.T) { // Test letter without explicit page breaks testXML := ` This is all content on the default page. Some markup and more text. Note on single page Final text. ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling no-page-break XML: %v", err) } // Should have 1 page (default page 1) if len(letterText.Pages) != 1 { t.Errorf("Expected 1 page, got %d", len(letterText.Pages)) } if len(letterText.PageBreaks) != 0 { t.Errorf("Expected 0 page breaks, got %d", len(letterText.PageBreaks)) } // Page should be page 1 if letterText.Pages[0].Page != 1 { t.Errorf("Expected page 1, got page %d", letterText.Pages[0].Page) } // Content should contain markup but not sidenote content := tokensToString(letterText.Pages[0].Content) if !strings.Contains(content, "Some markup") { t.Error("Expected page content to contain markup") } if strings.Contains(content, "Note on single page") { t.Error("Page content should not contain sidenote text") } } func TestLetterTextUnmarshal_EmptyContent(t *testing.T) { // Test edge case with empty content testXML := ` ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling empty XML: %v", err) } // Should have no pages with content if len(letterText.Pages) != 0 { t.Errorf("Expected 0 pages with content, got %d", len(letterText.Pages)) } if len(letterText.PageBreaks) != 1 { t.Errorf("Expected 1 page break, got %d", len(letterText.PageBreaks)) } } func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) { tests := []struct { input string expected SidenotePosition }{ {"left", SidenotePositionLeft}, {"right", SidenotePositionRight}, {"top", SidenotePositionTop}, {"top left", SidenotePositionTopLeft}, {"top right", SidenotePositionTopRight}, {"bottom", SidenotePositionBottom}, {"bottom left", SidenotePositionBottomLeft}, {"bottom right", SidenotePositionBottomRight}, {"unknown", SidenotePositionLeft}, // Default fallback } for _, test := range tests { var pos SidenotePosition attr := xml.Attr{Value: test.input} err := pos.UnmarshalXMLAttr(attr) if err != nil { t.Errorf("Error unmarshaling position '%s': %v", test.input, err) } if pos != test.expected { t.Errorf("Expected position %d for input '%s', got %d", test.expected, test.input, pos) } } } func TestLetterTextUnmarshal_PreserveMarkup(t *testing.T) { // Test that various markup elements are preserved in page content testXML := ` Text with antiqua and bold and italic. Centered text
    Underlined text
Deleted text More content with person reference.
` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling markup XML: %v", err) } if len(letterText.Pages) != 1 { t.Fatalf("Expected 1 page, got %d", len(letterText.Pages)) } content := tokensToString(letterText.Pages[0].Content) expectedMarkup := []string{ "antiqua", "bold", "italic", "", "", "", "Deleted text", "person reference", } for _, markup := range expectedMarkup { if !strings.Contains(content, markup) { t.Errorf("Expected page content to contain '%s', but it doesn't. Content: %s", markup, content) } } } func TestLetterTextUnmarshal_LetterAttribute(t *testing.T) { // Test that the letter attribute is parsed correctly testXML := ` Some content. ` var letterText LetterText err := xml.Unmarshal([]byte(testXML), &letterText) if err != nil { t.Fatalf("Error unmarshaling letter attribute XML: %v", err) } // Verify letter attribute is parsed if letterText.Letter != 42 { t.Errorf("Expected letter attribute 42, got %d", letterText.Letter) } } func TestLetterTextUnmarshal_LetterAttribute_AllExistingTests(t *testing.T) { // Test that existing test cases also have correct letter attributes testCases := []struct { name string xml string expectedLetter int }{ { name: "Simple case", xml: ` Some content. `, expectedLetter: 123, }, { name: "Real example letter 1", xml: ` Some content. `, expectedLetter: 1, }, { name: "Letter with sidenotes", xml: ` Note Content. `, expectedLetter: 999, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { var letterText LetterText err := xml.Unmarshal([]byte(tc.xml), &letterText) if err != nil { t.Fatalf("Error unmarshaling XML: %v", err) } if letterText.Letter != tc.expectedLetter { t.Errorf("Expected letter attribute %d, got %d", tc.expectedLetter, letterText.Letter) } }) } }