diff --git a/xmlmodels/letter.go b/xmlmodels/letter.go
index 860a957..ffb7d72 100644
--- a/xmlmodels/letter.go
+++ b/xmlmodels/letter.go
@@ -3,10 +3,17 @@ package xmlmodels
import (
"encoding/json"
"encoding/xml"
- "io"
- "strconv"
)
+type Letter struct { // Letter is the decoded form of one letterText element; encoding/xml fills the fields according to the struct tags.
+ XMLName xml.Name `xml:"letterText"` // element name; the tag names the element "letterText"
+ Letter int `xml:"letter,attr"` // value of the "letter" attribute; the only key returned by Keys
+ Pages []Page `xml:"page"` // one entry per child element named "page"
+ Hands []RefElement `xml:"hand"` // one entry per child element named "hand"
+ Content string `xml:",innerxml"` // raw XML nested inside the element, as accumulated by the decoder
+ Chardata string `xml:",chardata"` // character data of the element
+}
+
func (l Letter) Keys() []any {
return []any{l.Letter}
}
@@ -23,220 +30,7 @@ func (l Letter) String() string {
return string(json)
}
-type SidenotePosition uint8
-
-const (
- SidenotePositionLeft SidenotePosition = iota
- SidenotePositionRight
- SidenotePositionTop
- SidenotePositionTopLeft
- SidenotePositionTopRight
- SidenotePositionBottom
- SidenotePositionBottomLeft
- SidenotePositionBottomRight
-)
-
-func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error {
- switch attr.Value {
- case "left":
- *sp = SidenotePositionLeft
- case "right":
- *sp = SidenotePositionRight
- case "top":
- *sp = SidenotePositionTop
- case "top left":
- *sp = SidenotePositionTopLeft
- case "top right":
- *sp = SidenotePositionTopRight
- case "bottom":
- *sp = SidenotePositionBottom
- case "bottom left":
- *sp = SidenotePositionBottomLeft
- case "bottom right":
- *sp = SidenotePositionBottomRight
- default:
- *sp = SidenotePositionLeft // Default fallback
- }
- return nil
-}
-
-type Letter struct {
- XMLName xml.Name `xml:"letterText"`
- Letter int
- Pages []Page
-}
-
type Page struct {
- No int
- Letter int
- Sidenotes []Sidenote
- Hands []int
- Tokens []xml.Token
- CharData string
-}
-
-type Sidenote struct {
- XMLName xml.Name
- Position SidenotePosition
- Page int
- Annotation string
- Anchor int
- Tokens []xml.Token
- CharData string
-}
-
-type Char struct {
- Stack []xml.Token
- Value string
-}
-
-func (c Char) String() string {
- return c.Value
-}
-
-func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- lt.XMLName = start.Name
- for _, attr := range start.Attr {
- if attr.Name.Local == "letter" {
- if letterNum, err := strconv.Atoi(attr.Value); err == nil {
- lt.Letter = letterNum
- }
- }
- }
-
- if err := lt.parseTokens(d); err != nil {
- return err
- }
-
- return nil
-}
-
-func (lt *Letter) parseTokens(d *xml.Decoder) error {
- stack := []xml.Token{}
- var c_page *Page = nil
-
- for {
- token, err := d.Token()
- if err == io.EOF {
- break
- }
- if err != nil {
- return err
- }
-
- // INFO: Make a copy of the token since Token() reuses the underlying data
- tokenCopy := xml.CopyToken(token)
- if c_page != nil {
- c_page.Tokens = append(c_page.Tokens, tokenCopy)
- }
-
- switch t := tokenCopy.(type) {
- case xml.StartElement:
- switch t.Name.Local {
- case "page":
- if c_page != nil {
- lt.Pages = append(lt.Pages, *c_page)
- }
-
- c_page = &Page{}
-
- for _, attr := range t.Attr {
- if attr.Name.Local == "index" {
- if idx, err := strconv.Atoi(attr.Value); err == nil {
- c_page.No = idx
- }
- }
- }
-
- d.Skip()
-
- // WARNING: UnmarshalXML continues and changes the state of the parser
- case "sidenote":
- var sidenote Sidenote = Sidenote{
- Anchor: len(c_page.Tokens),
- }
- if err := sidenote.UnmarshalXML(d, t); err == nil && c_page != nil {
- c_page.Sidenotes = append(c_page.Sidenotes, sidenote)
- }
-
- // INFO: We create a list of all hand in a letter
- case "hand":
- for _, attr := range t.Attr {
- if attr.Name.Local == "ref" && c_page != nil {
- if ref, err := strconv.Atoi(attr.Value); err == nil {
- c_page.Hands = append(c_page.Hands, ref)
- }
- }
- }
- fallthrough
-
- default:
- if c_page != nil {
- c_page.Tokens = append(c_page.Tokens, tokenCopy)
- }
- }
-
- case xml.CharData:
- if c_page != nil {
- c_page.CharData = string(t)
- c_page.Tokens = append(c_page.Tokens, tokenCopy)
- }
-
- case xml.EndElement:
- if t.Name.Local == "letterText" {
- if c_page != nil {
- lt.Pages = append(lt.Pages, *c_page)
- }
- return nil
- }
-
- if c_page != nil {
- c_page.Tokens = append(c_page.Tokens, tokenCopy)
- }
- }
- }
-
- return nil
-}
-
-func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
- // Set the XMLName
- s.XMLName = start.Name
-
- // Parse attributes
- for _, attr := range start.Attr {
- switch attr.Name.Local {
- case "pos":
- s.Position.UnmarshalXMLAttr(attr)
- case "page":
- if page, err := strconv.Atoi(attr.Value); err == nil {
- s.Page = page
- }
- case "annotation":
- s.Annotation = attr.Value
- }
- }
-
- // Collect all content tokens
- for {
- token, err := d.Token()
- if err != nil {
- return err
- }
-
- tokenCopy := xml.CopyToken(token)
-
- switch t := tokenCopy.(type) {
- case xml.EndElement:
- if t.Name.Local == start.Name.Local {
- // End of sidenote element
- return nil
- }
- // Add the end element token to content
- s.Content = append(s.Content, tokenCopy)
- case xml.StartElement, xml.CharData, xml.Comment, xml.ProcInst:
- // Add all other tokens to content
- s.Content = append(s.Content, tokenCopy)
- }
- }
+ XMLName xml.Name `xml:"page"`
+ Index int `xml:"index,attr"`
}
diff --git a/xmlmodels/letter_test.go b/xmlmodels/letter_test.go
deleted file mode 100644
index f18b0f7..0000000
--- a/xmlmodels/letter_test.go
+++ /dev/null
@@ -1,446 +0,0 @@
-package xmlmodels
-
-import (
- "encoding/xml"
- "strings"
- "testing"
-)
-
-// Helper function to convert []xml.Token back to string for testing
-func tokensToString(tokens []xml.Token) string {
- var sb strings.Builder
- for _, token := range tokens {
- switch t := token.(type) {
- case xml.StartElement:
- sb.WriteString("<")
- sb.WriteString(t.Name.Local)
- for _, attr := range t.Attr {
- sb.WriteString(" ")
- sb.WriteString(attr.Name.Local)
- sb.WriteString(`="`)
- sb.WriteString(attr.Value)
- sb.WriteString(`"`)
- }
- sb.WriteString(">")
- case xml.EndElement:
- sb.WriteString("")
- sb.WriteString(t.Name.Local)
- sb.WriteString(">")
- case xml.CharData:
- sb.Write(t)
- case xml.Comment:
- sb.WriteString("")
- case xml.ProcInst:
- sb.WriteString("")
- sb.WriteString(t.Target)
- if len(t.Inst) > 0 {
- sb.WriteString(" ")
- sb.Write(t.Inst)
- }
- sb.WriteString("?>")
- }
- }
- return sb.String()
-}
-
-func TestLetterTextUnmarshal_SimpleCase(t *testing.T) {
- // Simple test case with basic structure
- testXML := `
- Some content before first page break.
-
- Content on page 1 with some markup and more text.
- This is a sidenote
- More content on page 1.
-
- Content on page 2 with bold text.
- Hand reference content
- Final content on page 2.
- `
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling XML: %v", err)
- }
-
- // Verify basic structure
- if len(letterText.Pages) != 3 {
- t.Errorf("Expected 3 pages, got %d", len(letterText.Pages))
- }
- if len(letterText.PageBreaks) != 2 {
- t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks))
- }
- if len(letterText.Sidenotes) != 1 {
- t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes))
- }
- if letterText.Hands.Reference != 42 {
- t.Errorf("Expected hand reference 42, got %d", letterText.Hands.Reference)
- }
-
- // Verify page breaks
- if letterText.PageBreaks[0].Index != 1 {
- t.Errorf("Expected page break index 1, got %d", letterText.PageBreaks[0].Index)
- }
- if letterText.PageBreaks[1].Index != 2 {
- t.Errorf("Expected page break index 2, got %d", letterText.PageBreaks[1].Index)
- }
-
- // Verify sidenote
- sidenote := letterText.Sidenotes[0]
- if sidenote.Page != 1 {
- t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page)
- }
- if sidenote.Position != SidenotePositionRight {
- t.Errorf("Expected sidenote position right, got %d", sidenote.Position)
- }
- if sidenote.Annotation != "test" {
- t.Errorf("Expected sidenote annotation 'test', got '%s'", sidenote.Annotation)
- }
- sidenoteContent := tokensToString(sidenote.Content)
- if !strings.Contains(sidenoteContent, "This is a sidenote") {
- t.Errorf("Expected sidenote content to contain 'This is a sidenote', got '%s'", sidenoteContent)
- }
-
- // Verify page content doesn't contain sidenote text
- for _, page := range letterText.Pages {
- content := tokensToString(page.Content)
- if strings.Contains(content, "This is a sidenote") {
- t.Errorf("Page content should not contain sidenote text, but page %d does: %s", page.Page, content)
- }
- }
-}
-
-func TestLetterTextUnmarshal_RealExample_Letter1(t *testing.T) {
- // Real example from briefe.xml - Letter 1 (simplified)
- testXML := `
-HochEdelgeborner Hochgelahrter Herr Secretair
-Verehrungswürdigster Gönner!
-
-Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. Meine Feder ist zu schwach, Denenselben die regen Empfindungen meines Herzens darüber zu schildern.
-lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen, und mich mit dem erkenntlichsten Herzen nennen zu dürfen
-
-Hoch Edelgeborner Hochgelahrter Herr Secretair
-Verehrungswürdigster Gönner
-Ew. HochEdelgebh:
-Von Hause, d. 2 Jenner, 1765.
-gehorsamsten Diener
-Jacob Michael Reinhold Lenz
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling real XML: %v", err)
- }
-
- // Should have 2 pages
- if len(letterText.Pages) != 2 {
- t.Errorf("Expected 2 pages, got %d", len(letterText.Pages))
- }
- if len(letterText.PageBreaks) != 2 {
- t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks))
- }
-
- // Verify page content contains expected elements
- page1Found := false
- page2Found := false
- for _, page := range letterText.Pages {
- content := tokensToString(page.Content)
- if page.Page == 1 && strings.Contains(content, "HochEdelgeborner") {
- page1Found = true
- }
- if page.Page == 2 && strings.Contains(content, "Jacob Michael Reinhold Lenz") {
- page2Found = true
- }
- }
-
- if !page1Found {
- t.Error("Page 1 content not found correctly")
- }
- if !page2Found {
- t.Error("Page 2 content not found correctly")
- }
-}
-
-func TestLetterTextUnmarshal_WithSidenotes(t *testing.T) {
- // Real example with sidenotes from briefe.xml
- testXML := `
-Some text before sidenote.
-Ich umarme Dich und küsse Dich 1000mahl als Dein
-allergetreuester Bruder
-Jacob Michael Reinhold Lenz.
-Dorpat den 11ten October 1767.
-More text after sidenote.
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling sidenote XML: %v", err)
- }
-
- // Should have 1 sidenote
- if len(letterText.Sidenotes) != 1 {
- t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes))
- }
-
- // Verify sidenote details
- sidenote := letterText.Sidenotes[0]
- if sidenote.Position != SidenotePositionLeft {
- t.Errorf("Expected sidenote position left, got %d", sidenote.Position)
- }
- if sidenote.Page != 1 {
- t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page)
- }
- if !strings.Contains(sidenote.Annotation, "am linken Rand") {
- t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation)
- }
- sidenoteContent := tokensToString(sidenote.Content)
- if !strings.Contains(sidenoteContent, "Jacob Michael Reinhold Lenz") {
- t.Errorf("Expected sidenote content to contain author name, got '%s'", sidenoteContent)
- }
-
- // Verify page content doesn't contain sidenote
- for _, page := range letterText.Pages {
- content := tokensToString(page.Content)
- if strings.Contains(content, "allergetreuester Bruder") {
- t.Errorf("Page content should not contain sidenote text, but page %d does", page.Page)
- }
- }
-}
-
-func TestLetterTextUnmarshal_ComplexSidenotePositions(t *testing.T) {
- // Test different sidenote positions
- testXML := `
-
-Top right sidenote
-Bottom left sidenote
-Top sidenote
-Some content.
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling complex sidenotes XML: %v", err)
- }
-
- if len(letterText.Sidenotes) != 3 {
- t.Fatalf("Expected 3 sidenotes, got %d", len(letterText.Sidenotes))
- }
-
- // Check position parsing
- positions := make(map[SidenotePosition]bool)
- for _, sidenote := range letterText.Sidenotes {
- positions[sidenote.Position] = true
- }
-
- expectedPositions := []SidenotePosition{
- SidenotePositionTopRight,
- SidenotePositionBottomLeft,
- SidenotePositionTop,
- }
-
- for _, expected := range expectedPositions {
- if !positions[expected] {
- t.Errorf("Expected to find sidenote position %d, but didn't", expected)
- }
- }
-}
-
-func TestLetterTextUnmarshal_NoPageBreaks(t *testing.T) {
- // Test letter without explicit page breaks
- testXML := `
-This is all content on the default page.
-Some markup and more text.
-Note on single page
-Final text.
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling no-page-break XML: %v", err)
- }
-
- // Should have 1 page (default page 1)
- if len(letterText.Pages) != 1 {
- t.Errorf("Expected 1 page, got %d", len(letterText.Pages))
- }
- if len(letterText.PageBreaks) != 0 {
- t.Errorf("Expected 0 page breaks, got %d", len(letterText.PageBreaks))
- }
-
- // Page should be page 1
- if letterText.Pages[0].Page != 1 {
- t.Errorf("Expected page 1, got page %d", letterText.Pages[0].Page)
- }
-
- // Content should contain markup but not sidenote
- content := tokensToString(letterText.Pages[0].Content)
- if !strings.Contains(content, "Some markup") {
- t.Error("Expected page content to contain markup")
- }
- if strings.Contains(content, "Note on single page") {
- t.Error("Page content should not contain sidenote text")
- }
-}
-
-func TestLetterTextUnmarshal_EmptyContent(t *testing.T) {
- // Test edge case with empty content
- testXML := `
-
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling empty XML: %v", err)
- }
-
- // Should have no pages with content
- if len(letterText.Pages) != 0 {
- t.Errorf("Expected 0 pages with content, got %d", len(letterText.Pages))
- }
- if len(letterText.PageBreaks) != 1 {
- t.Errorf("Expected 1 page break, got %d", len(letterText.PageBreaks))
- }
-}
-
-func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) {
- tests := []struct {
- input string
- expected SidenotePosition
- }{
- {"left", SidenotePositionLeft},
- {"right", SidenotePositionRight},
- {"top", SidenotePositionTop},
- {"top left", SidenotePositionTopLeft},
- {"top right", SidenotePositionTopRight},
- {"bottom", SidenotePositionBottom},
- {"bottom left", SidenotePositionBottomLeft},
- {"bottom right", SidenotePositionBottomRight},
- {"unknown", SidenotePositionLeft}, // Default fallback
- }
-
- for _, test := range tests {
- var pos SidenotePosition
- attr := xml.Attr{Value: test.input}
- err := pos.UnmarshalXMLAttr(attr)
- if err != nil {
- t.Errorf("Error unmarshaling position '%s': %v", test.input, err)
- }
- if pos != test.expected {
- t.Errorf("Expected position %d for input '%s', got %d", test.expected, test.input, pos)
- }
- }
-}
-
-func TestLetterTextUnmarshal_PreserveMarkup(t *testing.T) {
- // Test that various markup elements are preserved in page content
- testXML := `
-
-Text with antiqua and bold and italic.
-
-Centered text
-
-Deleted text
-More content with person reference.
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling markup XML: %v", err)
- }
-
- if len(letterText.Pages) != 1 {
- t.Fatalf("Expected 1 page, got %d", len(letterText.Pages))
- }
-
- content := tokensToString(letterText.Pages[0].Content)
- expectedMarkup := []string{
- "antiqua",
- "bold",
- "italic",
- "",
- "",
- "",
- "Deleted text",
- "person reference",
- }
-
- for _, markup := range expectedMarkup {
- if !strings.Contains(content, markup) {
- t.Errorf("Expected page content to contain '%s', but it doesn't. Content: %s", markup, content)
- }
- }
-}
-
-func TestLetterTextUnmarshal_LetterAttribute(t *testing.T) {
- // Test that the letter attribute is parsed correctly
- testXML := `
-
-Some content.
-`
-
- var letterText LetterText
- err := xml.Unmarshal([]byte(testXML), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling letter attribute XML: %v", err)
- }
-
- // Verify letter attribute is parsed
- if letterText.Letter != 42 {
- t.Errorf("Expected letter attribute 42, got %d", letterText.Letter)
- }
-}
-
-func TestLetterTextUnmarshal_LetterAttribute_AllExistingTests(t *testing.T) {
- // Test that existing test cases also have correct letter attributes
- testCases := []struct {
- name string
- xml string
- expectedLetter int
- }{
- {
- name: "Simple case",
- xml: `
- Some content.
- `,
- expectedLetter: 123,
- },
- {
- name: "Real example letter 1",
- xml: `
- Some content.
- `,
- expectedLetter: 1,
- },
- {
- name: "Letter with sidenotes",
- xml: `
-
- Note
- Content.
- `,
- expectedLetter: 999,
- },
- }
-
- for _, tc := range testCases {
- t.Run(tc.name, func(t *testing.T) {
- var letterText LetterText
- err := xml.Unmarshal([]byte(tc.xml), &letterText)
- if err != nil {
- t.Fatalf("Error unmarshaling XML: %v", err)
- }
-
- if letterText.Letter != tc.expectedLetter {
- t.Errorf("Expected letter attribute %d, got %d", tc.expectedLetter, letterText.Letter)
- }
- })
- }
-}
\ No newline at end of file