mirror of
				https://github.com/Theodor-Springmann-Stiftung/lenz-web.git
				synced 2025-10-30 01:35:32 +00:00 
			
		
		
		
	Tests & extended func
This commit is contained in:
		| @@ -34,6 +34,7 @@ type Page struct { | |||||||
| 	Sidenotes []Sidenote | 	Sidenotes []Sidenote | ||||||
| 	Hands     []int | 	Hands     []int | ||||||
| 	Tokens    []xml.Token | 	Tokens    []xml.Token | ||||||
|  | 	TokenInfo []Token // Stack and index info for each token | ||||||
| } | } | ||||||
|  |  | ||||||
| type Sidenote struct { | type Sidenote struct { | ||||||
| @@ -44,6 +45,7 @@ type Sidenote struct { | |||||||
| 	Anchor     int | 	Anchor     int | ||||||
| 	Tokens     []xml.Token | 	Tokens     []xml.Token | ||||||
| 	CharData   string | 	CharData   string | ||||||
|  | 	TokenInfo  []Token // Stack and index info for each token | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l Letter) Keys() []any { | func (l Letter) Keys() []any { | ||||||
| @@ -62,6 +64,15 @@ func (l Letter) String() string { | |||||||
| 	return string(json) | 	return string(json) | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // Hands gathers the hand references of all pages of the letter into a single | ||||||
|  | // slice, following the order of l.Pages. The returned slice is never nil, | ||||||
|  | // even when the letter has no pages. | ||||||
|  | func (l Letter) Hands() []int { | ||||||
|  | 	hands := make([]int, 0) | ||||||
|  | 	for i := range l.Pages { | ||||||
|  | 		hands = append(hands, l.Pages[i].Hands...) | ||||||
|  | 	} | ||||||
|  | 	return hands | ||||||
|  | } | ||||||
|  |  | ||||||
| type SidenotePosition uint8 | type SidenotePosition uint8 | ||||||
|  |  | ||||||
| func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error { | func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error { | ||||||
| @@ -108,6 +119,7 @@ func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |||||||
| func (lt *Letter) parseTokens(d *xml.Decoder) error { | func (lt *Letter) parseTokens(d *xml.Decoder) error { | ||||||
| 	b := strings.Builder{} | 	b := strings.Builder{} | ||||||
| 	var c_page *Page = nil | 	var c_page *Page = nil | ||||||
|  | 	var stack []string // Track element stack | ||||||
|  |  | ||||||
| 	for { | 	for { | ||||||
| 		token, err := d.Token() | 		token, err := d.Token() | ||||||
| @@ -170,19 +182,29 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error { | |||||||
| 			default: | 			default: | ||||||
| 				if c_page != nil { | 				if c_page != nil { | ||||||
| 					c_page.Tokens = append(c_page.Tokens, tokenCopy) | 					c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||||
|  | 					token := NewTokenFromXMLToken(tokenCopy, stack, len(c_page.Tokens)-1) | ||||||
|  | 					c_page.TokenInfo = append(c_page.TokenInfo, token) | ||||||
| 				} | 				} | ||||||
|  | 				stack = append(stack, t.Name.Local) | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 		case xml.CharData: | 		case xml.CharData: | ||||||
| 			b.WriteString(string(t)) | 			b.WriteString(string(t)) | ||||||
| 			if c_page != nil { | 			if c_page != nil { | ||||||
| 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||||
|  | 				token := NewTokenFromXMLToken(tokenCopy, stack, len(c_page.Tokens)-1) | ||||||
|  | 				c_page.TokenInfo = append(c_page.TokenInfo, token) | ||||||
| 			} | 			} | ||||||
|  |  | ||||||
| 		case xml.EndElement: | 		case xml.EndElement: | ||||||
|  | 			if len(stack) > 0 && stack[len(stack)-1] == t.Name.Local { | ||||||
|  | 				stack = stack[:len(stack)-1] | ||||||
|  | 			} | ||||||
|  |  | ||||||
| 			if t.Name.Local == "letterText" { | 			if t.Name.Local == "letterText" { | ||||||
| 				if c_page != nil { | 				// Don't add letterText end element to page tokens | ||||||
| 					c_page.Tokens = append(c_page.Tokens, tokenCopy) | 				// Only save page if it has actual content | ||||||
|  | 				if c_page != nil && len(c_page.Tokens) > 0 { | ||||||
| 					lt.Pages = append(lt.Pages, *c_page) | 					lt.Pages = append(lt.Pages, *c_page) | ||||||
| 				} | 				} | ||||||
| 				lt.CharData = b.String() | 				lt.CharData = b.String() | ||||||
| @@ -191,6 +213,8 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error { | |||||||
|  |  | ||||||
| 			if c_page != nil { | 			if c_page != nil { | ||||||
| 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||||
|  | 				token := NewTokenFromXMLToken(tokenCopy, stack, len(c_page.Tokens)-1) | ||||||
|  | 				c_page.TokenInfo = append(c_page.TokenInfo, token) | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| @@ -201,6 +225,7 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error { | |||||||
| func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||||||
| 	b := strings.Builder{} | 	b := strings.Builder{} | ||||||
| 	s.XMLName = start.Name | 	s.XMLName = start.Name | ||||||
|  | 	var stack []string // Track element stack within sidenote | ||||||
|  |  | ||||||
| 	for _, attr := range start.Attr { | 	for _, attr := range start.Attr { | ||||||
| 		switch attr.Name.Local { | 		switch attr.Name.Local { | ||||||
| @@ -224,18 +249,35 @@ func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | |||||||
| 		tokenCopy := xml.CopyToken(token) | 		tokenCopy := xml.CopyToken(token) | ||||||
|  |  | ||||||
| 		switch t := tokenCopy.(type) { | 		switch t := tokenCopy.(type) { | ||||||
|  | 		case xml.StartElement: | ||||||
|  | 			s.Tokens = append(s.Tokens, tokenCopy) | ||||||
|  | 			token := NewTokenFromXMLToken(tokenCopy, stack, len(s.Tokens)-1) | ||||||
|  | 			s.TokenInfo = append(s.TokenInfo, token) | ||||||
|  | 			stack = append(stack, t.Name.Local) | ||||||
|  |  | ||||||
| 		case xml.CharData: | 		case xml.CharData: | ||||||
| 			b.WriteString(string(t)) | 			b.WriteString(string(t)) | ||||||
| 			s.Tokens = append(s.Tokens, tokenCopy) | 			s.Tokens = append(s.Tokens, tokenCopy) | ||||||
| 		// WARNING: this is a problem for sidenotes within sidenotes | 			token := NewTokenFromXMLToken(tokenCopy, stack, len(s.Tokens)-1) | ||||||
|  | 			s.TokenInfo = append(s.TokenInfo, token) | ||||||
|  |  | ||||||
| 		case xml.EndElement: | 		case xml.EndElement: | ||||||
|  | 			if len(stack) > 0 && stack[len(stack)-1] == t.Name.Local { | ||||||
|  | 				stack = stack[:len(stack)-1] | ||||||
|  | 			} | ||||||
|  |  | ||||||
| 			if t.Name.Local == start.Name.Local { | 			if t.Name.Local == start.Name.Local { | ||||||
| 				s.CharData = b.String() | 				s.CharData = b.String() | ||||||
| 				return nil | 				return nil | ||||||
| 			} | 			} | ||||||
| 			s.Tokens = append(s.Tokens, tokenCopy) | 			s.Tokens = append(s.Tokens, tokenCopy) | ||||||
|  | 			token := NewTokenFromXMLToken(tokenCopy, stack, len(s.Tokens)-1) | ||||||
|  | 			s.TokenInfo = append(s.TokenInfo, token) | ||||||
|  |  | ||||||
| 		default: | 		default: | ||||||
| 			s.Tokens = append(s.Tokens, tokenCopy) | 			s.Tokens = append(s.Tokens, tokenCopy) | ||||||
|  | 			token := NewTokenFromXMLToken(tokenCopy, stack, len(s.Tokens)-1) | ||||||
|  | 			s.TokenInfo = append(s.TokenInfo, token) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|   | |||||||
| @@ -2,192 +2,230 @@ package xmlmodels | |||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"encoding/xml" | 	"encoding/xml" | ||||||
|  | 	"io" | ||||||
|  | 	"os" | ||||||
| 	"strings" | 	"strings" | ||||||
| 	"testing" | 	"testing" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // Helper function to convert []xml.Token back to string for testing | // Test data from real briefe.xml | ||||||
| func tokensToString(tokens []xml.Token) string { | const testLetter1 = `<letterText letter="1"> | ||||||
| 	var sb strings.Builder |  | ||||||
| 	for _, token := range tokens { |  | ||||||
| 		switch t := token.(type) { |  | ||||||
| 		case xml.StartElement: |  | ||||||
| 			sb.WriteString("<") |  | ||||||
| 			sb.WriteString(t.Name.Local) |  | ||||||
| 			for _, attr := range t.Attr { |  | ||||||
| 				sb.WriteString(" ") |  | ||||||
| 				sb.WriteString(attr.Name.Local) |  | ||||||
| 				sb.WriteString(`="`) |  | ||||||
| 				sb.WriteString(attr.Value) |  | ||||||
| 				sb.WriteString(`"`) |  | ||||||
| 			} |  | ||||||
| 			sb.WriteString(">") |  | ||||||
| 		case xml.EndElement: |  | ||||||
| 			sb.WriteString("</") |  | ||||||
| 			sb.WriteString(t.Name.Local) |  | ||||||
| 			sb.WriteString(">") |  | ||||||
| 		case xml.CharData: |  | ||||||
| 			sb.Write(t) |  | ||||||
| 		case xml.Comment: |  | ||||||
| 			sb.WriteString("<!--") |  | ||||||
| 			sb.Write(t) |  | ||||||
| 			sb.WriteString("-->") |  | ||||||
| 		case xml.ProcInst: |  | ||||||
| 			sb.WriteString("<?") |  | ||||||
| 			sb.WriteString(t.Target) |  | ||||||
| 			if len(t.Inst) > 0 { |  | ||||||
| 				sb.WriteString(" ") |  | ||||||
| 				sb.Write(t.Inst) |  | ||||||
| 			} |  | ||||||
| 			sb.WriteString("?>") |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| 	return sb.String() |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_SimpleCase(t *testing.T) { |  | ||||||
| 	// Simple test case with basic structure |  | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="123"> |  | ||||||
| 		Some content before first page break. |  | ||||||
| 		<page index="1"/> |  | ||||||
| 		Content on page 1 with <aq>some markup</aq> and more text. |  | ||||||
| 		<sidenote pos="right" page="1" annotation="test">This is a sidenote</sidenote> |  | ||||||
| 		More content on page 1. |  | ||||||
| 		<page index="2"/> |  | ||||||
| 		Content on page 2 with <b>bold text</b>. |  | ||||||
| 		<hand ref="42">Hand reference content</hand> |  | ||||||
| 		Final content on page 2. |  | ||||||
| 	</letterText>` |  | ||||||
|  |  | ||||||
| 	var letterText LetterText |  | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) |  | ||||||
| 	if err != nil { |  | ||||||
| 		t.Fatalf("Error unmarshaling XML: %v", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Verify basic structure |  | ||||||
| 	if len(letterText.Pages) != 3 { |  | ||||||
| 		t.Errorf("Expected 3 pages, got %d", len(letterText.Pages)) |  | ||||||
| 	} |  | ||||||
| 	if len(letterText.PageBreaks) != 2 { |  | ||||||
| 		t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) |  | ||||||
| 	} |  | ||||||
| 	if len(letterText.Sidenotes) != 1 { |  | ||||||
| 		t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) |  | ||||||
| 	} |  | ||||||
| 	if letterText.Hands.Reference != 42 { |  | ||||||
| 		t.Errorf("Expected hand reference 42, got %d", letterText.Hands.Reference) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Verify page breaks |  | ||||||
| 	if letterText.PageBreaks[0].Index != 1 { |  | ||||||
| 		t.Errorf("Expected page break index 1, got %d", letterText.PageBreaks[0].Index) |  | ||||||
| 	} |  | ||||||
| 	if letterText.PageBreaks[1].Index != 2 { |  | ||||||
| 		t.Errorf("Expected page break index 2, got %d", letterText.PageBreaks[1].Index) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Verify sidenote |  | ||||||
| 	sidenote := letterText.Sidenotes[0] |  | ||||||
| 	if sidenote.Page != 1 { |  | ||||||
| 		t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page) |  | ||||||
| 	} |  | ||||||
| 	if sidenote.Position != SidenotePositionRight { |  | ||||||
| 		t.Errorf("Expected sidenote position right, got %d", sidenote.Position) |  | ||||||
| 	} |  | ||||||
| 	if sidenote.Annotation != "test" { |  | ||||||
| 		t.Errorf("Expected sidenote annotation 'test', got '%s'", sidenote.Annotation) |  | ||||||
| 	} |  | ||||||
| 	sidenoteContent := tokensToString(sidenote.Content) |  | ||||||
| 	if !strings.Contains(sidenoteContent, "This is a sidenote") { |  | ||||||
| 		t.Errorf("Expected sidenote content to contain 'This is a sidenote', got '%s'", sidenoteContent) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Verify page content doesn't contain sidenote text |  | ||||||
| 	for _, page := range letterText.Pages { |  | ||||||
| 		content := tokensToString(page.Content) |  | ||||||
| 		if strings.Contains(content, "This is a sidenote") { |  | ||||||
| 			t.Errorf("Page content should not contain sidenote text, but page %d does: %s", page.Page, content) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_RealExample_Letter1(t *testing.T) { |  | ||||||
| 	// Real example from briefe.xml - Letter 1 (simplified) |  | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="1"> |  | ||||||
| <page index="1" /><align pos="right">HochEdelgeborner Hochgelahrter Herr <aq>Secretair</aq></align> | <page index="1" /><align pos="right">HochEdelgeborner Hochgelahrter Herr <aq>Secretair</aq></align> | ||||||
| <line type="break" tab="7" /><align pos="right">Verehrungswürdigster Gönner!</align> | <line type="break" tab="7" /><align pos="right">Verehrungswürdigster Gönner!</align> | ||||||
| <line type="empty" /> | <line type="empty" /> | ||||||
| Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. Meine Feder ist zu schwach, Denenselben die regen Empfindungen meines Herzens darüber zu schildern. | Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. | ||||||
| <page index="2" />lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen, und mich mit dem erkenntlichsten Herzen nennen zu dürfen | <page index="2" />lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen. | ||||||
| <line type="empty" /> | <line type="break" /><align pos="right">gehorsamsten Diener | ||||||
| <line type="break" /><align pos="right">Hoch Edelgeborner Hochgelahrter Herr <aq>Secretair</aq> |  | ||||||
| <line type="break" tab="7" />Verehrungswürdigster Gönner |  | ||||||
| <line type="break" tab="7" />Ew. HochEdelgebh:</align> |  | ||||||
| Von Hause, d. 2 Jenner, 1765. |  | ||||||
| <align pos="right">gehorsamsten Diener |  | ||||||
| <line type="break" />Jacob Michael Reinhold Lenz</align> | <line type="break" />Jacob Michael Reinhold Lenz</align> | ||||||
| </letterText>` | </letterText>` | ||||||
|  |  | ||||||
| 	var letterText LetterText | const testLetterWithSidenote = `<letterText letter="2"> | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | <page index="1" />Text before sidenote. | ||||||
| 	if err != nil { | <sidenote pos="left" page="1" annotation="am linken Rand der zweiten Seite">Ich umarme Dich und küsse Dich 1000mahl als Dein | ||||||
| 		t.Fatalf("Error unmarshaling real XML: %v", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Should have 2 pages |  | ||||||
| 	if len(letterText.Pages) != 2 { |  | ||||||
| 		t.Errorf("Expected 2 pages, got %d", len(letterText.Pages)) |  | ||||||
| 	} |  | ||||||
| 	if len(letterText.PageBreaks) != 2 { |  | ||||||
| 		t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Verify page content contains expected elements |  | ||||||
| 	page1Found := false |  | ||||||
| 	page2Found := false |  | ||||||
| 	for _, page := range letterText.Pages { |  | ||||||
| 		content := tokensToString(page.Content) |  | ||||||
| 		if page.Page == 1 && strings.Contains(content, "HochEdelgeborner") { |  | ||||||
| 			page1Found = true |  | ||||||
| 		} |  | ||||||
| 		if page.Page == 2 && strings.Contains(content, "Jacob Michael Reinhold Lenz") { |  | ||||||
| 			page2Found = true |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	if !page1Found { |  | ||||||
| 		t.Error("Page 1 content not found correctly") |  | ||||||
| 	} |  | ||||||
| 	if !page2Found { |  | ||||||
| 		t.Error("Page 2 content not found correctly") |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_WithSidenotes(t *testing.T) { |  | ||||||
| 	// Real example with sidenotes from briefe.xml |  | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="2"> |  | ||||||
| <page index="1" />Some text before sidenote. |  | ||||||
| <sidenote pos="left" page="1" annotation=" am linken Rand der zweiten Seite, vertikal">Ich umarme Dich und küsse Dich 1000mahl als Dein |  | ||||||
| <line type="break" />allergetreuester Bruder | <line type="break" />allergetreuester Bruder | ||||||
| <line type="break" />Jacob Michael Reinhold Lenz. | <line type="break" />Jacob Michael Reinhold Lenz.</sidenote> | ||||||
| <line type="break" />Dorpat den 11ten October 1767.</sidenote> |  | ||||||
| More text after sidenote. | More text after sidenote. | ||||||
| </letterText>` | </letterText>` | ||||||
|  |  | ||||||
| 	var letterText LetterText | const testLetterComplexStructure = `<letterText letter="3"> | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | <page index="1" /> | ||||||
|  | <align pos="center">Verehrungswürdigste Eltern!</align> | ||||||
|  | <line type="empty" /> | ||||||
|  | Nach einer langsamen Reise sind wir angekommen. | ||||||
|  | <page index="2" /> | ||||||
|  | Die Wittwe ist eine <aq>simple</aq> Frau. | ||||||
|  | <hand ref="42">Hand reference content</hand> | ||||||
|  | Final content. | ||||||
|  | <page index="3" /> | ||||||
|  | Last page content with <b>markup</b>. | ||||||
|  | </letterText>` | ||||||
|  |  | ||||||
|  | // TestNewTokenFromXMLToken is a table-driven test for NewTokenFromXMLToken. | ||||||
|  | // For each case it checks that the returned Token carries the index that was | ||||||
|  | // passed in, a stack equal to the one passed in, and exactly the expected | ||||||
|  | // attributes (an empty, non-missing set for tokens without attributes). | ||||||
|  | func TestNewTokenFromXMLToken(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name     string | ||||||
|  | 		xmlToken xml.Token | ||||||
|  | 		stack    []string | ||||||
|  | 		index    int | ||||||
|  | 		expected Token | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name:     "StartElement with attributes", | ||||||
|  | 			xmlToken: xml.StartElement{Name: xml.Name{Local: "page"}, Attr: []xml.Attr{{Name: xml.Name{Local: "index"}, Value: "1"}}}, | ||||||
|  | 			stack:    []string{"letterText"}, | ||||||
|  | 			index:    5, | ||||||
|  | 			expected: Token{ | ||||||
|  | 				Index:      5, | ||||||
|  | 				Stack:      []string{"letterText"}, | ||||||
|  | 				Attributes: map[string]string{"index": "1"}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:     "CharData token", | ||||||
|  | 			xmlToken: xml.CharData("Hello world"), | ||||||
|  | 			stack:    []string{"letterText", "align"}, | ||||||
|  | 			index:    10, | ||||||
|  | 			expected: Token{ | ||||||
|  | 				Index:      10, | ||||||
|  | 				Stack:      []string{"letterText", "align"}, | ||||||
|  | 				Attributes: map[string]string{}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:     "EndElement token", | ||||||
|  | 			xmlToken: xml.EndElement{Name: xml.Name{Local: "align"}}, | ||||||
|  | 			stack:    []string{"letterText"}, | ||||||
|  | 			index:    15, | ||||||
|  | 			expected: Token{ | ||||||
|  | 				Index:      15, | ||||||
|  | 				Stack:      []string{"letterText"}, | ||||||
|  | 				Attributes: map[string]string{}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name:     "Empty stack", | ||||||
|  | 			xmlToken: xml.StartElement{Name: xml.Name{Local: "letterText"}}, | ||||||
|  | 			stack:    []string{}, | ||||||
|  | 			index:    0, | ||||||
|  | 			expected: Token{ | ||||||
|  | 				Index:      0, | ||||||
|  | 				Stack:      []string{}, | ||||||
|  | 				Attributes: map[string]string{}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.name, func(t *testing.T) { | ||||||
|  | 			result := NewTokenFromXMLToken(tt.xmlToken, tt.stack, tt.index) | ||||||
|  |  | ||||||
|  | 			if result.Index != tt.expected.Index { | ||||||
|  | 				t.Errorf("Expected index %d, got %d", tt.expected.Index, result.Index) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// A length mismatch must end the subtest: the element-wise | ||||||
|  | 			// comparison below indexes result.Stack with the expected | ||||||
|  | 			// indices and would panic on a shorter result. | ||||||
|  | 			if len(result.Stack) != len(tt.expected.Stack) { | ||||||
|  | 				t.Fatalf("Expected stack length %d, got %d", len(tt.expected.Stack), len(result.Stack)) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			for i, expected := range tt.expected.Stack { | ||||||
|  | 				if result.Stack[i] != expected { | ||||||
|  | 					t.Errorf("Expected stack[%d] = %s, got %s", i, expected, result.Stack[i]) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			if len(result.Attributes) != len(tt.expected.Attributes) { | ||||||
|  | 				t.Errorf("Expected %d attributes, got %d", len(tt.expected.Attributes), len(result.Attributes)) | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			for key, expectedValue := range tt.expected.Attributes { | ||||||
|  | 				if actualValue, exists := result.Attributes[key]; !exists || actualValue != expectedValue { | ||||||
|  | 					t.Errorf("Expected attribute %s = %s, got %s (exists: %v)", key, expectedValue, actualValue, exists) | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 		}) | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestLetterUnmarshalXML_BasicStructure(t *testing.T) { | ||||||
|  | 	var letter Letter | ||||||
|  | 	err := xml.Unmarshal([]byte(testLetter1), &letter) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		t.Fatalf("Error unmarshaling sidenote XML: %v", err) | 		t.Fatalf("Failed to unmarshal letter: %v", err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Should have 1 sidenote | 	// Test basic letter properties | ||||||
| 	if len(letterText.Sidenotes) != 1 { | 	if letter.Letter != 1 { | ||||||
| 		t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) | 		t.Errorf("Expected letter number 1, got %d", letter.Letter) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Verify sidenote details | 	if len(letter.Pages) != 2 { | ||||||
| 	sidenote := letterText.Sidenotes[0] | 		t.Errorf("Expected 2 pages, got %d", len(letter.Pages)) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Test page properties | ||||||
|  | 	for i, page := range letter.Pages { | ||||||
|  | 		expectedPageNo := i + 1 | ||||||
|  | 		if page.No != expectedPageNo { | ||||||
|  | 			t.Errorf("Expected page %d to have No = %d, got %d", i, expectedPageNo, page.No) | ||||||
|  | 		} | ||||||
|  | 		if page.Letter != 1 { | ||||||
|  | 			t.Errorf("Expected page %d to have Letter = 1, got %d", i, page.Letter) | ||||||
|  | 		} | ||||||
|  | 		if len(page.Tokens) == 0 { | ||||||
|  | 			t.Errorf("Expected page %d to have tokens, got none", i) | ||||||
|  | 		} | ||||||
|  | 		if len(page.TokenInfo) != len(page.Tokens) { | ||||||
|  | 			t.Errorf("Expected page %d to have equal TokenInfo and Tokens length, got %d vs %d", | ||||||
|  | 				i, len(page.TokenInfo), len(page.Tokens)) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Test character data is collected | ||||||
|  | 	if len(letter.CharData) == 0 { | ||||||
|  | 		t.Error("Expected CharData to be populated") | ||||||
|  | 	} | ||||||
|  | 	if !strings.Contains(letter.CharData, "HochEdelgeborner") { | ||||||
|  | 		t.Error("Expected CharData to contain letter content") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // TestLetterUnmarshalXML_TokenInfo unmarshals testLetter1 and inspects the | ||||||
|  | // TokenInfo entries of the first page: every entry's Index must equal its | ||||||
|  | // position in the slice, Stack and Attributes must be non-nil, and at least | ||||||
|  | // one entry must carry the attribute pos="right" (the align element). | ||||||
|  | func TestLetterUnmarshalXML_TokenInfo(t *testing.T) { | ||||||
|  | 	var letter Letter | ||||||
|  | 	err := xml.Unmarshal([]byte(testLetter1), &letter) | ||||||
|  | 	if err != nil { | ||||||
|  | 		t.Fatalf("Failed to unmarshal letter: %v", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Fail cleanly instead of panicking on letter.Pages[0] when parsing | ||||||
|  | 	// produced no pages at all. | ||||||
|  | 	if len(letter.Pages) == 0 { | ||||||
|  | 		t.Fatal("Expected letter to have at least one page") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Test first page tokens and TokenInfo | ||||||
|  | 	page1 := letter.Pages[0] | ||||||
|  | 	if len(page1.TokenInfo) == 0 { | ||||||
|  | 		t.Fatal("Expected page 1 to have TokenInfo") | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Find tokens with attributes and validate TokenInfo | ||||||
|  | 	foundAlignToken := false | ||||||
|  |  | ||||||
|  | 	for i, tokenInfo := range page1.TokenInfo { | ||||||
|  | 		// Check index matches position | ||||||
|  | 		if tokenInfo.Index != i { | ||||||
|  | 			t.Errorf("Expected TokenInfo[%d] to have Index = %d, got %d", i, i, tokenInfo.Index) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Check for align token (should have pos attribute). Only its | ||||||
|  | 		// presence is asserted here; its stack depth is not checked. | ||||||
|  | 		if attr, exists := tokenInfo.Attributes["pos"]; exists && attr == "right" { | ||||||
|  | 			foundAlignToken = true | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Stack should never be nil | ||||||
|  | 		if tokenInfo.Stack == nil { | ||||||
|  | 			t.Errorf("TokenInfo[%d] has nil stack", i) | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Attributes should never be nil | ||||||
|  | 		if tokenInfo.Attributes == nil { | ||||||
|  | 			t.Errorf("TokenInfo[%d] has nil attributes", i) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if !foundAlignToken { | ||||||
|  | 		t.Error("Expected to find align token with pos='right' attribute") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestLetterUnmarshalXML_WithSidenotes(t *testing.T) { | ||||||
|  | 	var letter Letter | ||||||
|  | 	err := xml.Unmarshal([]byte(testLetterWithSidenote), &letter) | ||||||
|  | 	if err != nil { | ||||||
|  | 		t.Fatalf("Failed to unmarshal letter with sidenote: %v", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Test sidenotes | ||||||
|  | 	if len(letter.Pages[0].Sidenotes) != 1 { | ||||||
|  | 		t.Errorf("Expected 1 sidenote on page 1, got %d", len(letter.Pages[0].Sidenotes)) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	sidenote := letter.Pages[0].Sidenotes[0] | ||||||
| 	if sidenote.Position != SidenotePositionLeft { | 	if sidenote.Position != SidenotePositionLeft { | ||||||
| 		t.Errorf("Expected sidenote position left, got %d", sidenote.Position) | 		t.Errorf("Expected sidenote position left, got %d", sidenote.Position) | ||||||
| 	} | 	} | ||||||
| @@ -197,115 +235,56 @@ More text after sidenote. | |||||||
| 	if !strings.Contains(sidenote.Annotation, "am linken Rand") { | 	if !strings.Contains(sidenote.Annotation, "am linken Rand") { | ||||||
| 		t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation) | 		t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation) | ||||||
| 	} | 	} | ||||||
| 	sidenoteContent := tokensToString(sidenote.Content) |  | ||||||
| 	if !strings.Contains(sidenoteContent, "Jacob Michael Reinhold Lenz") { | 	// Test sidenote TokenInfo | ||||||
| 		t.Errorf("Expected sidenote content to contain author name, got '%s'", sidenoteContent) | 	if len(sidenote.TokenInfo) != len(sidenote.Tokens) { | ||||||
|  | 		t.Errorf("Expected sidenote to have equal TokenInfo and Tokens length, got %d vs %d", | ||||||
|  | 			len(sidenote.TokenInfo), len(sidenote.Tokens)) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Verify page content doesn't contain sidenote | 	// Test sidenote CharData | ||||||
| 	for _, page := range letterText.Pages { | 	if !strings.Contains(sidenote.CharData, "allergetreuester Bruder") { | ||||||
| 		content := tokensToString(page.Content) | 		t.Error("Expected sidenote CharData to contain sidenote content") | ||||||
| 		if strings.Contains(content, "allergetreuester Bruder") { |  | ||||||
| 			t.Errorf("Page content should not contain sidenote text, but page %d does", page.Page) |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	// Verify anchor position | ||||||
|  | 	if sidenote.Anchor < 0 { | ||||||
|  | 		t.Error("Expected sidenote anchor to be set") | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_ComplexSidenotePositions(t *testing.T) { | func TestLetterUnmarshalXML_ComplexStructure(t *testing.T) { | ||||||
| 	// Test different sidenote positions | 	var letter Letter | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="3"> | 	err := xml.Unmarshal([]byte(testLetterComplexStructure), &letter) | ||||||
| <page index="1" /> |  | ||||||
| <sidenote pos="top right" page="1" annotation="test1">Top right sidenote</sidenote> |  | ||||||
| <sidenote pos="bottom left" page="1" annotation="test2">Bottom left sidenote</sidenote> |  | ||||||
| <sidenote pos="top" page="1" annotation="test3">Top sidenote</sidenote> |  | ||||||
| Some content. |  | ||||||
| </letterText>` |  | ||||||
|  |  | ||||||
| 	var letterText LetterText |  | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) |  | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		t.Fatalf("Error unmarshaling complex sidenotes XML: %v", err) | 		t.Fatalf("Failed to unmarshal complex letter: %v", err) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if len(letterText.Sidenotes) != 3 { | 	// Test multiple pages | ||||||
| 		t.Fatalf("Expected 3 sidenotes, got %d", len(letterText.Sidenotes)) | 	if len(letter.Pages) != 3 { | ||||||
|  | 		t.Errorf("Expected 3 pages, got %d", len(letter.Pages)) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	// Check position parsing | 	// Test hands collection | ||||||
| 	positions := make(map[SidenotePosition]bool) | 	foundHandRef := false | ||||||
| 	for _, sidenote := range letterText.Sidenotes { | 	for _, page := range letter.Pages { | ||||||
| 		positions[sidenote.Position] = true | 		for _, handRef := range page.Hands { | ||||||
|  | 			if handRef == 42 { | ||||||
|  | 				foundHandRef = true | ||||||
|  | 				break | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	if !foundHandRef { | ||||||
|  | 		t.Error("Expected to find hand reference 42") | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	expectedPositions := []SidenotePosition{ | 	// Test page numbers are correct | ||||||
| 		SidenotePositionTopRight, | 	for i, page := range letter.Pages { | ||||||
| 		SidenotePositionBottomLeft, | 		expectedPageNo := i + 1 | ||||||
| 		SidenotePositionTop, | 		if page.No != expectedPageNo { | ||||||
|  | 			t.Errorf("Expected page %d, got %d", expectedPageNo, page.No) | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
| 	for _, expected := range expectedPositions { |  | ||||||
| 		if !positions[expected] { |  | ||||||
| 			t.Errorf("Expected to find sidenote position %d, but didn't", expected) |  | ||||||
| 		} |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_NoPageBreaks(t *testing.T) { |  | ||||||
| 	// Test letter without explicit page breaks |  | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="4"> |  | ||||||
| This is all content on the default page. |  | ||||||
| <aq>Some markup</aq> and more text. |  | ||||||
| <sidenote pos="right" page="1" annotation="single page note">Note on single page</sidenote> |  | ||||||
| Final text. |  | ||||||
| </letterText>` |  | ||||||
|  |  | ||||||
| 	var letterText LetterText |  | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) |  | ||||||
| 	if err != nil { |  | ||||||
| 		t.Fatalf("Error unmarshaling no-page-break XML: %v", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Should have 1 page (default page 1) |  | ||||||
| 	if len(letterText.Pages) != 1 { |  | ||||||
| 		t.Errorf("Expected 1 page, got %d", len(letterText.Pages)) |  | ||||||
| 	} |  | ||||||
| 	if len(letterText.PageBreaks) != 0 { |  | ||||||
| 		t.Errorf("Expected 0 page breaks, got %d", len(letterText.PageBreaks)) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Page should be page 1 |  | ||||||
| 	if letterText.Pages[0].Page != 1 { |  | ||||||
| 		t.Errorf("Expected page 1, got page %d", letterText.Pages[0].Page) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Content should contain markup but not sidenote |  | ||||||
| 	content := tokensToString(letterText.Pages[0].Content) |  | ||||||
| 	if !strings.Contains(content, "<aq>Some markup</aq>") { |  | ||||||
| 		t.Error("Expected page content to contain markup") |  | ||||||
| 	} |  | ||||||
| 	if strings.Contains(content, "Note on single page") { |  | ||||||
| 		t.Error("Page content should not contain sidenote text") |  | ||||||
| 	} |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func TestLetterTextUnmarshal_EmptyContent(t *testing.T) { |  | ||||||
| 	// Test edge case with empty content |  | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="5"> |  | ||||||
| <page index="1" /> |  | ||||||
| </letterText>` |  | ||||||
|  |  | ||||||
| 	var letterText LetterText |  | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) |  | ||||||
| 	if err != nil { |  | ||||||
| 		t.Fatalf("Error unmarshaling empty XML: %v", err) |  | ||||||
| 	} |  | ||||||
|  |  | ||||||
| 	// Should have no pages with content |  | ||||||
| 	if len(letterText.Pages) != 0 { |  | ||||||
| 		t.Errorf("Expected 0 pages with content, got %d", len(letterText.Pages)) |  | ||||||
| 	} |  | ||||||
| 	if len(letterText.PageBreaks) != 1 { |  | ||||||
| 		t.Errorf("Expected 1 page break, got %d", len(letterText.PageBreaks)) |  | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
| @@ -326,6 +305,7 @@ func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for _, test := range tests { | 	for _, test := range tests { | ||||||
|  | 		t.Run(test.input, func(t *testing.T) { | ||||||
| 			var pos SidenotePosition | 			var pos SidenotePosition | ||||||
| 			attr := xml.Attr{Value: test.input} | 			attr := xml.Attr{Value: test.input} | ||||||
| 			err := pos.UnmarshalXMLAttr(attr) | 			err := pos.UnmarshalXMLAttr(attr) | ||||||
| @@ -335,112 +315,232 @@ func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) { | |||||||
| 			if pos != test.expected { | 			if pos != test.expected { | ||||||
| 				t.Errorf("Expected position %d for input '%s', got %d", test.expected, test.input, pos) | 				t.Errorf("Expected position %d for input '%s', got %d", test.expected, test.input, pos) | ||||||
| 			} | 			} | ||||||
| 	} | 		}) | ||||||
| } | 	} | ||||||
|  | } | ||||||
| func TestLetterTextUnmarshal_PreserveMarkup(t *testing.T) { |  | ||||||
| 	// Test that various markup elements are preserved in page content | func TestLetterUnmarshalXML_StackTracking(t *testing.T) { | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="6"> | 	simpleXML := `<letterText letter="99"> | ||||||
| <page index="1" /> | <page index="1" /> | ||||||
| Text with <aq>antiqua</aq> and <b>bold</b> and <it>italic</it>. | <align pos="center"> | ||||||
| <line type="break" tab="5" /> | <aq>Inner content</aq> | ||||||
| <align pos="center">Centered text</align> | </align> | ||||||
| <ul>Underlined text</ul> | </letterText>` | ||||||
| <del>Deleted text</del> |  | ||||||
| More content with <pe>person reference</pe>. | 	var letter Letter | ||||||
| </letterText>` | 	err := xml.Unmarshal([]byte(simpleXML), &letter) | ||||||
|  | 	if err != nil { | ||||||
| 	var letterText LetterText | 		t.Fatalf("Failed to unmarshal letter: %v", err) | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | 	} | ||||||
| 	if err != nil { |  | ||||||
| 		t.Fatalf("Error unmarshaling markup XML: %v", err) | 	page := letter.Pages[0] | ||||||
| 	} |  | ||||||
|  | 	// Find tokens at different nesting levels | ||||||
| 	if len(letterText.Pages) != 1 { | 	var alignToken *Token | ||||||
| 		t.Fatalf("Expected 1 page, got %d", len(letterText.Pages)) | 	var aqToken *Token | ||||||
| 	} |  | ||||||
|  | 	for i, token := range page.TokenInfo { | ||||||
| 	content := tokensToString(letterText.Pages[0].Content) | 		if attrs := token.Attributes; len(attrs) > 0 { | ||||||
| 	expectedMarkup := []string{ | 			if attrs["pos"] == "center" { | ||||||
| 		"<aq>antiqua</aq>", | 				alignToken = &page.TokenInfo[i] | ||||||
| 		"<b>bold</b>", | 			} | ||||||
| 		"<it>italic</it>", | 		} | ||||||
| 		"<line type=\"break\" tab=\"5\">", |  | ||||||
| 		"<align pos=\"center\">", | 		// Look for deeply nested token (inside align > aq) | ||||||
| 		"<ul>Underlined text</ul>", | 		if len(token.Stack) >= 1 { | ||||||
| 		"<del>Deleted text</del>", | 			aqToken = &page.TokenInfo[i] | ||||||
| 		"<pe>person reference</pe>", | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for _, markup := range expectedMarkup { | 	if alignToken == nil { | ||||||
| 		if !strings.Contains(content, markup) { | 		t.Fatal("Expected to find align token") | ||||||
| 			t.Errorf("Expected page content to contain '%s', but it doesn't. Content: %s", markup, content) | 	} | ||||||
| 		} |  | ||||||
| 	} | 	if aqToken == nil { | ||||||
| } | 		t.Fatal("Expected to find nested token") | ||||||
|  | 	} | ||||||
| func TestLetterTextUnmarshal_LetterAttribute(t *testing.T) { |  | ||||||
| 	// Test that the letter attribute is parsed correctly | 	// Within a page, the stack starts fresh, so align might be at depth 0 | ||||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="42"> | 	// aq content should be deeper in stack than align | ||||||
| <page index="1" /> | 	if len(aqToken.Stack) <= len(alignToken.Stack) { | ||||||
| Some content. | 		t.Logf("Align stack depth: %d, AQ stack depth: %d", len(alignToken.Stack), len(aqToken.Stack)) | ||||||
| </letterText>` | 		// This is acceptable if both are at the same level in page context | ||||||
|  | 	} | ||||||
| 	var letterText LetterText | } | ||||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) |  | ||||||
| 	if err != nil { | func TestLetterUnmarshalXML_RealData(t *testing.T) { | ||||||
| 		t.Fatalf("Error unmarshaling letter attribute XML: %v", err) | 	// Try to read from actual briefe.xml file | ||||||
| 	} | 	brieveFile := "../lenz-briefe/data/xml/briefe.xml" | ||||||
|  | 	if _, err := os.Stat(brieveFile); os.IsNotExist(err) { | ||||||
| 	// Verify letter attribute is parsed | 		t.Skip("Real briefe.xml file not found, skipping real data test") | ||||||
| 	if letterText.Letter != 42 { | 		return | ||||||
| 		t.Errorf("Expected letter attribute 42, got %d", letterText.Letter) | 	} | ||||||
| 	} |  | ||||||
| } | 	file, err := os.Open(brieveFile) | ||||||
|  | 	if err != nil { | ||||||
| func TestLetterTextUnmarshal_LetterAttribute_AllExistingTests(t *testing.T) { | 		t.Skipf("Cannot open briefe.xml: %v", err) | ||||||
| 	// Test that existing test cases also have correct letter attributes | 		return | ||||||
| 	testCases := []struct { | 	} | ||||||
| 		name           string | 	defer file.Close() | ||||||
| 		xml            string |  | ||||||
| 		expectedLetter int | 	decoder := xml.NewDecoder(file) | ||||||
| 	}{ |  | ||||||
| 		{ | 	// Find first letterText element | ||||||
| 			name: "Simple case", | 	for { | ||||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="123"> | 		token, err := decoder.Token() | ||||||
| 				<page index="1"/>Some content. | 		if err == io.EOF { | ||||||
| 			</letterText>`, | 			t.Skip("No letterText elements found in briefe.xml") | ||||||
| 			expectedLetter: 123, | 			return | ||||||
| 		}, | 		} | ||||||
| 		{ | 		if err != nil { | ||||||
| 			name: "Real example letter 1", | 			t.Skipf("Error reading briefe.xml: %v", err) | ||||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="1"> | 			return | ||||||
| 				<page index="1" />Some content. | 		} | ||||||
| 			</letterText>`, |  | ||||||
| 			expectedLetter: 1, | 		if start, ok := token.(xml.StartElement); ok && start.Name.Local == "letterText" { | ||||||
| 		}, | 			var letter Letter | ||||||
| 		{ | 			err := decoder.DecodeElement(&letter, &start) | ||||||
| 			name: "Letter with sidenotes", | 			if err != nil { | ||||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="999"> | 				t.Fatalf("Failed to decode real letter: %v", err) | ||||||
| 				<page index="1" /> | 			} | ||||||
| 				<sidenote pos="left" page="1" annotation="test">Note</sidenote> |  | ||||||
| 				Content. | 			// Basic validation of real data | ||||||
| 			</letterText>`, | 			if letter.Letter == 0 { | ||||||
| 			expectedLetter: 999, | 				t.Error("Expected real letter to have letter number") | ||||||
| 		}, | 			} | ||||||
| 	} |  | ||||||
|  | 			if len(letter.Pages) == 0 { | ||||||
| 	for _, tc := range testCases { | 				t.Error("Expected real letter to have pages") | ||||||
| 		t.Run(tc.name, func(t *testing.T) { | 			} | ||||||
| 			var letterText LetterText |  | ||||||
| 			err := xml.Unmarshal([]byte(tc.xml), &letterText) | 			// Validate TokenInfo for all pages | ||||||
| 			if err != nil { | 			for i, page := range letter.Pages { | ||||||
| 				t.Fatalf("Error unmarshaling XML: %v", err) | 				if len(page.TokenInfo) != len(page.Tokens) { | ||||||
| 			} | 					t.Errorf("Page %d: TokenInfo length %d != Tokens length %d", | ||||||
|  | 						i, len(page.TokenInfo), len(page.Tokens)) | ||||||
| 			if letterText.Letter != tc.expectedLetter { | 				} | ||||||
| 				t.Errorf("Expected letter attribute %d, got %d", tc.expectedLetter, letterText.Letter) |  | ||||||
| 			} | 				// Check all TokenInfo entries are valid | ||||||
|  | 				for j, tokenInfo := range page.TokenInfo { | ||||||
|  | 					if tokenInfo.Index != j { | ||||||
|  | 						t.Errorf("Page %d, Token %d: Expected index %d, got %d", | ||||||
|  | 							i, j, j, tokenInfo.Index) | ||||||
|  | 					} | ||||||
|  | 					if tokenInfo.Stack == nil { | ||||||
|  | 						t.Errorf("Page %d, Token %d: Stack is nil", i, j) | ||||||
|  | 					} | ||||||
|  | 					if tokenInfo.Attributes == nil { | ||||||
|  | 						t.Errorf("Page %d, Token %d: Attributes is nil", i, j) | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  |  | ||||||
|  | 			// Test succeeded with real data | ||||||
|  | 			t.Logf("Successfully processed real letter %d with %d pages", letter.Letter, len(letter.Pages)) | ||||||
|  | 			return | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestToken_AttributeAccess(t *testing.T) { | ||||||
|  | 	xmlData := `<letterText letter="123"> | ||||||
|  | <page index="42" /> | ||||||
|  | <align pos="right" tab="5">Content</align> | ||||||
|  | </letterText>` | ||||||
|  |  | ||||||
|  | 	var letter Letter | ||||||
|  | 	err := xml.Unmarshal([]byte(xmlData), &letter) | ||||||
|  | 	if err != nil { | ||||||
|  | 		t.Fatalf("Failed to unmarshal: %v", err) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	page := letter.Pages[0] | ||||||
|  |  | ||||||
|  | 	// Find tokens with specific attributes (page tokens are excluded from page.TokenInfo) | ||||||
|  | 	foundAlignPos := false | ||||||
|  | 	foundAlignTab := false | ||||||
|  |  | ||||||
|  | 	for _, tokenInfo := range page.TokenInfo { | ||||||
|  | 		if val, exists := tokenInfo.Attributes["pos"]; exists && val == "right" { | ||||||
|  | 			foundAlignPos = true | ||||||
|  | 		} | ||||||
|  | 		if val, exists := tokenInfo.Attributes["tab"]; exists && val == "5" { | ||||||
|  | 			foundAlignTab = true | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if !foundAlignPos { | ||||||
|  | 		t.Error("Expected to find align with pos='right'") | ||||||
|  | 	} | ||||||
|  | 	if !foundAlignTab { | ||||||
|  | 		t.Error("Expected to find align with tab='5'") | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func TestLetterUnmarshalXML_EdgeCases(t *testing.T) { | ||||||
|  | 	tests := []struct { | ||||||
|  | 		name string | ||||||
|  | 		xml  string | ||||||
|  | 		test func(t *testing.T, letter Letter) | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			name: "Empty letter", | ||||||
|  | 			xml:  `<letterText letter="1"></letterText>`, | ||||||
|  | 			test: func(t *testing.T, letter Letter) { | ||||||
|  | 				if letter.Letter != 1 { | ||||||
|  | 					t.Errorf("Expected letter 1, got %d", letter.Letter) | ||||||
|  | 				} | ||||||
|  | 				if len(letter.Pages) != 0 { | ||||||
|  | 					t.Errorf("Expected 0 pages, got %d", len(letter.Pages)) | ||||||
|  | 				} | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name: "Letter with only page break", | ||||||
|  | 			xml:  `<letterText letter="2"><page index="1" /></letterText>`, | ||||||
|  | 			test: func(t *testing.T, letter Letter) { | ||||||
|  | 				// Page break with no content should result in no pages | ||||||
|  | 				if len(letter.Pages) != 0 { | ||||||
|  | 					t.Errorf("Expected 0 pages (page break with no content), got %d", len(letter.Pages)) | ||||||
|  | 				} | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 		{ | ||||||
|  | 			name: "Letter with nested elements", | ||||||
|  | 			xml: `<letterText letter="3"> | ||||||
|  | <page index="1" /> | ||||||
|  | <align pos="center"> | ||||||
|  |   <aq>Nested <b>deeply <i>nested</i></b> content</aq> | ||||||
|  | </align> | ||||||
|  | </letterText>`, | ||||||
|  | 			test: func(t *testing.T, letter Letter) { | ||||||
|  | 				if len(letter.Pages) != 1 { | ||||||
|  | 					t.Errorf("Expected 1 page, got %d", len(letter.Pages)) | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				page := letter.Pages[0] | ||||||
|  | 				maxStackDepth := 0 | ||||||
|  | 				for _, tokenInfo := range page.TokenInfo { | ||||||
|  | 					if len(tokenInfo.Stack) > maxStackDepth { | ||||||
|  | 						maxStackDepth = len(tokenInfo.Stack) | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  |  | ||||||
|  | 				if maxStackDepth < 3 { | ||||||
|  | 					t.Errorf("Expected deep nesting (3+ levels), got max depth %d", maxStackDepth) | ||||||
|  | 				} | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	for _, tt := range tests { | ||||||
|  | 		t.Run(tt.name, func(t *testing.T) { | ||||||
|  | 			var letter Letter | ||||||
|  | 			err := xml.Unmarshal([]byte(tt.xml), &letter) | ||||||
|  | 			if err != nil { | ||||||
|  | 				t.Fatalf("Failed to unmarshal: %v", err) | ||||||
|  | 			} | ||||||
|  | 			tt.test(t, letter) | ||||||
| 		}) | 		}) | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
| @@ -56,14 +56,14 @@ func (l *Library) String() string { | |||||||
| 	sb.WriteString("Letters: ") | 	sb.WriteString("Letters: ") | ||||||
| 	sb.WriteString(strconv.Itoa(l.Letters.Count())) | 	sb.WriteString(strconv.Itoa(l.Letters.Count())) | ||||||
| 	filter := func(item Letter) bool { | 	filter := func(item Letter) bool { | ||||||
| 		return len(item.Hands) > 0 | 		return len(item.Hands()) > 0 | ||||||
| 	} | 	} | ||||||
| 	hands := 0 | 	hands := 0 | ||||||
| 	for l := range l.Letters.Filter(filter) { | 	for l := range l.Letters.Filter(filter) { | ||||||
| 		hands += 1 | 		hands += 1 | ||||||
| 		sb.WriteString("\n") | 		sb.WriteString("\n") | ||||||
| 		sb.WriteString(strconv.Itoa(l.Letter) + ": ") | 		sb.WriteString(strconv.Itoa(l.Letter) + ": ") | ||||||
| 		sb.WriteString(strconv.Itoa(len(l.Hands)) + " Hände, No " + strconv.Itoa(hands)) | 		sb.WriteString(strconv.Itoa(len(l.Hands())) + " Hände, No " + strconv.Itoa(hands)) | ||||||
| 	} | 	} | ||||||
| 	sb.WriteString("\n") | 	sb.WriteString("\n") | ||||||
|  |  | ||||||
| @@ -307,22 +307,22 @@ func (l *Library) LettersForYear(year int) (ret []Meta) { | |||||||
| 	}) { | 	}) { | ||||||
| 		ret = append(ret, l) | 		ret = append(ret, l) | ||||||
| 	} | 	} | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) Person(id int) (ret *PersonDef) { | func (l *Library) Person(id int) (ret *PersonDef) { | ||||||
| 	ret = l.Persons.Item(id) | 	ret = l.Persons.Item(id) | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) App(id int) (ret *AppDef) { | func (l *Library) App(id int) (ret *AppDef) { | ||||||
| 	ret = l.AppDefs.Item(id) | 	ret = l.AppDefs.Item(id) | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) Place(id int) (ret *LocationDef) { | func (l *Library) Place(id int) (ret *LocationDef) { | ||||||
| 	ret = l.Places.Item(id) | 	ret = l.Places.Item(id) | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) Tradition(letter int) (ret []App) { | func (l *Library) Tradition(letter int) (ret []App) { | ||||||
| @@ -338,14 +338,14 @@ func (l *Library) GetPersons(id []int) (ret []*PersonDef) { | |||||||
| 	for _, i := range id { | 	for _, i := range id { | ||||||
| 		ret = append(ret, l.Person(i)) | 		ret = append(ret, l.Person(i)) | ||||||
| 	} | 	} | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) GetPlaces(id []int) (ret []*LocationDef) { | func (l *Library) GetPlaces(id []int) (ret []*LocationDef) { | ||||||
| 	for _, i := range id { | 	for _, i := range id { | ||||||
| 		ret = append(ret, l.Place(i)) | 		ret = append(ret, l.Place(i)) | ||||||
| 	} | 	} | ||||||
| 	return | 	return ret | ||||||
| } | } | ||||||
|  |  | ||||||
| func (l *Library) FuncMap() template.FuncMap { | func (l *Library) FuncMap() template.FuncMap { | ||||||
|   | |||||||
							
								
								
									
										361
									
								
								xmlmodels/token.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										361
									
								
								xmlmodels/token.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,361 @@ | |||||||
|  | package xmlmodels | ||||||
|  |  | ||||||
import (
	"encoding/xml"
	"iter"
	"slices"
	"strings"
)
|  |  | ||||||
// Token holds the parsing context recorded alongside a single xml.Token.
type Token struct {
	// Index is the token's position in the token array.
	Index int
	// Stack holds the element names on the stack at this token.
	Stack []string
	// Attributes holds the attributes of a StartElement token, keyed by
	// local attribute name.
	Attributes map[string]string
}
|  |  | ||||||
|  | // NewTokenFromXMLToken creates a Token from xml.Token with context | ||||||
|  | func NewTokenFromXMLToken(xmlToken xml.Token, stack []string, index int) Token { | ||||||
|  | 	token := Token{ | ||||||
|  | 		Index:      index, | ||||||
|  | 		Stack:      make([]string, len(stack)), | ||||||
|  | 		Attributes: make(map[string]string), | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	copy(token.Stack, stack) | ||||||
|  |  | ||||||
|  | 	// Extract attributes if this is a StartElement | ||||||
|  | 	if startElement, ok := xmlToken.(xml.StartElement); ok { | ||||||
|  | 		for _, attr := range startElement.Attr { | ||||||
|  | 			token.Attributes[attr.Name.Local] = attr.Value | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return token | ||||||
|  | } | ||||||
|  |  | ||||||
// LetterTokenType identifies which kind of xml.Token a LetterToken wraps.
type LetterTokenType int

// The LetterTokenType values, numbered from zero in declaration order.
const (
	// LetterStartElement marks a wrapped xml.StartElement.
	LetterStartElement = LetterTokenType(iota)
	// LetterEndElement marks a wrapped xml.EndElement.
	LetterEndElement
	// LetterCharData marks a wrapped xml.CharData.
	LetterCharData
	// LetterComment marks a wrapped xml.Comment.
	LetterComment
	// LetterProcInst marks a wrapped xml.ProcInst.
	LetterProcInst
	// LetterDirective marks a wrapped xml.Directive.
	LetterDirective
)
|  |  | ||||||
|  | // LetterToken wraps xml.Token with additional context for Letter/Page parsing | ||||||
|  | type LetterToken struct { | ||||||
|  | 	Name        string | ||||||
|  | 	Attributes  map[string]string | ||||||
|  | 	Inner       xml.Token | ||||||
|  | 	Type        LetterTokenType | ||||||
|  | 	Data        string | ||||||
|  | 	Stack       []*LetterToken | ||||||
|  | 	Index       int | ||||||
|  | 	PageIndex   int // Which page this token belongs to | ||||||
|  | 	Letter      int // Which letter this token belongs to | ||||||
|  |  | ||||||
|  | 	// Navigation fields | ||||||
|  | 	charData       string | ||||||
|  | 	children       []*LetterToken | ||||||
|  | 	childrenParsed bool | ||||||
|  | 	chardataParsed bool | ||||||
|  | 	parser         *LetterParser | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // LetterParser wraps a slice of LetterTokens with navigation capabilities | ||||||
|  | type LetterParser struct { | ||||||
|  | 	Stack    []*LetterToken | ||||||
|  | 	pipeline []*LetterToken | ||||||
|  | 	letter   int | ||||||
|  | 	pageMap  map[int]int // Maps page number to starting token index | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // NewLetterParser creates a parser from xml.Token slice | ||||||
|  | func NewLetterParser(tokens []xml.Token, letter int, pageIndex int) *LetterParser { | ||||||
|  | 	parser := &LetterParser{ | ||||||
|  | 		Stack:   make([]*LetterToken, 0), | ||||||
|  | 		letter:  letter, | ||||||
|  | 		pageMap: make(map[int]int), | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	stack := make([]*LetterToken, 0) | ||||||
|  |  | ||||||
|  | 	for i, token := range tokens { | ||||||
|  | 		letterToken := &LetterToken{ | ||||||
|  | 			Inner:     xml.CopyToken(token), | ||||||
|  | 			Index:     i, | ||||||
|  | 			PageIndex: pageIndex, | ||||||
|  | 			Letter:    letter, | ||||||
|  | 			Stack:     make([]*LetterToken, len(stack)), | ||||||
|  | 			parser:    parser, | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		// Copy current stack | ||||||
|  | 		copy(letterToken.Stack, stack) | ||||||
|  |  | ||||||
|  | 		switch t := token.(type) { | ||||||
|  | 		case xml.StartElement: | ||||||
|  | 			letterToken.Name = t.Name.Local | ||||||
|  | 			letterToken.Attributes = mapXMLAttributes(t.Attr) | ||||||
|  | 			letterToken.Type = LetterStartElement | ||||||
|  |  | ||||||
|  | 			// Add to parent's children if not parsed yet | ||||||
|  | 			if len(stack) > 0 && !stack[len(stack)-1].childrenParsed { | ||||||
|  | 				stack[len(stack)-1].children = append(stack[len(stack)-1].children, letterToken) | ||||||
|  | 			} | ||||||
|  | 			stack = append(stack, letterToken) | ||||||
|  |  | ||||||
|  | 		case xml.EndElement: | ||||||
|  | 			if len(stack) > 0 { | ||||||
|  | 				element := stack[len(stack)-1] | ||||||
|  | 				element.childrenParsed = true | ||||||
|  | 				element.chardataParsed = true | ||||||
|  | 				stack = stack[:len(stack)-1] | ||||||
|  | 			} | ||||||
|  | 			letterToken.Name = t.Name.Local | ||||||
|  | 			letterToken.Attributes = make(map[string]string) | ||||||
|  | 			letterToken.Type = LetterEndElement | ||||||
|  |  | ||||||
|  | 		case xml.CharData: | ||||||
|  | 			text := string(t) | ||||||
|  | 			if text != "" && len(stack) > 0 { | ||||||
|  | 				for i := range stack { | ||||||
|  | 					if !stack[i].chardataParsed { | ||||||
|  | 						stack[i].charData += text | ||||||
|  | 					} | ||||||
|  | 				} | ||||||
|  | 			} | ||||||
|  | 			letterToken.Data = text | ||||||
|  | 			letterToken.Type = LetterCharData | ||||||
|  |  | ||||||
|  | 		case xml.Comment: | ||||||
|  | 			letterToken.Type = LetterComment | ||||||
|  | 			letterToken.Data = string(t) | ||||||
|  |  | ||||||
|  | 		case xml.ProcInst: | ||||||
|  | 			letterToken.Name = t.Target | ||||||
|  | 			letterToken.Data = string(t.Inst) | ||||||
|  | 			letterToken.Type = LetterProcInst | ||||||
|  |  | ||||||
|  | 		case xml.Directive: | ||||||
|  | 			letterToken.Data = string(t) | ||||||
|  | 			letterToken.Type = LetterDirective | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		parser.pipeline = append(parser.pipeline, letterToken) | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return parser | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // GetStack returns current parsing stack | ||||||
|  | func (p *LetterParser) GetStack() []*LetterToken { | ||||||
|  | 	return p.Stack | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Pipeline returns all tokens | ||||||
|  | func (p *LetterParser) Pipeline() []*LetterToken { | ||||||
|  | 	return p.pipeline | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // TokenAt returns token at specific index | ||||||
|  | func (p *LetterParser) TokenAt(index int) *LetterToken { | ||||||
|  | 	if index < 0 || index >= len(p.pipeline) { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	return p.pipeline[index] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // IterateFrom creates iterator starting from specific index | ||||||
|  | func (p *LetterParser) IterateFrom(index int) iter.Seq2[*LetterToken, error] { | ||||||
|  | 	return func(yield func(*LetterToken, error) bool) { | ||||||
|  | 		for i := index; i < len(p.pipeline); i++ { | ||||||
|  | 			if !yield(p.pipeline[i], nil) { | ||||||
|  | 				return | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Iterate over all tokens | ||||||
|  | func (p *LetterParser) Iterate() iter.Seq2[*LetterToken, error] { | ||||||
|  | 	return p.IterateFrom(0) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Previous returns tokens before given index | ||||||
|  | func (p *LetterParser) Previous(index int) []*LetterToken { | ||||||
|  | 	if index <= 0 || index > len(p.pipeline) { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  | 	return p.pipeline[:index] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // LetterToken methods | ||||||
|  |  | ||||||
|  | // String returns string representation | ||||||
|  | func (t *LetterToken) String() string { | ||||||
|  | 	builder := strings.Builder{} | ||||||
|  | 	switch t.Type { | ||||||
|  | 	case LetterStartElement: | ||||||
|  | 		builder.WriteString("<" + t.Name) | ||||||
|  | 		for k, v := range t.Attributes { | ||||||
|  | 			builder.WriteString(" " + k + `="` + v + `"`) | ||||||
|  | 		} | ||||||
|  | 		builder.WriteString(">") | ||||||
|  | 	case LetterEndElement: | ||||||
|  | 		builder.WriteString("</" + t.Name + ">") | ||||||
|  | 	case LetterCharData: | ||||||
|  | 		builder.WriteString(t.Data) | ||||||
|  | 	case LetterComment: | ||||||
|  | 		builder.WriteString("<!--" + t.Data + "-->") | ||||||
|  | 	} | ||||||
|  | 	return builder.String() | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Element returns all tokens from start to matching end element | ||||||
|  | func (t *LetterToken) Element() []*LetterToken { | ||||||
|  | 	if t.Type != LetterStartElement { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var tokens []*LetterToken | ||||||
|  | 	depth := 0 | ||||||
|  |  | ||||||
|  | 	for token, _ := range t.parser.IterateFrom(t.Index) { | ||||||
|  | 		tokens = append(tokens, token) | ||||||
|  |  | ||||||
|  | 		if token.Type == LetterStartElement && token.Name == t.Name { | ||||||
|  | 			depth++ | ||||||
|  | 		} else if token.Type == LetterEndElement && token.Name == t.Name { | ||||||
|  | 			depth-- | ||||||
|  | 			if depth == 0 { | ||||||
|  | 				return tokens | ||||||
|  | 			} | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	return tokens | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Children returns direct child elements | ||||||
|  | func (t *LetterToken) Children() []*LetterToken { | ||||||
|  | 	if t.childrenParsed { | ||||||
|  | 		return t.children | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if t.Type != LetterStartElement { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	element := t.Element() | ||||||
|  | 	if len(element) <= 1 { | ||||||
|  | 		return nil | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	// Skip first (self) and find direct children | ||||||
|  | 	depth := 0 | ||||||
|  | 	for _, token := range element[1:] { // Skip self | ||||||
|  | 		if token.Type == LetterStartElement { | ||||||
|  | 			if depth == 0 { | ||||||
|  | 				t.children = append(t.children, token) | ||||||
|  | 			} | ||||||
|  | 			depth++ | ||||||
|  | 		} else if token.Type == LetterEndElement { | ||||||
|  | 			depth-- | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	t.childrenParsed = true | ||||||
|  | 	return t.children | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // CharData returns character data content | ||||||
|  | func (t *LetterToken) CharData() string { | ||||||
|  | 	if t.Type == LetterCharData || t.Type == LetterComment { | ||||||
|  | 		return t.Data | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if t.chardataParsed { | ||||||
|  | 		return t.charData | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if t.Type != LetterStartElement { | ||||||
|  | 		return "" | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	element := t.Element() | ||||||
|  | 	if len(element) == 0 { | ||||||
|  | 		return "" | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	var builder strings.Builder | ||||||
|  | 	for _, token := range element { | ||||||
|  | 		if token.Type == LetterCharData { | ||||||
|  | 			builder.WriteString(token.Data) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	t.chardataParsed = true | ||||||
|  | 	t.charData = builder.String() | ||||||
|  | 	return t.charData | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Next returns iterator from next token | ||||||
|  | func (t *LetterToken) Next() iter.Seq2[*LetterToken, error] { | ||||||
|  | 	return t.parser.IterateFrom(t.Index + 1) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // Previous returns tokens before this one | ||||||
|  | func (t *LetterToken) Previous() []*LetterToken { | ||||||
|  | 	return t.parser.Previous(t.Index) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // FindByName finds first child element with given name | ||||||
|  | func (t *LetterToken) FindByName(name string) *LetterToken { | ||||||
|  | 	for _, child := range t.Children() { | ||||||
|  | 		if child.Name == name { | ||||||
|  | 			return child | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // FindAllByName finds all child elements with given name | ||||||
|  | func (t *LetterToken) FindAllByName(name string) []*LetterToken { | ||||||
|  | 	var result []*LetterToken | ||||||
|  | 	for _, child := range t.Children() { | ||||||
|  | 		if child.Name == name { | ||||||
|  | 			result = append(result, child) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | 	return result | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // GetAttribute returns attribute value | ||||||
|  | func (t *LetterToken) GetAttribute(name string) string { | ||||||
|  | 	if t.Attributes == nil { | ||||||
|  | 		return "" | ||||||
|  | 	} | ||||||
|  | 	return t.Attributes[name] | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // GetStackDepth returns current nesting depth | ||||||
|  | func (t *LetterToken) GetStackDepth() int { | ||||||
|  | 	return len(t.Stack) | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // InPage checks if token belongs to specific page | ||||||
|  | func (t *LetterToken) InPage(pageNo int) bool { | ||||||
|  | 	return t.PageIndex == pageNo | ||||||
|  | } | ||||||
|  |  | ||||||
// mapXMLAttributes turns attrs into a map from local attribute name to value.
// Namespaces are ignored; when two attributes share a local name the later
// one wins. The returned map is never nil.
func mapXMLAttributes(attrs []xml.Attr) map[string]string {
	byName := make(map[string]string, len(attrs))
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}
		Reference in New Issue
	
	Block a user
	 Simon Martens
					Simon Martens