mirror of
				https://github.com/Theodor-Springmann-Stiftung/lenz-web.git
				synced 2025-10-30 17:55:32 +00:00 
			
		
		
		
	Init rework
This commit is contained in:
		| @@ -3,17 +3,10 @@ package xmlmodels | ||||
| import ( | ||||
| 	"encoding/json" | ||||
| 	"encoding/xml" | ||||
| 	"io" | ||||
| 	"strconv" | ||||
| ) | ||||
|  | ||||
| type Letter struct { | ||||
| 	XMLName  xml.Name     `xml:"letterText"` | ||||
| 	Letter   int          `xml:"letter,attr"` | ||||
| 	Pages    []Page       `xml:"page"` | ||||
| 	Hands    []RefElement `xml:"hand"` | ||||
| 	Content  string       `xml:",innerxml"` | ||||
| 	Chardata string       `xml:",chardata"` | ||||
| } | ||||
|  | ||||
| func (l Letter) Keys() []any { | ||||
| 	return []any{l.Letter} | ||||
| } | ||||
| @@ -30,7 +23,220 @@ func (l Letter) String() string { | ||||
| 	return string(json) | ||||
| } | ||||
|  | ||||
| type Page struct { | ||||
| 	XMLName xml.Name `xml:"page"` | ||||
| 	Index   int      `xml:"index,attr"` | ||||
| type SidenotePosition uint8 | ||||
|  | ||||
| const ( | ||||
| 	SidenotePositionLeft SidenotePosition = iota | ||||
| 	SidenotePositionRight | ||||
| 	SidenotePositionTop | ||||
| 	SidenotePositionTopLeft | ||||
| 	SidenotePositionTopRight | ||||
| 	SidenotePositionBottom | ||||
| 	SidenotePositionBottomLeft | ||||
| 	SidenotePositionBottomRight | ||||
| ) | ||||
|  | ||||
| func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error { | ||||
| 	switch attr.Value { | ||||
| 	case "left": | ||||
| 		*sp = SidenotePositionLeft | ||||
| 	case "right": | ||||
| 		*sp = SidenotePositionRight | ||||
| 	case "top": | ||||
| 		*sp = SidenotePositionTop | ||||
| 	case "top left": | ||||
| 		*sp = SidenotePositionTopLeft | ||||
| 	case "top right": | ||||
| 		*sp = SidenotePositionTopRight | ||||
| 	case "bottom": | ||||
| 		*sp = SidenotePositionBottom | ||||
| 	case "bottom left": | ||||
| 		*sp = SidenotePositionBottomLeft | ||||
| 	case "bottom right": | ||||
| 		*sp = SidenotePositionBottomRight | ||||
| 	default: | ||||
| 		*sp = SidenotePositionLeft // Default fallback | ||||
| 	} | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| type Letter struct { | ||||
| 	XMLName xml.Name `xml:"letterText"` | ||||
| 	Letter  int | ||||
| 	Pages   []Page | ||||
| } | ||||
|  | ||||
| type Page struct { | ||||
| 	No        int | ||||
| 	Letter    int | ||||
| 	Sidenotes []Sidenote | ||||
| 	Hands     []int | ||||
| 	Tokens    []xml.Token | ||||
| 	CharData  string | ||||
| } | ||||
|  | ||||
| type Sidenote struct { | ||||
| 	XMLName    xml.Name | ||||
| 	Position   SidenotePosition | ||||
| 	Page       int | ||||
| 	Annotation string | ||||
| 	Anchor     int | ||||
| 	Tokens     []xml.Token | ||||
| 	CharData   string | ||||
| } | ||||
|  | ||||
| type Char struct { | ||||
| 	Stack []xml.Token | ||||
| 	Value string | ||||
| } | ||||
|  | ||||
| func (c Char) String() string { | ||||
| 	return c.Value | ||||
| } | ||||
|  | ||||
| func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||||
| 	lt.XMLName = start.Name | ||||
| 	for _, attr := range start.Attr { | ||||
| 		if attr.Name.Local == "letter" { | ||||
| 			if letterNum, err := strconv.Atoi(attr.Value); err == nil { | ||||
| 				lt.Letter = letterNum | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if err := lt.parseTokens(d); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (lt *Letter) parseTokens(d *xml.Decoder) error { | ||||
| 	stack := []xml.Token{} | ||||
| 	var c_page *Page = nil | ||||
|  | ||||
| 	for { | ||||
| 		token, err := d.Token() | ||||
| 		if err == io.EOF { | ||||
| 			break | ||||
| 		} | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		// INFO: Make a copy of the token since Token() reuses the underlying data | ||||
| 		tokenCopy := xml.CopyToken(token) | ||||
| 		if c_page != nil { | ||||
| 			c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||
| 		} | ||||
|  | ||||
| 		switch t := tokenCopy.(type) { | ||||
| 		case xml.StartElement: | ||||
| 			switch t.Name.Local { | ||||
| 			case "page": | ||||
| 				if c_page != nil { | ||||
| 					lt.Pages = append(lt.Pages, *c_page) | ||||
| 				} | ||||
|  | ||||
| 				c_page = &Page{} | ||||
|  | ||||
| 				for _, attr := range t.Attr { | ||||
| 					if attr.Name.Local == "index" { | ||||
| 						if idx, err := strconv.Atoi(attr.Value); err == nil { | ||||
| 							c_page.No = idx | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
|  | ||||
| 				d.Skip() | ||||
|  | ||||
| 			// WARNING: UnmarshalXML continues and changes the state of the parser | ||||
| 			case "sidenote": | ||||
| 				var sidenote Sidenote = Sidenote{ | ||||
| 					Anchor: len(c_page.Tokens), | ||||
| 				} | ||||
| 				if err := sidenote.UnmarshalXML(d, t); err == nil && c_page != nil { | ||||
| 					c_page.Sidenotes = append(c_page.Sidenotes, sidenote) | ||||
| 				} | ||||
|  | ||||
| 			// INFO: We create a list of all hand in a letter | ||||
| 			case "hand": | ||||
| 				for _, attr := range t.Attr { | ||||
| 					if attr.Name.Local == "ref" && c_page != nil { | ||||
| 						if ref, err := strconv.Atoi(attr.Value); err == nil { | ||||
| 							c_page.Hands = append(c_page.Hands, ref) | ||||
| 						} | ||||
| 					} | ||||
| 				} | ||||
| 				fallthrough | ||||
|  | ||||
| 			default: | ||||
| 				if c_page != nil { | ||||
| 					c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||
| 				} | ||||
| 			} | ||||
|  | ||||
| 		case xml.CharData: | ||||
| 			if c_page != nil { | ||||
| 				c_page.CharData = string(t) | ||||
| 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||
| 			} | ||||
|  | ||||
| 		case xml.EndElement: | ||||
| 			if t.Name.Local == "letterText" { | ||||
| 				if c_page != nil { | ||||
| 					lt.Pages = append(lt.Pages, *c_page) | ||||
| 				} | ||||
| 				return nil | ||||
| 			} | ||||
|  | ||||
| 			if c_page != nil { | ||||
| 				c_page.Tokens = append(c_page.Tokens, tokenCopy) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { | ||||
| 	// Set the XMLName | ||||
| 	s.XMLName = start.Name | ||||
|  | ||||
| 	// Parse attributes | ||||
| 	for _, attr := range start.Attr { | ||||
| 		switch attr.Name.Local { | ||||
| 		case "pos": | ||||
| 			s.Position.UnmarshalXMLAttr(attr) | ||||
| 		case "page": | ||||
| 			if page, err := strconv.Atoi(attr.Value); err == nil { | ||||
| 				s.Page = page | ||||
| 			} | ||||
| 		case "annotation": | ||||
| 			s.Annotation = attr.Value | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	// Collect all content tokens | ||||
| 	for { | ||||
| 		token, err := d.Token() | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
|  | ||||
| 		tokenCopy := xml.CopyToken(token) | ||||
|  | ||||
| 		switch t := tokenCopy.(type) { | ||||
| 		case xml.EndElement: | ||||
| 			if t.Name.Local == start.Name.Local { | ||||
| 				// End of sidenote element | ||||
| 				return nil | ||||
| 			} | ||||
| 			// Add the end element token to content | ||||
| 			s.Content = append(s.Content, tokenCopy) | ||||
| 		case xml.StartElement, xml.CharData, xml.Comment, xml.ProcInst: | ||||
| 			// Add all other tokens to content | ||||
| 			s.Content = append(s.Content, tokenCopy) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
							
								
								
									
										446
									
								
								xmlmodels/letter_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										446
									
								
								xmlmodels/letter_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,446 @@ | ||||
| package xmlmodels | ||||
|  | ||||
| import ( | ||||
| 	"encoding/xml" | ||||
| 	"strings" | ||||
| 	"testing" | ||||
| ) | ||||
|  | ||||
| // Helper function to convert []xml.Token back to string for testing | ||||
| func tokensToString(tokens []xml.Token) string { | ||||
| 	var sb strings.Builder | ||||
| 	for _, token := range tokens { | ||||
| 		switch t := token.(type) { | ||||
| 		case xml.StartElement: | ||||
| 			sb.WriteString("<") | ||||
| 			sb.WriteString(t.Name.Local) | ||||
| 			for _, attr := range t.Attr { | ||||
| 				sb.WriteString(" ") | ||||
| 				sb.WriteString(attr.Name.Local) | ||||
| 				sb.WriteString(`="`) | ||||
| 				sb.WriteString(attr.Value) | ||||
| 				sb.WriteString(`"`) | ||||
| 			} | ||||
| 			sb.WriteString(">") | ||||
| 		case xml.EndElement: | ||||
| 			sb.WriteString("</") | ||||
| 			sb.WriteString(t.Name.Local) | ||||
| 			sb.WriteString(">") | ||||
| 		case xml.CharData: | ||||
| 			sb.Write(t) | ||||
| 		case xml.Comment: | ||||
| 			sb.WriteString("<!--") | ||||
| 			sb.Write(t) | ||||
| 			sb.WriteString("-->") | ||||
| 		case xml.ProcInst: | ||||
| 			sb.WriteString("<?") | ||||
| 			sb.WriteString(t.Target) | ||||
| 			if len(t.Inst) > 0 { | ||||
| 				sb.WriteString(" ") | ||||
| 				sb.Write(t.Inst) | ||||
| 			} | ||||
| 			sb.WriteString("?>") | ||||
| 		} | ||||
| 	} | ||||
| 	return sb.String() | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_SimpleCase(t *testing.T) { | ||||
| 	// Simple test case with basic structure | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="123"> | ||||
| 		Some content before first page break. | ||||
| 		<page index="1"/> | ||||
| 		Content on page 1 with <aq>some markup</aq> and more text. | ||||
| 		<sidenote pos="right" page="1" annotation="test">This is a sidenote</sidenote> | ||||
| 		More content on page 1. | ||||
| 		<page index="2"/> | ||||
| 		Content on page 2 with <b>bold text</b>. | ||||
| 		<hand ref="42">Hand reference content</hand> | ||||
| 		Final content on page 2. | ||||
| 	</letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Verify basic structure | ||||
| 	if len(letterText.Pages) != 3 { | ||||
| 		t.Errorf("Expected 3 pages, got %d", len(letterText.Pages)) | ||||
| 	} | ||||
| 	if len(letterText.PageBreaks) != 2 { | ||||
| 		t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) | ||||
| 	} | ||||
| 	if len(letterText.Sidenotes) != 1 { | ||||
| 		t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) | ||||
| 	} | ||||
| 	if letterText.Hands.Reference != 42 { | ||||
| 		t.Errorf("Expected hand reference 42, got %d", letterText.Hands.Reference) | ||||
| 	} | ||||
|  | ||||
| 	// Verify page breaks | ||||
| 	if letterText.PageBreaks[0].Index != 1 { | ||||
| 		t.Errorf("Expected page break index 1, got %d", letterText.PageBreaks[0].Index) | ||||
| 	} | ||||
| 	if letterText.PageBreaks[1].Index != 2 { | ||||
| 		t.Errorf("Expected page break index 2, got %d", letterText.PageBreaks[1].Index) | ||||
| 	} | ||||
|  | ||||
| 	// Verify sidenote | ||||
| 	sidenote := letterText.Sidenotes[0] | ||||
| 	if sidenote.Page != 1 { | ||||
| 		t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page) | ||||
| 	} | ||||
| 	if sidenote.Position != SidenotePositionRight { | ||||
| 		t.Errorf("Expected sidenote position right, got %d", sidenote.Position) | ||||
| 	} | ||||
| 	if sidenote.Annotation != "test" { | ||||
| 		t.Errorf("Expected sidenote annotation 'test', got '%s'", sidenote.Annotation) | ||||
| 	} | ||||
| 	sidenoteContent := tokensToString(sidenote.Content) | ||||
| 	if !strings.Contains(sidenoteContent, "This is a sidenote") { | ||||
| 		t.Errorf("Expected sidenote content to contain 'This is a sidenote', got '%s'", sidenoteContent) | ||||
| 	} | ||||
|  | ||||
| 	// Verify page content doesn't contain sidenote text | ||||
| 	for _, page := range letterText.Pages { | ||||
| 		content := tokensToString(page.Content) | ||||
| 		if strings.Contains(content, "This is a sidenote") { | ||||
| 			t.Errorf("Page content should not contain sidenote text, but page %d does: %s", page.Page, content) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_RealExample_Letter1(t *testing.T) { | ||||
| 	// Real example from briefe.xml - Letter 1 (simplified) | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="1"> | ||||
| <page index="1" /><align pos="right">HochEdelgeborner Hochgelahrter Herr <aq>Secretair</aq></align> | ||||
| <line type="break" tab="7" /><align pos="right">Verehrungswürdigster Gönner!</align> | ||||
| <line type="empty" /> | ||||
| Ew. HochEdelgebh: haben mich durch die neue Probe von Dero schätzbaren Gewogenheit ausserorndtlich beschämt. Meine Feder ist zu schwach, Denenselben die regen Empfindungen meines Herzens darüber zu schildern. | ||||
| <page index="2" />lasse mich noch lange das Glück genießen, Dieselben in dem blühendsten Wohlstande zu sehen, und mich mit dem erkenntlichsten Herzen nennen zu dürfen | ||||
| <line type="empty" /> | ||||
| <line type="break" /><align pos="right">Hoch Edelgeborner Hochgelahrter Herr <aq>Secretair</aq> | ||||
| <line type="break" tab="7" />Verehrungswürdigster Gönner | ||||
| <line type="break" tab="7" />Ew. HochEdelgebh:</align> | ||||
| Von Hause, d. 2 Jenner, 1765. | ||||
| <align pos="right">gehorsamsten Diener | ||||
| <line type="break" />Jacob Michael Reinhold Lenz</align> | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling real XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Should have 2 pages | ||||
| 	if len(letterText.Pages) != 2 { | ||||
| 		t.Errorf("Expected 2 pages, got %d", len(letterText.Pages)) | ||||
| 	} | ||||
| 	if len(letterText.PageBreaks) != 2 { | ||||
| 		t.Errorf("Expected 2 page breaks, got %d", len(letterText.PageBreaks)) | ||||
| 	} | ||||
|  | ||||
| 	// Verify page content contains expected elements | ||||
| 	page1Found := false | ||||
| 	page2Found := false | ||||
| 	for _, page := range letterText.Pages { | ||||
| 		content := tokensToString(page.Content) | ||||
| 		if page.Page == 1 && strings.Contains(content, "HochEdelgeborner") { | ||||
| 			page1Found = true | ||||
| 		} | ||||
| 		if page.Page == 2 && strings.Contains(content, "Jacob Michael Reinhold Lenz") { | ||||
| 			page2Found = true | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	if !page1Found { | ||||
| 		t.Error("Page 1 content not found correctly") | ||||
| 	} | ||||
| 	if !page2Found { | ||||
| 		t.Error("Page 2 content not found correctly") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_WithSidenotes(t *testing.T) { | ||||
| 	// Real example with sidenotes from briefe.xml | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="2"> | ||||
| <page index="1" />Some text before sidenote. | ||||
| <sidenote pos="left" page="1" annotation=" am linken Rand der zweiten Seite, vertikal">Ich umarme Dich und küsse Dich 1000mahl als Dein | ||||
| <line type="break" />allergetreuester Bruder | ||||
| <line type="break" />Jacob Michael Reinhold Lenz. | ||||
| <line type="break" />Dorpat den 11ten October 1767.</sidenote> | ||||
| More text after sidenote. | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling sidenote XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Should have 1 sidenote | ||||
| 	if len(letterText.Sidenotes) != 1 { | ||||
| 		t.Errorf("Expected 1 sidenote, got %d", len(letterText.Sidenotes)) | ||||
| 	} | ||||
|  | ||||
| 	// Verify sidenote details | ||||
| 	sidenote := letterText.Sidenotes[0] | ||||
| 	if sidenote.Position != SidenotePositionLeft { | ||||
| 		t.Errorf("Expected sidenote position left, got %d", sidenote.Position) | ||||
| 	} | ||||
| 	if sidenote.Page != 1 { | ||||
| 		t.Errorf("Expected sidenote on page 1, got %d", sidenote.Page) | ||||
| 	} | ||||
| 	if !strings.Contains(sidenote.Annotation, "am linken Rand") { | ||||
| 		t.Errorf("Expected sidenote annotation to contain 'am linken Rand', got '%s'", sidenote.Annotation) | ||||
| 	} | ||||
| 	sidenoteContent := tokensToString(sidenote.Content) | ||||
| 	if !strings.Contains(sidenoteContent, "Jacob Michael Reinhold Lenz") { | ||||
| 		t.Errorf("Expected sidenote content to contain author name, got '%s'", sidenoteContent) | ||||
| 	} | ||||
|  | ||||
| 	// Verify page content doesn't contain sidenote | ||||
| 	for _, page := range letterText.Pages { | ||||
| 		content := tokensToString(page.Content) | ||||
| 		if strings.Contains(content, "allergetreuester Bruder") { | ||||
| 			t.Errorf("Page content should not contain sidenote text, but page %d does", page.Page) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_ComplexSidenotePositions(t *testing.T) { | ||||
| 	// Test different sidenote positions | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="3"> | ||||
| <page index="1" /> | ||||
| <sidenote pos="top right" page="1" annotation="test1">Top right sidenote</sidenote> | ||||
| <sidenote pos="bottom left" page="1" annotation="test2">Bottom left sidenote</sidenote> | ||||
| <sidenote pos="top" page="1" annotation="test3">Top sidenote</sidenote> | ||||
| Some content. | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling complex sidenotes XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	if len(letterText.Sidenotes) != 3 { | ||||
| 		t.Fatalf("Expected 3 sidenotes, got %d", len(letterText.Sidenotes)) | ||||
| 	} | ||||
|  | ||||
| 	// Check position parsing | ||||
| 	positions := make(map[SidenotePosition]bool) | ||||
| 	for _, sidenote := range letterText.Sidenotes { | ||||
| 		positions[sidenote.Position] = true | ||||
| 	} | ||||
|  | ||||
| 	expectedPositions := []SidenotePosition{ | ||||
| 		SidenotePositionTopRight, | ||||
| 		SidenotePositionBottomLeft, | ||||
| 		SidenotePositionTop, | ||||
| 	} | ||||
|  | ||||
| 	for _, expected := range expectedPositions { | ||||
| 		if !positions[expected] { | ||||
| 			t.Errorf("Expected to find sidenote position %d, but didn't", expected) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_NoPageBreaks(t *testing.T) { | ||||
| 	// Test letter without explicit page breaks | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="4"> | ||||
| This is all content on the default page. | ||||
| <aq>Some markup</aq> and more text. | ||||
| <sidenote pos="right" page="1" annotation="single page note">Note on single page</sidenote> | ||||
| Final text. | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling no-page-break XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Should have 1 page (default page 1) | ||||
| 	if len(letterText.Pages) != 1 { | ||||
| 		t.Errorf("Expected 1 page, got %d", len(letterText.Pages)) | ||||
| 	} | ||||
| 	if len(letterText.PageBreaks) != 0 { | ||||
| 		t.Errorf("Expected 0 page breaks, got %d", len(letterText.PageBreaks)) | ||||
| 	} | ||||
|  | ||||
| 	// Page should be page 1 | ||||
| 	if letterText.Pages[0].Page != 1 { | ||||
| 		t.Errorf("Expected page 1, got page %d", letterText.Pages[0].Page) | ||||
| 	} | ||||
|  | ||||
| 	// Content should contain markup but not sidenote | ||||
| 	content := tokensToString(letterText.Pages[0].Content) | ||||
| 	if !strings.Contains(content, "<aq>Some markup</aq>") { | ||||
| 		t.Error("Expected page content to contain markup") | ||||
| 	} | ||||
| 	if strings.Contains(content, "Note on single page") { | ||||
| 		t.Error("Page content should not contain sidenote text") | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_EmptyContent(t *testing.T) { | ||||
| 	// Test edge case with empty content | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="5"> | ||||
| <page index="1" /> | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling empty XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Should have no pages with content | ||||
| 	if len(letterText.Pages) != 0 { | ||||
| 		t.Errorf("Expected 0 pages with content, got %d", len(letterText.Pages)) | ||||
| 	} | ||||
| 	if len(letterText.PageBreaks) != 1 { | ||||
| 		t.Errorf("Expected 1 page break, got %d", len(letterText.PageBreaks)) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestSidenotePosition_UnmarshalXMLAttr(t *testing.T) { | ||||
| 	tests := []struct { | ||||
| 		input    string | ||||
| 		expected SidenotePosition | ||||
| 	}{ | ||||
| 		{"left", SidenotePositionLeft}, | ||||
| 		{"right", SidenotePositionRight}, | ||||
| 		{"top", SidenotePositionTop}, | ||||
| 		{"top left", SidenotePositionTopLeft}, | ||||
| 		{"top right", SidenotePositionTopRight}, | ||||
| 		{"bottom", SidenotePositionBottom}, | ||||
| 		{"bottom left", SidenotePositionBottomLeft}, | ||||
| 		{"bottom right", SidenotePositionBottomRight}, | ||||
| 		{"unknown", SidenotePositionLeft}, // Default fallback | ||||
| 	} | ||||
|  | ||||
| 	for _, test := range tests { | ||||
| 		var pos SidenotePosition | ||||
| 		attr := xml.Attr{Value: test.input} | ||||
| 		err := pos.UnmarshalXMLAttr(attr) | ||||
| 		if err != nil { | ||||
| 			t.Errorf("Error unmarshaling position '%s': %v", test.input, err) | ||||
| 		} | ||||
| 		if pos != test.expected { | ||||
| 			t.Errorf("Expected position %d for input '%s', got %d", test.expected, test.input, pos) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_PreserveMarkup(t *testing.T) { | ||||
| 	// Test that various markup elements are preserved in page content | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="6"> | ||||
| <page index="1" /> | ||||
| Text with <aq>antiqua</aq> and <b>bold</b> and <it>italic</it>. | ||||
| <line type="break" tab="5" /> | ||||
| <align pos="center">Centered text</align> | ||||
| <ul>Underlined text</ul> | ||||
| <del>Deleted text</del> | ||||
| More content with <pe>person reference</pe>. | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling markup XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	if len(letterText.Pages) != 1 { | ||||
| 		t.Fatalf("Expected 1 page, got %d", len(letterText.Pages)) | ||||
| 	} | ||||
|  | ||||
| 	content := tokensToString(letterText.Pages[0].Content) | ||||
| 	expectedMarkup := []string{ | ||||
| 		"<aq>antiqua</aq>", | ||||
| 		"<b>bold</b>", | ||||
| 		"<it>italic</it>", | ||||
| 		"<line type=\"break\" tab=\"5\">", | ||||
| 		"<align pos=\"center\">", | ||||
| 		"<ul>Underlined text</ul>", | ||||
| 		"<del>Deleted text</del>", | ||||
| 		"<pe>person reference</pe>", | ||||
| 	} | ||||
|  | ||||
| 	for _, markup := range expectedMarkup { | ||||
| 		if !strings.Contains(content, markup) { | ||||
| 			t.Errorf("Expected page content to contain '%s', but it doesn't. Content: %s", markup, content) | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_LetterAttribute(t *testing.T) { | ||||
| 	// Test that the letter attribute is parsed correctly | ||||
| 	testXML := `<letterText xmlns="https://lenz-archiv.de" letter="42"> | ||||
| <page index="1" /> | ||||
| Some content. | ||||
| </letterText>` | ||||
|  | ||||
| 	var letterText LetterText | ||||
| 	err := xml.Unmarshal([]byte(testXML), &letterText) | ||||
| 	if err != nil { | ||||
| 		t.Fatalf("Error unmarshaling letter attribute XML: %v", err) | ||||
| 	} | ||||
|  | ||||
| 	// Verify letter attribute is parsed | ||||
| 	if letterText.Letter != 42 { | ||||
| 		t.Errorf("Expected letter attribute 42, got %d", letterText.Letter) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| func TestLetterTextUnmarshal_LetterAttribute_AllExistingTests(t *testing.T) { | ||||
| 	// Test that existing test cases also have correct letter attributes | ||||
| 	testCases := []struct { | ||||
| 		name           string | ||||
| 		xml            string | ||||
| 		expectedLetter int | ||||
| 	}{ | ||||
| 		{ | ||||
| 			name: "Simple case", | ||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="123"> | ||||
| 				<page index="1"/>Some content. | ||||
| 			</letterText>`, | ||||
| 			expectedLetter: 123, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "Real example letter 1", | ||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="1"> | ||||
| 				<page index="1" />Some content. | ||||
| 			</letterText>`, | ||||
| 			expectedLetter: 1, | ||||
| 		}, | ||||
| 		{ | ||||
| 			name: "Letter with sidenotes", | ||||
| 			xml: `<letterText xmlns="https://lenz-archiv.de" letter="999"> | ||||
| 				<page index="1" /> | ||||
| 				<sidenote pos="left" page="1" annotation="test">Note</sidenote> | ||||
| 				Content. | ||||
| 			</letterText>`, | ||||
| 			expectedLetter: 999, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| 	for _, tc := range testCases { | ||||
| 		t.Run(tc.name, func(t *testing.T) { | ||||
| 			var letterText LetterText | ||||
| 			err := xml.Unmarshal([]byte(tc.xml), &letterText) | ||||
| 			if err != nil { | ||||
| 				t.Fatalf("Error unmarshaling XML: %v", err) | ||||
| 			} | ||||
|  | ||||
| 			if letterText.Letter != tc.expectedLetter { | ||||
| 				t.Errorf("Expected letter attribute %d, got %d", tc.expectedLetter, letterText.Letter) | ||||
| 			} | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
		Reference in New Issue
	
	Block a user
	 Simon Martens
					Simon Martens