mirror of
https://github.com/Theodor-Springmann-Stiftung/lenz-web.git
synced 2025-10-29 01:05:32 +00:00
New Letter serialzation
This commit is contained in:
@@ -5,8 +5,47 @@ import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
SidenotePositionLeft SidenotePosition = iota
|
||||
SidenotePositionRight
|
||||
SidenotePositionTop
|
||||
SidenotePositionTopLeft
|
||||
SidenotePositionTopRight
|
||||
SidenotePositionBottom
|
||||
SidenotePositionBottomLeft
|
||||
SidenotePositionBottomRight
|
||||
)
|
||||
|
||||
type Letter struct {
|
||||
XMLName xml.Name `xml:"letterText"`
|
||||
Letter int
|
||||
Pages []Page
|
||||
// TODO: we need to save meta information about the chardata,
|
||||
// but separately, since this array will be used for search
|
||||
CharData string
|
||||
}
|
||||
|
||||
type Page struct {
|
||||
No int
|
||||
Letter int
|
||||
Sidenotes []Sidenote
|
||||
Hands []int
|
||||
Tokens []xml.Token
|
||||
}
|
||||
|
||||
type Sidenote struct {
|
||||
XMLName xml.Name
|
||||
Position SidenotePosition
|
||||
Page int
|
||||
Annotation string
|
||||
Anchor int
|
||||
Tokens []xml.Token
|
||||
CharData string
|
||||
}
|
||||
|
||||
func (l Letter) Keys() []any {
|
||||
return []any{l.Letter}
|
||||
}
|
||||
@@ -25,17 +64,6 @@ func (l Letter) String() string {
|
||||
|
||||
type SidenotePosition uint8
|
||||
|
||||
const (
|
||||
SidenotePositionLeft SidenotePosition = iota
|
||||
SidenotePositionRight
|
||||
SidenotePositionTop
|
||||
SidenotePositionTopLeft
|
||||
SidenotePositionTopRight
|
||||
SidenotePositionBottom
|
||||
SidenotePositionBottomLeft
|
||||
SidenotePositionBottomRight
|
||||
)
|
||||
|
||||
func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error {
|
||||
switch attr.Value {
|
||||
case "left":
|
||||
@@ -60,40 +88,6 @@ func (sp *SidenotePosition) UnmarshalXMLAttr(attr xml.Attr) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
type Letter struct {
|
||||
XMLName xml.Name `xml:"letterText"`
|
||||
Letter int
|
||||
Pages []Page
|
||||
}
|
||||
|
||||
type Page struct {
|
||||
No int
|
||||
Letter int
|
||||
Sidenotes []Sidenote
|
||||
Hands []int
|
||||
Tokens []xml.Token
|
||||
CharData string
|
||||
}
|
||||
|
||||
type Sidenote struct {
|
||||
XMLName xml.Name
|
||||
Position SidenotePosition
|
||||
Page int
|
||||
Annotation string
|
||||
Anchor int
|
||||
Tokens []xml.Token
|
||||
CharData string
|
||||
}
|
||||
|
||||
type Char struct {
|
||||
Stack []xml.Token
|
||||
Value string
|
||||
}
|
||||
|
||||
func (c Char) String() string {
|
||||
return c.Value
|
||||
}
|
||||
|
||||
func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
lt.XMLName = start.Name
|
||||
for _, attr := range start.Attr {
|
||||
@@ -112,7 +106,7 @@ func (lt *Letter) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
}
|
||||
|
||||
func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
stack := []xml.Token{}
|
||||
b := strings.Builder{}
|
||||
var c_page *Page = nil
|
||||
|
||||
for {
|
||||
@@ -126,9 +120,6 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
|
||||
// INFO: Make a copy of the token since Token() reuses the underlying data
|
||||
tokenCopy := xml.CopyToken(token)
|
||||
if c_page != nil {
|
||||
c_page.Tokens = append(c_page.Tokens, tokenCopy)
|
||||
}
|
||||
|
||||
switch t := tokenCopy.(type) {
|
||||
case xml.StartElement:
|
||||
@@ -138,7 +129,9 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
lt.Pages = append(lt.Pages, *c_page)
|
||||
}
|
||||
|
||||
c_page = &Page{}
|
||||
c_page = &Page{
|
||||
Letter: lt.Letter,
|
||||
}
|
||||
|
||||
for _, attr := range t.Attr {
|
||||
if attr.Name.Local == "index" {
|
||||
@@ -152,6 +145,10 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
|
||||
// WARNING: UnmarshalXML continues and changes the state of the parser
|
||||
case "sidenote":
|
||||
if c_page == nil {
|
||||
d.Skip()
|
||||
continue
|
||||
}
|
||||
var sidenote Sidenote = Sidenote{
|
||||
Anchor: len(c_page.Tokens),
|
||||
}
|
||||
@@ -177,16 +174,18 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
}
|
||||
|
||||
case xml.CharData:
|
||||
b.WriteString(string(t))
|
||||
if c_page != nil {
|
||||
c_page.CharData = string(t)
|
||||
c_page.Tokens = append(c_page.Tokens, tokenCopy)
|
||||
}
|
||||
|
||||
case xml.EndElement:
|
||||
if t.Name.Local == "letterText" {
|
||||
if c_page != nil {
|
||||
c_page.Tokens = append(c_page.Tokens, tokenCopy)
|
||||
lt.Pages = append(lt.Pages, *c_page)
|
||||
}
|
||||
lt.CharData = b.String()
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -200,10 +199,9 @@ func (lt *Letter) parseTokens(d *xml.Decoder) error {
|
||||
}
|
||||
|
||||
func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
// Set the XMLName
|
||||
b := strings.Builder{}
|
||||
s.XMLName = start.Name
|
||||
|
||||
// Parse attributes
|
||||
for _, attr := range start.Attr {
|
||||
switch attr.Name.Local {
|
||||
case "pos":
|
||||
@@ -217,7 +215,6 @@ func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Collect all content tokens
|
||||
for {
|
||||
token, err := d.Token()
|
||||
if err != nil {
|
||||
@@ -227,16 +224,18 @@ func (s *Sidenote) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
tokenCopy := xml.CopyToken(token)
|
||||
|
||||
switch t := tokenCopy.(type) {
|
||||
case xml.CharData:
|
||||
b.WriteString(string(t))
|
||||
s.Tokens = append(s.Tokens, tokenCopy)
|
||||
// WARNING: this is a problem for sidenotes within sidenotes
|
||||
case xml.EndElement:
|
||||
if t.Name.Local == start.Name.Local {
|
||||
// End of sidenote element
|
||||
s.CharData = b.String()
|
||||
return nil
|
||||
}
|
||||
// Add the end element token to content
|
||||
s.Content = append(s.Content, tokenCopy)
|
||||
case xml.StartElement, xml.CharData, xml.Comment, xml.ProcInst:
|
||||
// Add all other tokens to content
|
||||
s.Content = append(s.Content, tokenCopy)
|
||||
s.Tokens = append(s.Tokens, tokenCopy)
|
||||
default:
|
||||
s.Tokens = append(s.Tokens, tokenCopy)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user