Parsing upon deserialising

This commit is contained in:
Simon Martens
2025-11-14 15:29:51 +01:00
parent a46c171de7
commit 2e251f446f
9 changed files with 633 additions and 331 deletions

View File

@@ -1,223 +0,0 @@
package functions
import (
	"sort"
	"strings"

	"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
// outType identifies what kind of HTML fragment an outToken renders to.
type outType int

const (
	// NA is the zero value; a token of this type renders as the empty string.
	NA outType = iota
	// Text renders the token's Value verbatim.
	Text
	// Element renders an opening tag.
	Element
	// EmptyElement renders a self-closing tag.
	EmptyElement
	// EndElement renders a closing tag.
	EndElement
)

// outToken is one piece of HTML output: a text run, an opening tag,
// a self-closing tag or a closing tag, depending on Type.
type outToken struct {
	Type       outType
	Name       string            // tag name; unused for Text
	Classes    []string          // joined with spaces into the class attribute
	Id         string            // written as the id attribute when non-empty
	Value      string            // text content; only used for Text
	Attributes map[string]string // additional attributes, written after class and id
}

// sortedAttrKeys returns the keys of m in ascending order so that callers can
// walk the map deterministically (Go map iteration order is unspecified).
func sortedAttrKeys(m map[string]string) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	sort.Strings(keys)
	return keys
}

// tag renders "<Name class=… id=… attrs…" followed by closer, which is ">"
// for an opening tag and "/>" for a self-closing one.
//
// NOTE: names and values are written as-is; nothing is HTML-escaped here.
func (o outToken) tag(closer string) string {
	var b strings.Builder
	b.WriteString("<")
	b.WriteString(o.Name)
	if len(o.Classes) > 0 {
		b.WriteString(" class=\"")
		b.WriteString(strings.Join(o.Classes, " "))
		b.WriteString("\"")
	}
	if len(o.Id) > 0 {
		b.WriteString(" id=\"")
		b.WriteString(o.Id)
		b.WriteString("\"")
	}
	// Sorted keys keep the rendered markup stable between calls and runs.
	for _, key := range sortedAttrKeys(o.Attributes) {
		b.WriteString(" ")
		b.WriteString(key)
		b.WriteString("=\"")
		b.WriteString(o.Attributes[key])
		b.WriteString("\"")
	}
	b.WriteString(closer)
	return b.String()
}

// String renders the token as HTML. Text tokens yield their Value unchanged,
// Element and EmptyElement yield a tag with class, id and the remaining
// attributes (in key order), EndElement yields "</Name>", and any other type
// yields the empty string.
func (o outToken) String() string {
	switch o.Type {
	case Text:
		return o.Value
	case Element:
		return o.tag(">")
	case EmptyElement:
		return o.tag("/>")
	case EndElement:
		return "</" + o.Name + ">"
	}
	return ""
}

// ClassesFromAttrs appends one class of the form "key-value" for every entry
// in attrs. Entries are added in ascending key order so the resulting class
// list is reproducible; an empty or nil map leaves Classes untouched.
func (o *outToken) ClassesFromAttrs(attrs map[string]string) {
	for _, key := range sortedAttrKeys(attrs) {
		o.Classes = append(o.Classes, key+"-"+attrs[key])
	}
}
// Default maps a parser token onto its generic HTML counterpart:
//   - a start element becomes a <div> whose classes are the XML element name
//     followed by one "key-value" class per XML attribute,
//   - an end element becomes an end token without a name,
//   - character data becomes a text token carrying the token's data.
//
// Any other token type yields the zero outToken.
func Default(token *xmlparsing.Token) outToken {
	switch token.Type {
	case xmlparsing.StartElement:
		div := outToken{Name: "div", Type: Element, Classes: []string{token.Name}}
		div.ClassesFromAttrs(token.Attributes)
		return div
	case xmlparsing.EndElement:
		return outToken{Type: EndElement}
	case xmlparsing.CharData:
		return outToken{Type: Text, Value: token.Data}
	}
	return outToken{}
}
// Tokens is an ordered list of output tokens; rendering it concatenates the
// rendered form of every token in Out.
type Tokens struct {
// Out holds the tokens in output order.
Out []outToken
}
// Prepend puts token in front of every token already collected.
func (s *Tokens) Prepend(token outToken) {
	merged := make([]outToken, 0, len(s.Out)+1)
	merged = append(merged, token)
	s.Out = append(merged, s.Out...)
}
// AppendDefaultElement appends the Default rendering of token. When at least
// one id is passed, the first one becomes the id of the appended token; any
// further ids are ignored.
func (s *Tokens) AppendDefaultElement(token *xmlparsing.Token, ids ...string) {
	out := Default(token)
	if len(ids) != 0 {
		out.Id = ids[0]
	}
	s.Out = append(s.Out, out)
}
// AppendCustomAttribute sets attribute name=value on the most recently
// appended token, creating its attribute map on demand. It does nothing when
// no token has been appended yet.
func (s *Tokens) AppendCustomAttribute(name, value string) {
	n := len(s.Out)
	if n == 0 {
		return
	}
	last := &s.Out[n-1]
	if last.Attributes == nil {
		last.Attributes = map[string]string{}
	}
	last.Attributes[name] = value
}
// AppendElement appends an opening tag called name with the given id and
// classes.
func (s *Tokens) AppendElement(name string, id string, classes ...string) {
	open := outToken{Type: Element, Name: name, Id: id, Classes: classes}
	s.Out = append(s.Out, open)
}
// AppendEndElement appends the closing tag for the innermost element that is
// still open. It walks the output backwards, counting closing tags; every
// opening tag met while that count is positive is treated as already closed.
// Opening tags named "p" or "br" are never considered. If no open element is
// found, nothing is appended.
func (s *Tokens) AppendEndElement() {
	closed := 0
	for i := len(s.Out) - 1; i >= 0; i-- {
		cur := s.Out[i]
		switch {
		case cur.Type == EndElement:
			closed++
		case cur.Type == Element && cur.Name != "p" && cur.Name != "br":
			if closed > 0 {
				// This opening tag pairs with a closing tag seen further right.
				closed--
				continue
			}
			s.Out = append(s.Out, outToken{Type: EndElement, Name: cur.Name})
			return
		}
	}
}
// AppendDivElement appends an opening <div> with the given id and classes.
func (s *Tokens) AppendDivElement(id string, classes ...string) {
	div := outToken{Type: Element, Name: "div", Id: id, Classes: classes}
	s.Out = append(s.Out, div)
}
// AppendEmptyElement appends a self-closing tag called name with the given id
// and classes.
func (s *Tokens) AppendEmptyElement(name string, id string, classes ...string) {
	empty := outToken{Type: EmptyElement, Name: name, Id: id, Classes: classes}
	s.Out = append(s.Out, empty)
}
// AppendLink appends an opening <a> tag pointing at href and carrying the
// given classes. The caller is responsible for closing it.
func (s *Tokens) AppendLink(href string, classes ...string) {
	attrs := make(map[string]string, 1)
	attrs["href"] = href
	link := outToken{Type: Element, Name: "a", Classes: classes, Attributes: attrs}
	s.Out = append(s.Out, link)
}
// AppendText appends text as a text token; it is rendered verbatim.
func (s *Tokens) AppendText(text string) {
	run := outToken{Value: text, Type: Text}
	s.Out = append(s.Out, run)
}
// Append adds token to the end of the output.
func (s *Tokens) Append(token outToken) {
s.Out = append(s.Out, token)
}
// String renders all tokens in order and returns the concatenated markup.
func (s *Tokens) String() string {
	var html strings.Builder
	for i := range s.Out {
		html.WriteString(s.Out[i].String())
	}
	return html.String()
}

View File

@@ -1,246 +0,0 @@
package functions
import (
"math/rand"
"strconv"
"strings"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
// charset is the alphabet RandString draws from: ASCII letters and digits.
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

// RandString returns a string of the requested length whose bytes are picked
// at random from charset. A length of zero or less yields the empty string
// instead of panicking on the slice allocation.
//
// The values come from math/rand, so they are neither cryptographically
// secure nor guaranteed to be unique.
func RandString(length int) string {
	if length <= 0 {
		return ""
	}
	b := make([]byte, length)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}
// Note is a block of output that is rendered apart from the running text.
type Note struct {
// Id is the identifier the note was created with.
Id string
// Tokens holds the markup of the note.
Tokens Tokens
}
// LenzParseState carries the output and the bookkeeping flags that Parse
// maintains while walking the tokens of a letter.
type LenzParseState struct {
// Tokens is the markup of the running text.
Tokens Tokens
// Notes collects the notes rendered in the "notes" container.
Notes []Note
// Count collects the page markers rendered in the "count" container.
Count []Note
// LC counts the "line" elements seen so far whose type is not "empty".
LC int
// PC is the "index" attribute of the most recent "page" element; Parse starts it at "1".
PC string
// CloseElement tells the end-element handler whether to emit a closing tag;
// when false the next end element only resets it to true.
CloseElement bool
// Break is set once non-blank character data was seen and cleared by a
// "sidenote"; a <br> is only emitted before a line while it is set.
Break bool
// PageBreak is set by a "page" element and cleared once its marker is written.
PageBreak bool
// LineBreak is set by a "line" element and by the end of a "sidenote", and
// cleared by the next non-blank character data.
LineBreak bool
}
// String renders the complete output as three consecutive parts:
// a <div class="count"> with all page markers, the running text wrapped in a
// <div class="fulltext">, and a <div class="notes"> with all notes.
//
// The wrapper around the running text is built on a copy of the token list,
// so calling String does not modify the state and repeated calls return the
// same markup.
func (s *LenzParseState) String() string {
	var builder strings.Builder

	builder.WriteString(outToken{Name: "div", Classes: []string{"count"}, Type: Element}.String())
	for i := range s.Count {
		builder.WriteString(s.Count[i].Tokens.String())
	}
	builder.WriteString(outToken{Name: "div", Classes: []string{"count"}, Type: EndElement}.String())

	// Wrap a copy of the text tokens; AppendEndElement then closes whichever
	// element is still open, which is the wrapper itself when the text is balanced.
	full := Tokens{Out: make([]outToken, 0, len(s.Tokens.Out)+2)}
	full.Out = append(full.Out, outToken{Name: "div", Classes: []string{"fulltext"}, Type: Element})
	full.Out = append(full.Out, s.Tokens.Out...)
	full.AppendEndElement()
	builder.WriteString(full.String())

	builder.WriteString(outToken{Name: "div", Classes: []string{"notes"}, Type: Element}.String())
	for i := range s.Notes {
		builder.WriteString(s.Notes[i].Tokens.String())
	}
	builder.WriteString(outToken{Name: "div", Classes: []string{"notes"}, Type: EndElement}.String())

	return builder.String()
}
// AppendNote adds note to the end of the collected notes.
func (s *LenzParseState) AppendNote(note Note) {
s.Notes = append(s.Notes, note)
}
// ParseText looks up the letter referenced by meta in lib and returns its
// content rendered by Parse. It returns the empty string when lib or meta is
// nil or when the library holds no text for that letter.
func ParseText(lib *xmlmodels.Library, meta *xmlmodels.Meta) string {
	// meta is dereferenced below, so it needs the same guard as lib.
	if lib == nil || meta == nil {
		return ""
	}
	text := lib.Letters.Item(meta.Letter)
	if text == nil {
		return ""
	}
	return Parse(lib, meta, text.Content)
}
// TemplateParse binds lib and returns a function that renders the XML string
// s for the given letter by delegating to Parse.
func TemplateParse(lib *xmlmodels.Library) func(letter *xmlmodels.Meta, s string) string {
	render := func(letter *xmlmodels.Meta, s string) string {
		return Parse(lib, letter, s)
	}
	return render
}
// Parse converts the letter XML in s into HTML and returns it as a string.
//
// The input is tokenised with xmlparsing; every token is translated into
// output tokens on a LenzParseState, which is rendered at the end. Elements
// without special handling become <div> elements classed after the XML
// element (see Default). Special handling exists for "insertion", "sidenote",
// "note", "nr", "hand", "line" and "page".
//
// lib is used to resolve the person referenced by a "hand" element; when lib
// is nil the hand is rendered as "N/A". letter is accepted for the callers'
// benefit but is not consulted here. An empty s yields the empty string. If
// the parser reports an error, the error text is returned in place of markup.
func Parse(lib *xmlmodels.Library, letter *xmlmodels.Meta, s string) string {
	if len(s) == 0 {
		return ""
	}

	ps := LenzParseState{CloseElement: true, PC: "1"}
	parser := xmlparsing.NewParser(s)

	for elem, err := range parser.Iterate() {
		if err != nil {
			return err.Error()
		}

		// NOTE(review): presumably this keeps start elements, end elements and
		// character data only — confirm against the xmlparsing token types.
		if elem.Type >= 3 {
			continue
		}

		if elem.Type == xmlparsing.EndElement {
			if elem.Name == "sidenote" {
				ps.LineBreak = true
			}
			if ps.CloseElement {
				ps.Tokens.AppendEndElement()
			} else {
				// The matching start element opened nothing; re-arm for the next one.
				ps.CloseElement = true
			}
			continue
		}

		switch elem.Name {
		case "insertion":
			ps.Tokens.AppendDefaultElement(elem)
			ps.Tokens.AppendDivElement("", "insertion-marker")
			ps.Tokens.AppendEndElement()

		case "sidenote":
			id := RandString(8)
			ps.Tokens.AppendDefaultElement(elem)
			ps.Break = false
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

			attrs := elem.Attributes
			if attrs["annotation"] != "" || attrs["page"] != "" || attrs["pos"] != "" {
				note := Note{Id: id}
				note.Tokens.AppendDivElement(id, "note-sidenote-meta")
				ps.Tokens.AppendDivElement(id, "inline-sidenote-meta")

				// Each present attribute is written twice: once into the note
				// and once inline into the running text.
				metaParts := []struct{ attr, class string }{
					{"page", "sidenote-page"},
					{"annotation", "sidenote-note"},
					{"pos", "sidenote-pos"},
				}
				for _, part := range metaParts {
					val := attrs[part.attr]
					if val == "" {
						continue
					}
					note.Tokens.AppendDivElement("", part.class)
					note.Tokens.AppendText(val)
					note.Tokens.AppendEndElement()
					ps.Tokens.AppendDivElement("", part.class)
					ps.Tokens.AppendText(val)
					ps.Tokens.AppendEndElement()
				}

				note.Tokens.AppendEndElement() // note-sidenote-meta
				ps.Tokens.AppendEndElement()   // inline-sidenote-meta
				ps.AppendNote(note)
			}

		case "note":
			id := RandString(8)
			ps.Tokens.AppendLink("#"+id, "nanchor-note")
			ps.Tokens.AppendEndElement()
			ps.Tokens.AppendDivElement(id, "note", "note-note")

		case "nr":
			// A missing or non-numeric extent counts as 1.
			extno, convErr := strconv.Atoi(elem.Attributes["extent"])
			if convErr != nil {
				extno = 1
			}
			ps.Tokens.AppendDefaultElement(elem)
			for i := 0; i < extno; i++ {
				ps.Tokens.AppendText("&nbsp;")
			}

		case "hand":
			id := RandString(8)
			hand := "N/A"
			// Only look the person up when the reference is numeric and a
			// library is available.
			if idno, convErr := strconv.Atoi(elem.Attributes["ref"]); convErr == nil && lib != nil {
				if person := lib.Persons.Item(idno); person != nil {
					hand = person.Name
				}
			}

			note := Note{Id: id}
			note.Tokens.AppendDivElement(id, "note-hand")
			note.Tokens.AppendText(hand)
			note.Tokens.AppendEndElement()
			ps.AppendNote(note)

			ps.Tokens.AppendDivElement(id, "inline-hand")
			ps.Tokens.AppendText(hand)
			ps.Tokens.AppendEndElement()
			ps.Tokens.AppendDivElement("", "hand")
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

		case "line":
			if elem.Attributes["type"] != "empty" {
				ps.LC++
				if ps.Break {
					ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC))
				}
				// Opened so indents can be styled; closed by the end element.
				ps.Tokens.AppendDefaultElement(elem)
			} else {
				ps.Tokens.AppendEmptyElement("br", "", "empty")
				// Indents make no sense on an empty line, so no element is
				// opened and the end element must not close anything.
				ps.CloseElement = false
			}
			ps.LineBreak = true

		case "page":
			ps.PC = elem.Attributes["index"]
			ps.PageBreak = true
			ps.CloseElement = false

		default:
			hasText := elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != ""
			if hasText {
				ps.Break = true
			}

			// The page marker is emitted with the first real text after a
			// page element, except for the first page.
			if ps.PageBreak && ps.PC != "1" && hasText {
				ps.PageBreak = false
				note := Note{Id: ps.PC}
				quality := "outside"
				if !ps.LineBreak {
					quality = "inside"
				}
				ps.Tokens.AppendDivElement("", "eanchor-page", "eanchor-page-"+quality)
				ps.Tokens.AppendCustomAttribute("aria-describedby", ps.PC)
				ps.Tokens.AppendEndElement()
				ps.Tokens.AppendDivElement("", "page-counter", "page-"+quality)
				ps.Tokens.AppendText(ps.PC)
				ps.Tokens.AppendEndElement()
				note.Tokens.AppendDivElement(ps.PC, "page", "page-"+quality)
				note.Tokens.AppendText(ps.PC)
				note.Tokens.AppendEndElement()
				ps.Count = append(ps.Count, note)
				// Strings are immutable: the trimmed value has to be stored,
				// otherwise the call has no effect.
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
			}

			if ps.LineBreak && hasText {
				// Text following a line break starts without leading whitespace.
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
				ps.LineBreak = false
			}

			ps.Tokens.AppendDefaultElement(elem)
		}
	}

	return ps.String()
}