Files
lenz-web/helpers/functions/textparse.go
Simon Martens 9563145aeb Lots of stuff
2025-06-24 18:20:06 +02:00

247 lines
6.9 KiB
Go

package functions
import (
"math/rand"
"strconv"
"strings"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
// charset is the alphabet RandString draws from: ASCII letters and digits.
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"

// RandString returns a string of length bytes, each picked from charset with
// math/rand. It returns "" when length is zero or negative, so a bad length
// never panics inside make.
//
// The result comes from math/rand and is therefore not suitable for secrets.
func RandString(length int) string {
	if length <= 0 {
		return ""
	}
	b := make([]byte, length)
	for i := range b {
		b[i] = charset[rand.Intn(len(charset))]
	}
	return string(b)
}
// Note is a single annotation produced by Parse: an identifier plus the
// output tokens that render it.
type Note struct {
// Id identifies the note. Parse sets it to a RandString(8) value for
// sidenotes and hands, and to the current page index for page markers.
Id string
// Tokens holds the tokens that LenzParseState.String writes out for this note.
Tokens Tokens
}
// LenzParseState carries the output and the bookkeeping flags that Parse
// updates while walking the tokens of one letter.
type LenzParseState struct {
// Tokens is the main text output; String wraps it in a "fulltext" div.
Tokens Tokens
// Notes collects the sidenote and hand notes; String writes them into the
// "notes" div.
Notes []Note
// Count collects the page-marker notes; String writes them into the
// "count" div.
Count []Note
// LC is the line counter; Parse increments it for every line element whose
// type attribute is not "empty".
LC int
// PC is the current page index, taken from the "index" attribute of the
// last page element. Parse starts it at "1".
PC string
// CloseElement tells Parse whether the next end element should emit a
// closing token. Parse clears it for page elements and empty lines and sets
// it again after skipping one end element.
CloseElement bool
// Break is set once non-blank character data has been seen and cleared at
// the start of a sidenote; Parse only emits a numbered "br" for a line
// while it is set.
Break bool
// PageBreak is set by a page element and cleared when the page marker is
// emitted in front of the next non-blank text.
PageBreak bool
// LineBreak is set by line elements and by the end of a sidenote, and
// cleared at the next non-blank text.
LineBreak bool
}
// String renders the parse state as three sibling divs, in this order:
// "count" (the tokens of every entry in s.Count), "fulltext" (s.Tokens) and
// "notes" (the tokens of every entry in s.Notes).
//
// NOTE(review): the fulltext wrapper is added by calling Prepend and
// AppendEndElement on s.Tokens itself, so a second call presumably wraps the
// text again. Confirm before calling String more than once per state.
func (s *LenzParseState) String() string {
	var out strings.Builder

	// openDiv and closeDiv render the start and end token of a one-class div.
	openDiv := func(class string) string {
		return outToken{Name: "div", Classes: []string{class}, Type: Element}.String()
	}
	closeDiv := func(class string) string {
		return outToken{Name: "div", Classes: []string{class}, Type: EndElement}.String()
	}

	// Page markers.
	out.WriteString(openDiv("count"))
	for _, marker := range s.Count {
		out.WriteString(marker.Tokens.String())
	}
	out.WriteString(closeDiv("count"))

	// Main text, wrapped through the token list itself.
	s.Tokens.Prepend(outToken{Name: "div", Classes: []string{"fulltext"}, Type: Element})
	s.Tokens.AppendEndElement()
	out.WriteString(s.Tokens.String())

	// Sidenote and hand notes.
	out.WriteString(openDiv("notes"))
	for _, entry := range s.Notes {
		out.WriteString(entry.Tokens.String())
	}
	out.WriteString(closeDiv("notes"))

	return out.String()
}
// AppendNote adds note to the end of s.Notes, the list that String writes
// into the "notes" div.
func (s *LenzParseState) AppendNote(note Note) {
	s.Notes = append(s.Notes, note)
}
// ParseText looks up the letter identified by meta.Letter in lib.Letters and
// returns its content rendered by Parse.
//
// It returns "" when lib or meta is nil, or when the library holds no item
// for that letter.
func ParseText(lib *xmlmodels.Library, meta *xmlmodels.Meta) string {
	// meta is dereferenced below, so it needs the same nil guard as lib.
	if lib == nil || meta == nil {
		return ""
	}
	text := lib.Letters.Item(meta.Letter)
	if text == nil {
		return ""
	}
	return Parse(lib, meta, text.Content)
}
// TemplateParse binds lib and returns a two-argument function that forwards
// its letter and text to Parse together with that library.
func TemplateParse(lib *xmlmodels.Library) func(letter *xmlmodels.Meta, s string) string {
	render := func(letter *xmlmodels.Meta, s string) string {
		return Parse(lib, letter, s)
	}
	return render
}
// Parse converts the letter markup in s into the string produced by
// LenzParseState.String.
//
// lib is only used to resolve the person referenced by the "ref" attribute of
// hand elements; a nil lib is tolerated and renders such hands as "N/A".
// letter is not read by this function. An empty s yields "". If the parser
// reports an error, the error text is returned in place of markup.
//
// Sidenote, note and hand ids come from RandString, so the output differs
// between calls even for identical input.
func Parse(lib *xmlmodels.Library, letter *xmlmodels.Meta, s string) string {
	if len(s) == 0 {
		return ""
	}

	ps := LenzParseState{CloseElement: true, PC: "1"}
	parser := xmlparsing.NewParser(s)

	for elem, err := range parser.Iterate() {
		if err != nil {
			return err.Error()
		}

		// NOTE(review): 3 is a bare token-type bound; presumably it admits start
		// elements, end elements and character data. Confirm against xmlparsing.
		if elem.Type >= 3 {
			continue
		}

		if elem.Type == xmlparsing.EndElement {
			if elem.Name == "sidenote" {
				ps.LineBreak = true
			}
			if ps.CloseElement {
				ps.Tokens.AppendEndElement()
			} else {
				// An earlier element opened nothing (page, empty line): skip
				// exactly one closing token, then resume closing.
				ps.CloseElement = true
			}
			continue
		}

		switch elem.Name {
		case "insertion":
			ps.Tokens.AppendDefaultElement(elem)
			ps.Tokens.AppendDivElement("", "insertion-marker")
			ps.Tokens.AppendEndElement()

		case "sidenote":
			id := RandString(8)
			ps.Tokens.AppendDefaultElement(elem)
			ps.Break = false
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

			page := elem.Attributes["page"]
			annotation := elem.Attributes["annotation"]
			pos := elem.Attributes["pos"]
			if page != "" || annotation != "" || pos != "" {
				note := Note{Id: id}
				note.Tokens.AppendDivElement(id, "note-sidenote-meta")
				ps.Tokens.AppendDivElement(id, "inline-sidenote-meta")

				// Every non-empty attribute is rendered twice: once into the
				// note and once inline in the main text.
				for _, field := range [...]struct{ class, value string }{
					{"sidenote-page", page},
					{"sidenote-note", annotation},
					{"sidenote-pos", pos},
				} {
					if field.value == "" {
						continue
					}
					note.Tokens.AppendDivElement("", field.class)
					note.Tokens.AppendText(field.value)
					note.Tokens.AppendEndElement()
					ps.Tokens.AppendDivElement("", field.class)
					ps.Tokens.AppendText(field.value)
					ps.Tokens.AppendEndElement()
				}

				note.Tokens.AppendEndElement() // note-sidenote-meta
				ps.Tokens.AppendEndElement()   // inline-sidenote-meta
				ps.AppendNote(note)
			}

		case "note":
			id := RandString(8)
			// The anchor link is closed right away; the note div is left open
			// here and presumably closed by the element's end tag.
			ps.Tokens.AppendLink("#"+id, "nanchor-note")
			ps.Tokens.AppendEndElement()
			ps.Tokens.AppendDivElement(id, "note", "note-note")

		case "nr":
			// A missing or non-numeric extent counts as 1.
			extent := 1
			if raw := elem.Attributes["extent"]; raw != "" {
				if n, convErr := strconv.Atoi(raw); convErr == nil {
					extent = n
				}
			}
			ps.Tokens.AppendDefaultElement(elem)
			for i := 0; i < extent; i++ {
				ps.Tokens.AppendText("&nbsp;")
			}

		case "hand":
			id := RandString(8)
			hand := "N/A"
			// lib may be nil when Parse is reached without going through
			// ParseText; fall back to "N/A" instead of dereferencing it.
			if lib != nil {
				if ref, convErr := strconv.Atoi(elem.Attributes["ref"]); convErr == nil {
					if person := lib.Persons.Item(ref); person != nil {
						hand = person.Name
					}
				}
			}

			note := Note{Id: id}
			note.Tokens.AppendDivElement(id, "note-hand")
			note.Tokens.AppendText(hand)
			note.Tokens.AppendEndElement()
			ps.AppendNote(note)

			ps.Tokens.AppendDivElement(id, "inline-hand")
			ps.Tokens.AppendText(hand)
			ps.Tokens.AppendEndElement()
			ps.Tokens.AppendDivElement("", "hand")
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

		case "line":
			if elem.Attributes["type"] != "empty" {
				ps.LC++
				if ps.Break {
					ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC))
				}
				// The line element carries the indent and must be closed.
				ps.Tokens.AppendDefaultElement(elem)
			} else {
				ps.Tokens.AppendEmptyElement("br", "", "empty")
				// Indents make no sense on an empty line, so nothing is
				// opened and the matching end element must not close anything.
				ps.CloseElement = false
			}
			ps.LineBreak = true

		case "page":
			ps.PC = elem.Attributes["index"]
			ps.PageBreak = true
			ps.CloseElement = false

		default:
			isText := elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != ""
			if isText {
				ps.Break = true
			}

			// The page marker is emitted in front of the first non-blank text
			// after a page element; the initial page "1" gets no marker.
			if isText && ps.PageBreak && ps.PC != "1" {
				ps.PageBreak = false
				note := Note{Id: ps.PC}
				quality := "outside"
				if !ps.LineBreak {
					quality = "inside"
				}
				ps.Tokens.AppendDivElement("", "eanchor-page", "eanchor-page-"+quality)
				ps.Tokens.AppendCustomAttribute("aria-describedby", ps.PC)
				ps.Tokens.AppendEndElement()
				ps.Tokens.AppendDivElement("", "page-counter", "page-"+quality)
				ps.Tokens.AppendText(ps.PC)
				ps.Tokens.AppendEndElement()
				note.Tokens.AppendDivElement(ps.PC, "page", "page-"+quality)
				note.Tokens.AppendText(ps.PC)
				note.Tokens.AppendEndElement()
				ps.Count = append(ps.Count, note)
				// strings.TrimLeft returns the trimmed string; the result has
				// to be stored for the trim to take effect.
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
			}

			if isText && ps.LineBreak {
				// Drop leading whitespace of the first text after a line break.
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
				ps.LineBreak = false
			}

			ps.Tokens.AppendDefaultElement(elem)
		}
	}

	return ps.String()
}