Lots of stuff

This commit is contained in:
Simon Martens
2025-06-24 18:20:06 +02:00
parent 3127446dab
commit 9563145aeb
29 changed files with 1694 additions and 1386 deletions

View File

@@ -3,6 +3,7 @@ package controllers
import ( import (
"strconv" "strconv"
"github.com/Theodor-Springmann-Stiftung/lenz-web/helpers/functions"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2"
) )
@@ -21,8 +22,8 @@ func GetLetter(c *fiber.Ctx) error {
} }
np := lib.NextPrev(meta) np := lib.NextPrev(meta)
text := lib.Letters.Item(letter) parsed := functions.ParseText(lib, meta)
tradition := lib.Traditions.Item(letter) tradition := lib.Traditions.Item(letter)
return c.Render("/brief/", fiber.Map{"meta": meta, "text": text, "tradition": tradition, "next": np.Next, "prev": np.Prev}) return c.Render("/brief/", fiber.Map{"meta": meta, "text": parsed, "tradition": tradition, "next": np.Next, "prev": np.Prev})
} }

View File

@@ -3,7 +3,7 @@ package functions
import ( import (
"strings" "strings"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
) )
type outType int type outType int
@@ -101,7 +101,7 @@ func (o *outToken) ClassesFromAttrs(attrs map[string]string) {
} }
} }
func Default(token xmlparsing.Token) outToken { func Default(token *xmlparsing.Token) outToken {
o := outToken{} o := outToken{}
switch token.Type { switch token.Type {
case xmlparsing.StartElement: case xmlparsing.StartElement:
@@ -126,7 +126,7 @@ func (s *Tokens) Prepend(token outToken) {
s.Out = append([]outToken{token}, s.Out...) s.Out = append([]outToken{token}, s.Out...)
} }
func (s *Tokens) AppendDefaultElement(token xmlparsing.Token, ids ...string) { func (s *Tokens) AppendDefaultElement(token *xmlparsing.Token, ids ...string) {
t := Default(token) t := Default(token)
if len(ids) > 0 { if len(ids) > 0 {
t.Id = ids[0] t.Id = ids[0]

View File

@@ -5,8 +5,8 @@ import (
"strconv" "strconv"
"strings" "strings"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
) )
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
@@ -58,22 +58,41 @@ func (s *LenzParseState) AppendNote(note Note) {
s.Notes = append(s.Notes, note) s.Notes = append(s.Notes, note)
} }
func Parse(lib *xmlmodels.Library) func(s string) string { func ParseText(lib *xmlmodels.Library, meta *xmlmodels.Meta) string {
return func(s string) string { if lib == nil {
return ""
}
text := lib.Letters.Item(meta.Letter)
if text == nil {
return ""
}
return Parse(lib, meta, text.Content)
}
func TemplateParse(lib *xmlmodels.Library) func(letter *xmlmodels.Meta, s string) string {
return func(letter *xmlmodels.Meta, s string) string {
return Parse(lib, letter, s)
}
}
func Parse(lib *xmlmodels.Library, letter *xmlmodels.Meta, s string) string {
if len(s) == 0 { if len(s) == 0 {
return "" return ""
} }
ps := LenzParseState{CloseElement: true, PC: "1"} ps := LenzParseState{CloseElement: true, PC: "1"}
parser := xmlparsing.NewParser(s)
for elem, err := range xmlparsing.Iterate(s, ps) { for elem, err := range parser.Iterate() {
if err != nil { if err != nil {
return err.Error() return err.Error()
} }
if elem.Token.Type < 3 { if elem.Type < 3 {
if elem.Token.Type == xmlparsing.EndElement { if elem.Type == xmlparsing.EndElement {
if elem.Token.Name == "sidenote" { if elem.Name == "sidenote" {
ps.LineBreak = true ps.LineBreak = true
} }
if ps.CloseElement { if ps.CloseElement {
@@ -84,41 +103,44 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
continue continue
} }
switch elem.Token.Name { switch elem.Name {
case "insertion":
ps.Tokens.AppendDefaultElement(elem)
ps.Tokens.AppendDivElement("", "insertion-marker")
ps.Tokens.AppendEndElement()
case "sidenote": case "sidenote":
id := RandString(8) id := RandString(8)
ps.Tokens.AppendDefaultElement(elem.Token) ps.Tokens.AppendDefaultElement(elem)
ps.Break = false ps.Break = false
ps.Tokens.AppendCustomAttribute("aria-describedby", id) ps.Tokens.AppendCustomAttribute("aria-describedby", id)
if elem.Token.Attributes["annotation"] != "" || if elem.Attributes["annotation"] != "" ||
elem.Token.Attributes["page"] != "" || elem.Attributes["page"] != "" ||
elem.Token.Attributes["pos"] != "" { elem.Attributes["pos"] != "" {
note := Note{Id: id} note := Note{Id: id}
note.Tokens.AppendDivElement(id, "note-sidenote-meta") note.Tokens.AppendDivElement(id, "note-sidenote-meta")
ps.Tokens.AppendDivElement(id, "inline-sidenote-meta") ps.Tokens.AppendDivElement(id, "inline-sidenote-meta")
if elem.Token.Attributes["page"] != "" { if elem.Attributes["page"] != "" {
note.Tokens.AppendDivElement("", "sidenote-page") note.Tokens.AppendDivElement("", "sidenote-page")
note.Tokens.AppendText(elem.Token.Attributes["page"]) note.Tokens.AppendText(elem.Attributes["page"])
note.Tokens.AppendEndElement() note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-page") ps.Tokens.AppendDivElement("", "sidenote-page")
ps.Tokens.AppendText(elem.Token.Attributes["page"]) ps.Tokens.AppendText(elem.Attributes["page"])
ps.Tokens.AppendEndElement() ps.Tokens.AppendEndElement()
} }
if elem.Token.Attributes["annotation"] != "" { if elem.Attributes["annotation"] != "" {
note.Tokens.AppendDivElement("", "sidenote-note") note.Tokens.AppendDivElement("", "sidenote-note")
note.Tokens.AppendText(elem.Token.Attributes["annotation"]) note.Tokens.AppendText(elem.Attributes["annotation"])
note.Tokens.AppendEndElement() note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-note") ps.Tokens.AppendDivElement("", "sidenote-note")
ps.Tokens.AppendText(elem.Token.Attributes["annotation"]) ps.Tokens.AppendText(elem.Attributes["annotation"])
ps.Tokens.AppendEndElement() ps.Tokens.AppendEndElement()
} }
if elem.Token.Attributes["pos"] != "" { if elem.Attributes["pos"] != "" {
note.Tokens.AppendDivElement("", "sidenote-pos") note.Tokens.AppendDivElement("", "sidenote-pos")
note.Tokens.AppendText(elem.Token.Attributes["pos"]) note.Tokens.AppendText(elem.Attributes["pos"])
note.Tokens.AppendEndElement() note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-pos") ps.Tokens.AppendDivElement("", "sidenote-pos")
ps.Tokens.AppendText(elem.Token.Attributes["pos"]) ps.Tokens.AppendText(elem.Attributes["pos"])
ps.Tokens.AppendEndElement() ps.Tokens.AppendEndElement()
} }
note.Tokens.AppendEndElement() // sidenote-meta note.Tokens.AppendEndElement() // sidenote-meta
@@ -133,7 +155,7 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
ps.Tokens.AppendDivElement(id, "note", "note-note") ps.Tokens.AppendDivElement(id, "note", "note-note")
case "nr": case "nr":
ext := elem.Token.Attributes["extent"] ext := elem.Attributes["extent"]
if ext == "" { if ext == "" {
ext = "1" ext = "1"
} }
@@ -142,14 +164,14 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
extno = 1 extno = 1
} }
ps.Tokens.AppendDefaultElement(elem.Token) ps.Tokens.AppendDefaultElement(elem)
for i := 0; i < extno; i++ { for i := 0; i < extno; i++ {
ps.Tokens.AppendText("&nbsp;") ps.Tokens.AppendText("&nbsp;")
} }
case "hand": case "hand":
id := elem.Token.Attributes["ref"] id := RandString(8)
idno, err := strconv.Atoi(id) idno, err := strconv.Atoi(elem.Attributes["ref"])
var person *xmlmodels.PersonDef var person *xmlmodels.PersonDef
if err == nil { if err == nil {
person = lib.Persons.Item(idno) person = lib.Persons.Item(idno)
@@ -171,12 +193,12 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
ps.Tokens.AppendCustomAttribute("aria-describedby", id) ps.Tokens.AppendCustomAttribute("aria-describedby", id)
case "line": case "line":
if val := elem.Token.Attributes["type"]; val != "empty" { if val := elem.Attributes["type"]; val != "empty" {
ps.LC += 1 ps.LC += 1
if ps.Break { if ps.Break {
ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC)) ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC))
} }
ps.Tokens.AppendDefaultElement(elem.Token) // This is for indents, must be closed ps.Tokens.AppendDefaultElement(elem) // This is for indents, must be closed
} else { } else {
ps.Tokens.AppendEmptyElement("br", "", "empty") ps.Tokens.AppendEmptyElement("br", "", "empty")
ps.CloseElement = false // Here Indents make no sense, so we dont open an element ps.CloseElement = false // Here Indents make no sense, so we dont open an element
@@ -184,15 +206,15 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
ps.LineBreak = true ps.LineBreak = true
case "page": case "page":
ps.PC = elem.Token.Attributes["index"] ps.PC = elem.Attributes["index"]
ps.PageBreak = true ps.PageBreak = true
ps.CloseElement = false ps.CloseElement = false
default: default:
if !ps.Break && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" { if !ps.Break && elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != "" {
ps.Break = true ps.Break = true
} }
if ps.PageBreak && ps.PC != "1" && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" { if ps.PageBreak && ps.PC != "1" && elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != "" {
ps.PageBreak = false ps.PageBreak = false
note := Note{Id: ps.PC} note := Note{Id: ps.PC}
quality := "outside" quality := "outside"
@@ -209,17 +231,16 @@ func Parse(lib *xmlmodels.Library) func(s string) string {
note.Tokens.AppendText(ps.PC) note.Tokens.AppendText(ps.PC)
note.Tokens.AppendEndElement() note.Tokens.AppendEndElement()
ps.Count = append(ps.Count, note) ps.Count = append(ps.Count, note)
strings.TrimLeft(elem.Token.Data, " \t\n\r") strings.TrimLeft(elem.Data, " \t\n\r")
} }
if ps.LineBreak && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" { if ps.LineBreak && elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != "" {
strings.TrimLeft(elem.Token.Data, " \t\n\r") strings.TrimLeft(elem.Data, " \t\n\r")
ps.LineBreak = false ps.LineBreak = false
} }
ps.Tokens.AppendDefaultElement(elem.Token) ps.Tokens.AppendDefaultElement(elem)
} }
} }
} }
return ps.String() return ps.String()
} }
}

View File

@@ -53,7 +53,7 @@ func main() {
engine := templating.New(&views.LayoutFS, &views.RoutesFS) engine := templating.New(&views.LayoutFS, &views.RoutesFS)
engine.AddFuncs(lib.FuncMap()) engine.AddFuncs(lib.FuncMap())
engine.AddFunc("ParseGeneric", functions.Parse(lib)) engine.AddFunc("ParseGeneric", functions.TemplateParse(lib))
storage := memory.New(memory.Config{ storage := memory.New(memory.Config{
GCInterval: 24 * time.Hour, GCInterval: 24 * time.Hour,
}) })

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -30,9 +30,18 @@
</div> </div>
<div class="text flex flex-row print:flex-col"> <div class="text flex flex-row print:flex-col">
{{- Safe (ParseGeneric .text.Content) -}} {{- Safe $model.text -}}
</div> </div>
<div class="traditions mt-12 pt-3 border-t-gray-200 border-t-1 max-w-[90ch] print:border-none"> <div class="traditions mt-12 pt-3 border-t-gray-200 border-t-1 max-w-[90ch] print:border-none">
{{ template "_lettertrad" $model.meta -}} {{ template "_lettertrad" $model.meta -}}
</div> </div>
<script type="module">
// WARNING: We need to wait for the fonts to settle before rendering anything
document.fonts.ready.then(() => {
if (window.alignSidenotes) {
window.alignSidenotes();
}
});
</script>

View File

@@ -37,8 +37,8 @@
{{ else if $i -}} {{ else if $i -}}
, ,
{{ end }} {{ end }}
{{- $person := Person $p.Reference -}} {{- $person := Person $p.Reference }}
{{- $person.Name -}} {{ $person.Name -}}
{{- end -}} {{- end -}}
</div> </div>
<div class="mx-3"> <div class="mx-3">
@@ -57,8 +57,25 @@
{{- if $i -}} {{- if $i -}}
, ,
{{- end -}} {{- end -}}
{{- $person := Person $p.Reference -}} {{- $person := Person $p.Reference }}
{{- $person.Name -}} {{ $person.Name -}}
{{- end -}}
{{ if and $sr.Received.Places (len $sr.Received.Places) }}
{{- range $i, $p := $sr.Received.Places -}}
{{- $place := Place $p.Reference }}
{{- if and $i (eq $i (Minus (len $sr.Received.Places) 1)) -}}
und
{{- end -}}
{{- if $i -}}
,
{{- end -}}
{{- if eq $i 0 -}}
&nbsp;({{- $place.Name -}}
{{- else -}}
{{ $place.Name -}}
{{- end -}}
{{- end -}}
)
{{- end -}} {{- end -}}
</div> </div>
{{- else -}} {{- else -}}

View File

@@ -5,7 +5,7 @@
{{- (App $trad.Reference).Name -}} {{- (App $trad.Reference).Name -}}
</div> </div>
<div class="tradition-text text hyphens-auto font-sans"> <div class="tradition-text text hyphens-auto font-sans">
{{- Safe (ParseGeneric $trad.Content) -}} {{- Safe (ParseGeneric $model $trad.Content) -}}
</div> </div>
</div> </div>
{{- end -}} {{- end -}}

View File

@@ -158,45 +158,10 @@ class ScrollButton extends HTMLElement {
} }
} }
function Startup() { let positionedIntervals = [];
let pagedPreviewer = null;
const positionedIntervals = [];
// INFO: Generate a print preview of the page if the URL has ?print=true
if (new URL(window.location).searchParams.get("print") === "true") {
showPreview();
}
// INFO: Listeners for sidenotes
window.addEventListener("load", () => {
alignSidenotes();
});
window.addEventListener("resize", alignSidenotes);
if (htmx) {
window.addEventListener("htmx:afterSettle", (_) => {
alignSidenotes();
});
}
function showPreview() {
if (!pagedPreviewer) {
pagedPreviewer = new Previewer();
}
pagedPreviewer.preview().then(() => {
document.body.classList.add("previewing");
});
// INFO: this is probably not necessary since we open the preview in a new window
// but just in case.
window.addEventListener("popstate", (_) => {
window.location.reload();
});
}
function alignSidenotes() { function alignSidenotes() {
positionedIntervals = [];
_alignSidenotes(".count", ".page", ".eanchor-page"); _alignSidenotes(".count", ".page", ".eanchor-page");
_alignSidenotes(".notes", ".note-hand", ".hand"); _alignSidenotes(".notes", ".note-hand", ".hand");
_alignSidenotes(".notes", ".note-sidenote-meta", ".sidenote"); _alignSidenotes(".notes", ".note-sidenote-meta", ".sidenote");
@@ -263,9 +228,39 @@ function Startup() {
note.style.visibility = "visible"; note.style.visibility = "visible";
}); });
} }
// INFO: these are global functions that should be executed ONCE when the page loads, not
// on every HTMX request.
function Startup() {
let pagedPreviewer = null;
// INFO: Generate a print preview of the page if the URL has ?print=true
if (new URL(window.location).searchParams.get("print") === "true") {
showPreview();
}
window.addEventListener("resize", alignSidenotes);
function showPreview() {
if (!pagedPreviewer) {
pagedPreviewer = new Previewer();
}
pagedPreviewer.preview().then(() => {
document.body.classList.add("previewing");
});
// INFO: this is probably not necessary since we open the preview in a new window
// but just in case.
window.addEventListener("popstate", (_) => {
window.location.reload();
});
}
} }
customElements.define(SCROLL_BUTTON_ELEMENT, ScrollButton); customElements.define(SCROLL_BUTTON_ELEMENT, ScrollButton);
customElements.define(TOOLTIP_ELEMENT, ToolTip); customElements.define(TOOLTIP_ELEMENT, ToolTip);
export { XSLTParseProcess, ScrollButton, Previewer, Startup }; window.alignSidenotes = alignSidenotes;
export { XSLTParseProcess, ScrollButton, Previewer, Startup, alignSidenotes };

View File

@@ -109,6 +109,8 @@
.text { .text {
@apply font-serif relative; @apply font-serif relative;
--text-color-rgb: 53, 53, 53;
color: rgb(var(--text-color-rgb));
} }
.text .count { .text .count {
@@ -141,6 +143,8 @@
.text .i, .text .i,
.text .subst, .text .subst,
.text .insertion, .text .insertion,
.text .insertion-marker,
.text .ddel,
.text .del, .text .del,
.text .fn, .text .fn,
.text .anchor { .text .anchor {
@@ -220,7 +224,7 @@
} }
.text .dul { .text .dul {
@apply underline decoration-double; @apply underline decoration-double decoration-[1px];
} }
.text .it { .text .it {
@@ -255,10 +259,35 @@
.text .insertion::after { .text .insertion::after {
@apply text-slate-700; @apply text-slate-700;
margin-left: -0.2em; margin-left: -0.4ch;
content: "⌟"; content: "⌟";
} }
.text .insertion-marker {
@apply text-nowrap;
}
.text .insertion-marker::before {
@apply text-slate-700 text-nowrap text-sm relative bottom-[-0.15rem] -ml-[0.4ch] pr-[0.4ch] inline-block;
}
.text .insertion.pos-left .insertion-marker::before {
content: "🠊";
}
.text .insertion.pos-right .insertion-marker::before {
content: "🠜";
}
.text .insertion.pos-top .insertion-marker::before {
@apply bottom-0 text-xs;
content: "🠟";
}
.text .insertion.pos-bottom .insertion-marker::before {
@apply bottom-0 text-xs;
content: "🠝";
}
.text .nr::before { .text .nr::before {
@apply text-slate-700; @apply text-slate-700;
content: "⸰"; content: "⸰";
@@ -279,26 +308,60 @@
} }
.text .del { .text .del {
@apply line-through; @apply line-through relative;
} }
.text .del .del::before { .text .del .del::before {
content: ""; content: "";
@apply absolute inset-x-0 top-1/2 h-px bg-black; @apply absolute inset-x-0 top-[65%] h-[1px] bg-black w-full;
}
.text .ddel {
@apply line-through relative;
}
.text .ddel::before {
content: "";
@apply absolute inset-x-0 top-[65%] h-[1px] bg-black w-full;
}
.text .ddel {
@apply relative;
}
.text .ddel::before {
top: 55%; top: 55%;
} }
.text .ddel::after {
top: 45%;
}
.text .sidenote { .text .sidenote {
@apply border-l-4 border-slate-200 pl-2 my-4; @apply border-l-4 border-slate-200 pl-2 my-4;
} }
.text .hand { .text .hand {
@apply inline text-blue-950 !font-didone text-[0.9rem]; @apply inline !font-didone text-[0.9rem];
/* darker blue hue */
--text-color-rgb: 0, 0, 39;
color: rgb(var(--text-color-rgb));
} }
.text .er { .text .er {
text-decoration: line-through; background-image: repeating-linear-gradient(
text-decoration-thickness: 17px; -45deg,
rgba(var(--text-color-rgb), 0.5),
transparent 1px,
transparent 6px
);
-webkit-box-decoration-break: clone;
box-decoration-break: clone;
color: transparent;
text-shadow: 0 0 rgb(var(--text-color-rgb));
} }
.text .sidenote-page::before { .text .sidenote-page::before {

View File

@@ -1,132 +0,0 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// TokenType tags a Token with the kind of XML token it was built from.
type TokenType int
// The values mirror the encoding/xml token kinds that Iterate translates.
// Their numeric order is assigned by iota, starting at zero.
const (
StartElement TokenType = iota
EndElement
CharData
Comment
ProcInst
Directive
)
// Element is one entry of the open-element stack kept by Iterate.
type Element struct {
// Name is the local (namespace-less) element name.
Name string
// Attributes maps local attribute names to their values.
Attributes map[string]string
// CharData accumulates every character-data run seen while the element is open.
CharData string
}
// Token is the simplified view of a single encoding/xml token produced by Iterate.
type Token struct {
// Name is the local element name, the processing-instruction target, or a
// fixed label ("CharData", "Comment", "Directive") for the other kinds.
Name string
// Attributes is only populated for start elements.
Attributes map[string]string
// Inner is the token exactly as returned by the decoder.
Inner xml.Token
// Type tells which kind of token this is.
Type TokenType
// Data holds the text of character data, comments, processing instructions and directives.
Data string
}
// TokenResult is what Iterate yields for every token it reads.
type TokenResult[T any] struct {
// State is the initial state value that was passed to Iterate.
State T
// Token is the translated token.
Token Token
// Stack lists the elements that are open after the token was processed.
// It is the iterator's own slice, not a copy.
Stack []Element
}
// Iterate decodes xmlData token by token and yields a *TokenResult for each
// one, together with the stack of currently open elements.
//
// initialState is handed back unchanged in every result. Iteration ends
// silently at end of input; any other decoder error is yielded once as
// (nil, err) and then the sequence stops.
func Iterate[T any](xmlData string, initialState T) iter.Seq2[*TokenResult[T], error] {
	dec := xml.NewDecoder(strings.NewReader(xmlData))
	open := make([]Element, 0)
	state := initialState

	return func(yield func(*TokenResult[T], error) bool) {
		for raw, err := dec.Token(); err != io.EOF; raw, err = dec.Token() {
			if err != nil {
				yield(nil, err)
				return
			}

			var out Token
			switch t := raw.(type) {
			case xml.StartElement:
				// Opening tag: push it so nested character data can be attributed to it.
				pushed := Element{Name: t.Name.Local, Attributes: mapAttributes(t.Attr), CharData: ""}
				open = append(open, pushed)
				out.Name = t.Name.Local
				out.Attributes = pushed.Attributes
				out.Inner = t
				out.Type = StartElement
			case xml.EndElement:
				// Closing tag: drop the innermost open element, if there is one.
				if n := len(open); n > 0 {
					open = open[:n-1]
				}
				out.Name = t.Name.Local
				out.Inner = t
				out.Type = EndElement
			case xml.CharData:
				content := string(t)
				if content != "" {
					// Every open ancestor collects the text, not just the innermost one.
					for i := range open {
						open[i].CharData += content
					}
				}
				out.Name = "CharData"
				out.Inner = t
				out.Data = content
				out.Type = CharData
			case xml.Comment:
				out.Name = "Comment"
				out.Inner = t
				out.Data = string(t)
				out.Type = Comment
			case xml.ProcInst:
				out.Name = t.Target
				out.Inner = t
				out.Data = string(t.Inst)
				out.Type = ProcInst
			case xml.Directive:
				out.Name = "Directive"
				out.Inner = t
				out.Data = string(t)
				out.Type = Directive
			}

			if !yield(&TokenResult[T]{State: state, Token: out, Stack: open}, nil) {
				return
			}
		}
	}
}
// mapAttributes flattens a list of XML attributes into a map keyed by the
// attribute's local name. Namespaces are ignored; when two attributes share a
// local name the later one wins. The result is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	byName := make(map[string]string, len(attrs))
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}

View File

@@ -1,6 +1,6 @@
package xmlmodels package xmlmodels
import xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml" import "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
type RefElement struct { type RefElement struct {
Reference int `xml:"ref,attr"` Reference int `xml:"ref,attr"`

View File

@@ -12,7 +12,7 @@ import (
"sync" "sync"
"time" "time"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
) )
const ( const (

View File

@@ -6,7 +6,7 @@ import (
"iter" "iter"
"slices" "slices"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
) )
type Meta struct { type Meta struct {
@@ -82,7 +82,7 @@ func (m Meta) SendRecieved() iter.Seq[SendRecievedPair] {
type Action struct { type Action struct {
Dates []Date `xml:"date"` Dates []Date `xml:"date"`
Places []RefElement `xml:"place"` Places []RefElement `xml:"location"`
Persons []RefElement `xml:"person"` Persons []RefElement `xml:"person"`
} }

View File

@@ -3,7 +3,7 @@ package xmlmodels
import ( import (
"sync" "sync"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml" "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
) )
var lib *Library var lib *Library

210
xmlparsing/parser.go Normal file
View File

@@ -0,0 +1,210 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// Parser reads XML tokens lazily from a decoder and remembers every token it
// has produced, so that earlier tokens can be replayed.
type Parser struct {
// Stack holds the start-element tokens that are currently open, outermost first.
Stack []*Token
// LastCharData collects every character-data token read so far.
LastCharData []*Token
// pipeline is every token read so far, in document order.
pipeline []*Token
// decoder is the token source; nil when the parser was built from pre-parsed tokens.
decoder *xml.Decoder
}
// NewFromTokens builds a Parser over tokens that were already parsed. The
// parser has no decoder, so it can only replay the given tokens. The slice is
// used as-is, not copied.
func NewFromTokens(tokens []*Token) *Parser {
	capacity := len(tokens)

	parser := new(Parser) // decoder stays nil: nothing left to decode
	parser.Stack = make([]*Token, 0, capacity)
	parser.LastCharData = make([]*Token, 0, capacity)
	parser.pipeline = tokens
	return parser
}
// NewParser returns a Parser that lazily decodes xmlData. No input is read
// until a token is requested.
func NewParser(xmlData string) *Parser {
	source := strings.NewReader(xmlData)
	return &Parser{decoder: xml.NewDecoder(source)}
}
// GetStack returns the parser's stack of currently open start-element tokens.
// The slice is the parser's own, not a copy.
func (p *Parser) GetStack() []*Token {
return p.Stack
}
// Pipeline returns every token the parser holds so far, in order.
// The slice is the parser's own, not a copy.
func (p *Parser) Pipeline() []*Token {
return p.pipeline
}
// PeekFrom returns a sequence that first replays the already-read tokens
// starting at index and then keeps pulling fresh tokens from the decoder
// until the input is exhausted.
//
// If index does not address a token that is already in the pipeline, the
// sequence is empty. A decoder error is yielded once as (nil, err) and ends
// the sequence.
func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
	if index < 0 || index >= len(p.pipeline) {
		// Nothing to replay. Yield nothing at all rather than a (nil, nil)
		// pair, which a ranging caller would dereference as a real token.
		return func(func(*Token, error) bool) {}
	}

	return func(yield func(*Token, error) bool) {
		// len is re-evaluated on purpose: the consumer may trigger more
		// parsing while we replay, which grows the pipeline.
		for i := index; i < len(p.pipeline); i++ {
			if !yield(p.pipeline[i], nil) {
				return
			}
		}

		for {
			token, err := p.Token()
			if err != nil {
				yield(nil, err)
				return
			}
			if token == nil {
				// EOF
				return
			}
			if !yield(token, nil) {
				return
			}
		}
	}
}
// Reset empties the open-element stack. The pipeline, the collected character
// data and the decoder position are left untouched.
func (p *Parser) Reset() {
p.Stack = []*Token{}
}
// Token reads the next token from the decoder, wraps it in a *Token, updates
// the open-element stack and the per-element caches, appends the new token to
// the pipeline and returns it.
//
// It returns (nil, nil) when the parser has no decoder or the input is
// exhausted, and (nil, err) for any other decoder error.
func (p *Parser) Token() (*Token, error) {
	if p.decoder == nil {
		return nil, nil // No more tokens to parse
	}

	start := p.decoder.InputOffset()
	token, err := p.decoder.Token()
	end := p.decoder.InputOffset()
	if err == io.EOF {
		return nil, nil
	} else if err != nil {
		return nil, err
	}

	// Tokens live on in the pipeline, but the decoder only guarantees the
	// bytes behind CharData, Comment, Directive and ProcInst tokens until its
	// next Token call. Inner therefore stores a copy, not the raw token.
	customToken := Token{
		parser:      p,
		Index:       len(p.pipeline),
		Inner:       xml.CopyToken(token),
		StartOffset: start + 1,
		EndOffset:   end,
		Stack:       make([]*Token, len(p.Stack)),
	}

	// INFO: these are just pointers, so it should go fast
	copy(customToken.Stack, p.Stack)

	switch t := token.(type) {
	case xml.StartElement:
		customToken.Name = t.Name.Local
		customToken.Attributes = mapAttributes(t.Attr)
		customToken.Type = StartElement
		// Register as a direct child of the innermost open element, unless
		// that element's child list was already finalised.
		if n := len(p.Stack); n > 0 {
			if parent := p.Stack[n-1]; !parent.childrenParsed {
				parent.children = append(parent.children, &customToken)
			}
		}
		p.Stack = append(p.Stack, &customToken)

	case xml.EndElement:
		if n := len(p.Stack); n > 0 {
			// The element is complete: freeze its caches and pop it.
			element := p.Stack[n-1]
			element.childrenParsed = true
			element.chardataParsed = true
			p.Stack = p.Stack[:n-1]
		}
		customToken.Name = t.Name.Local
		customToken.Attributes = map[string]string{}
		customToken.Type = EndElement

	case xml.CharData:
		text := string(t)
		if text != "" {
			// Every open ancestor collects the text, not only the innermost.
			for _, open := range p.Stack {
				if !open.chardataParsed {
					open.charData += text
				}
			}
		}
		customToken.Data = text
		customToken.Type = CharData
		p.LastCharData = append(p.LastCharData, &customToken)

	case xml.Comment:
		customToken.Type = Comment
		customToken.Data = string(t)

	case xml.ProcInst:
		customToken.Name = t.Target
		customToken.Data = string(t.Inst)
		customToken.Type = ProcInst

	case xml.Directive:
		customToken.Data = string(t)
		customToken.Type = Directive
	}

	p.pipeline = append(p.pipeline, &customToken)
	return &customToken, nil
}
// Previous returns the tokens in the pipeline that come before index. The
// result is nil when index does not address a token in the pipeline; otherwise
// it is a view into the parser's own slice.
func (p *Parser) Previous(index int) (tokens []*Token) {
	if index >= 0 && index < len(p.pipeline) {
		tokens = p.pipeline[:index]
	}
	return tokens
}
// All drains the parser and returns the complete pipeline. It stops at the
// first error and returns that error with a nil slice.
func (p *Parser) All() ([]*Token, error) {
	var failure error
	for _, err := range p.Iterate() {
		if err != nil {
			failure = err
			break
		}
	}
	if failure != nil {
		return nil, failure
	}
	return p.pipeline, nil
}
// Iterate returns a sequence over the parser's tokens. Tokens already in the
// pipeline are replayed first; after that, new tokens are pulled from the
// decoder. A decoder error is yielded once as (nil, err) and ends the sequence.
//
// The position is kept outside the returned function, so ranging over the
// same sequence value a second time resumes where the first pass stopped.
func (p *Parser) Iterate() iter.Seq2[*Token, error] {
	next := 0
	return func(yield func(*Token, error) bool) {
		for {
			var current *Token
			if next < len(p.pipeline) {
				current = p.pipeline[next]
			} else {
				// INFO: next should be max. len(p.pipeline)
				parsed, err := p.Token()
				switch {
				case err != nil:
					yield(nil, err)
					return
				case parsed == nil:
					return // EOF
				}
				current = parsed
			}

			next++
			if !yield(current, nil) {
				return
			}
		}
	}
}
// mapAttributes turns a slice of xml.Attr into a map from local attribute
// name to value. Namespaces are dropped; if a local name occurs more than
// once, the last value is kept. The returned map is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	result := make(map[string]string, len(attrs))
	for i := 0; i < len(attrs); i++ {
		a := attrs[i]
		result[a.Name.Local] = a.Value
	}
	return result
}

126
xmlparsing/token.go Normal file
View File

@@ -0,0 +1,126 @@
package xmlparsing
import (
"encoding/xml"
"iter"
"strings"
)
// TokenType tags a Token with the kind of XML token it was built from.
type TokenType int
// One value per encoding/xml token kind the parser translates.
// Their numeric order is assigned by iota, starting at zero.
const (
StartElement TokenType = iota
EndElement
CharData
Comment
ProcInst
Directive
)
// Token is a single XML token produced by a Parser, together with enough
// context (position, open ancestors, owning parser) to navigate from it.
type Token struct {
// Name is the local element name for start/end elements, or the target of a
// processing instruction; it is empty for the other kinds.
Name string
// Attributes maps local attribute names to values; empty for end elements.
Attributes map[string]string
// Inner is the underlying encoding/xml token.
Inner xml.Token
// Type tells which kind of token this is.
Type TokenType
// Data is the text of character data, comments, processing instructions and directives.
Data string
// Stack is a snapshot of the elements that were open when this token was read.
Stack []*Token
// StartOffset is the decoder's input offset before the token was read, plus one.
StartOffset int64
// EndOffset is the decoder's input offset right after the token was read.
EndOffset int64
// Index is the token's position in the parser's pipeline.
Index int
// charData caches the character data seen inside this element.
charData string
// children caches start-element tokens found inside this element.
children []*Token
// parser is the Parser that produced the token.
parser *Parser
// childrenParsed reports that children is complete.
childrenParsed bool
// chardataParsed reports that charData is complete.
chardataParsed bool
}
// String currently always returns the empty string: nothing is written to
// the builder yet.
// NOTE(review): looks like an unfinished serializer — confirm the intended output.
func (t *Token) String() string {
	var sb strings.Builder
	return sb.String()
}
// Element returns every token of the element that t opens: t itself, all
// nested tokens, and the matching end element. It returns nil when t is not
// a start element.
//
// Reading ahead may pull new tokens from the decoder. If the input ends or a
// decoder error occurs before the element is closed, the tokens collected up
// to that point are returned.
func (t *Token) Element() (tokens []*Token) {
	if t.Type != StartElement {
		return
	}

	// Track nesting depth rather than matching on the name alone; otherwise
	// an element nested inside one of the same name (e.g. <del><del>…) would
	// end the walk at the inner closing tag.
	depth := 0
	for token, err := range t.parser.PeekFrom(t.Index) {
		if err != nil || token == nil {
			return tokens
		}
		tokens = append(tokens, token)

		switch token.Type {
		case StartElement:
			depth++
		case EndElement:
			depth--
			if depth <= 0 {
				return tokens
			}
		}
	}
	return
}
// Next returns the owning parser's PeekFrom sequence starting at this token's
// own index, so the sequence begins with the token at that index rather than
// the one after it.
func (t *Token) Next() iter.Seq2[*Token, error] {
return t.parser.PeekFrom(t.Index)
}
// Previous returns the tokens the owning parser holds before this one, or nil
// when this token's index is zero or negative.
func (t *Token) Previous() (tokens []*Token) {
	if t.Index > 0 {
		tokens = t.parser.Previous(t.Index)
	}
	return tokens
}
// Children returns the start-element tokens that are direct children of t.
// The result is cached; it is nil when t is not a start element.
//
// If the element has not been read to its end yet, Children reads ahead
// through the owning parser until the matching end element (or until the
// input ends or fails).
func (t *Token) Children() (tokens []*Token) {
	if t.childrenParsed {
		return t.children
	}
	if t.Type != StartElement {
		return
	}

	// Walk the element with depth tracking so that only direct children are
	// collected. The list is rebuilt from scratch: the parser may already
	// have recorded some children, and appending to those would duplicate them.
	var direct []*Token
	depth, seen := 0, false
walk:
	for token, err := range t.parser.PeekFrom(t.Index) {
		if err != nil || token == nil {
			break
		}
		seen = true

		switch token.Type {
		case StartElement:
			// depth 0 is t itself, depth 1 is a direct child, deeper ones are descendants.
			if depth == 1 {
				direct = append(direct, token)
			}
			depth++
		case EndElement:
			depth--
			if depth <= 0 {
				break walk
			}
		}
	}
	if !seen {
		// Nothing could be read for this token; leave the cache untouched.
		return
	}

	t.children = direct
	t.childrenParsed = true
	return t.children
}
// CharData returns the text belonging to the token. For character data,
// comments, processing instructions and directives this is the token's own
// Data. For a start element it is the concatenation of all character data
// inside the element, computed on first use and then cached. Any other token
// yields the empty string unless a cached value is already marked complete.
func (t *Token) CharData() string {
	switch t.Type {
	case CharData, ProcInst, Comment, Directive:
		return t.Data
	}

	if t.chardataParsed {
		return t.charData
	}

	elementTokens := t.Element()
	if len(elementTokens) == 0 {
		return ""
	}

	var text strings.Builder
	for _, tok := range elementTokens {
		if tok.Type != CharData {
			continue
		}
		text.WriteString(tok.Data)
	}

	t.charData = text.String()
	t.chardataParsed = true
	return t.charData
}
// SubParser returns a new Parser restricted to the tokens of the element that
// t opens, or nil when t is not a start element.
func (t *Token) SubParser() *Parser {
	if t.Type == StartElement {
		return NewFromTokens(t.Element())
	}
	return nil
}