Lots of stuff

This commit is contained in:
Simon Martens
2025-06-24 18:20:06 +02:00
parent 3127446dab
commit 9563145aeb
29 changed files with 1694 additions and 1386 deletions

View File

@@ -3,6 +3,7 @@ package controllers
import (
"strconv"
"github.com/Theodor-Springmann-Stiftung/lenz-web/helpers/functions"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/gofiber/fiber/v2"
)
@@ -21,8 +22,8 @@ func GetLetter(c *fiber.Ctx) error {
}
np := lib.NextPrev(meta)
text := lib.Letters.Item(letter)
parsed := functions.ParseText(lib, meta)
tradition := lib.Traditions.Item(letter)
return c.Render("/brief/", fiber.Map{"meta": meta, "text": text, "tradition": tradition, "next": np.Next, "prev": np.Prev})
return c.Render("/brief/", fiber.Map{"meta": meta, "text": parsed, "tradition": tradition, "next": np.Next, "prev": np.Prev})
}

View File

@@ -3,7 +3,7 @@ package functions
import (
"strings"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
type outType int
@@ -101,7 +101,7 @@ func (o *outToken) ClassesFromAttrs(attrs map[string]string) {
}
}
func Default(token xmlparsing.Token) outToken {
func Default(token *xmlparsing.Token) outToken {
o := outToken{}
switch token.Type {
case xmlparsing.StartElement:
@@ -126,7 +126,7 @@ func (s *Tokens) Prepend(token outToken) {
s.Out = append([]outToken{token}, s.Out...)
}
func (s *Tokens) AppendDefaultElement(token xmlparsing.Token, ids ...string) {
func (s *Tokens) AppendDefaultElement(token *xmlparsing.Token, ids ...string) {
t := Default(token)
if len(ids) > 0 {
t.Id = ids[0]

View File

@@ -5,8 +5,8 @@ import (
"strconv"
"strings"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlmodels"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
@@ -58,168 +58,189 @@ func (s *LenzParseState) AppendNote(note Note) {
s.Notes = append(s.Notes, note)
}
func Parse(lib *xmlmodels.Library) func(s string) string {
return func(s string) string {
if len(s) == 0 {
return ""
}
func ParseText(lib *xmlmodels.Library, meta *xmlmodels.Meta) string {
if lib == nil {
return ""
}
ps := LenzParseState{CloseElement: true, PC: "1"}
text := lib.Letters.Item(meta.Letter)
if text == nil {
return ""
}
for elem, err := range xmlparsing.Iterate(s, ps) {
if err != nil {
return err.Error()
}
return Parse(lib, meta, text.Content)
}
if elem.Token.Type < 3 {
if elem.Token.Type == xmlparsing.EndElement {
if elem.Token.Name == "sidenote" {
ps.LineBreak = true
}
if ps.CloseElement {
ps.Tokens.AppendEndElement()
} else {
ps.CloseElement = true
}
continue
}
switch elem.Token.Name {
case "sidenote":
id := RandString(8)
ps.Tokens.AppendDefaultElement(elem.Token)
ps.Break = false
ps.Tokens.AppendCustomAttribute("aria-describedby", id)
if elem.Token.Attributes["annotation"] != "" ||
elem.Token.Attributes["page"] != "" ||
elem.Token.Attributes["pos"] != "" {
note := Note{Id: id}
note.Tokens.AppendDivElement(id, "note-sidenote-meta")
ps.Tokens.AppendDivElement(id, "inline-sidenote-meta")
if elem.Token.Attributes["page"] != "" {
note.Tokens.AppendDivElement("", "sidenote-page")
note.Tokens.AppendText(elem.Token.Attributes["page"])
note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-page")
ps.Tokens.AppendText(elem.Token.Attributes["page"])
ps.Tokens.AppendEndElement()
}
if elem.Token.Attributes["annotation"] != "" {
note.Tokens.AppendDivElement("", "sidenote-note")
note.Tokens.AppendText(elem.Token.Attributes["annotation"])
note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-note")
ps.Tokens.AppendText(elem.Token.Attributes["annotation"])
ps.Tokens.AppendEndElement()
}
if elem.Token.Attributes["pos"] != "" {
note.Tokens.AppendDivElement("", "sidenote-pos")
note.Tokens.AppendText(elem.Token.Attributes["pos"])
note.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "sidenote-pos")
ps.Tokens.AppendText(elem.Token.Attributes["pos"])
ps.Tokens.AppendEndElement()
}
note.Tokens.AppendEndElement() // sidenote-meta
ps.Tokens.AppendEndElement()
ps.AppendNote(note)
}
case "note":
id := RandString(8)
ps.Tokens.AppendLink("#"+id, "nanchor-note")
ps.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement(id, "note", "note-note")
case "nr":
ext := elem.Token.Attributes["extent"]
if ext == "" {
ext = "1"
}
extno, err := strconv.Atoi(ext)
if err != nil {
extno = 1
}
ps.Tokens.AppendDefaultElement(elem.Token)
for i := 0; i < extno; i++ {
ps.Tokens.AppendText("&nbsp;")
}
case "hand":
id := elem.Token.Attributes["ref"]
idno, err := strconv.Atoi(id)
var person *xmlmodels.PersonDef
if err == nil {
person = lib.Persons.Item(idno)
}
hand := "N/A"
if person != nil {
hand = person.Name
}
note := Note{Id: id}
note.Tokens.AppendDivElement(id, "note-hand")
note.Tokens.AppendText(hand)
note.Tokens.AppendEndElement()
ps.AppendNote(note)
ps.Tokens.AppendDivElement(id, "inline-hand")
ps.Tokens.AppendText(hand)
ps.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "hand")
ps.Tokens.AppendCustomAttribute("aria-describedby", id)
case "line":
if val := elem.Token.Attributes["type"]; val != "empty" {
ps.LC += 1
if ps.Break {
ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC))
}
ps.Tokens.AppendDefaultElement(elem.Token) // This is for indents, must be closed
} else {
ps.Tokens.AppendEmptyElement("br", "", "empty")
ps.CloseElement = false // Here Indents make no sense, so we dont open an element
}
ps.LineBreak = true
case "page":
ps.PC = elem.Token.Attributes["index"]
ps.PageBreak = true
ps.CloseElement = false
default:
if !ps.Break && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" {
ps.Break = true
}
if ps.PageBreak && ps.PC != "1" && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" {
ps.PageBreak = false
note := Note{Id: ps.PC}
quality := "outside"
if !ps.LineBreak {
quality = "inside"
}
ps.Tokens.AppendDivElement("", "eanchor-page", "eanchor-page-"+quality)
ps.Tokens.AppendCustomAttribute("aria-describedby", ps.PC)
ps.Tokens.AppendEndElement()
ps.Tokens.AppendDivElement("", "page-counter", "page-"+quality)
ps.Tokens.AppendText(ps.PC)
ps.Tokens.AppendEndElement()
note.Tokens.AppendDivElement(ps.PC, "page", "page-"+quality)
note.Tokens.AppendText(ps.PC)
note.Tokens.AppendEndElement()
ps.Count = append(ps.Count, note)
strings.TrimLeft(elem.Token.Data, " \t\n\r")
}
if ps.LineBreak && elem.Token.Type == xmlparsing.CharData && strings.TrimSpace(elem.Token.Data) != "" {
strings.TrimLeft(elem.Token.Data, " \t\n\r")
ps.LineBreak = false
}
ps.Tokens.AppendDefaultElement(elem.Token)
}
}
}
return ps.String()
// TemplateParse binds lib and returns a function taking the letter metadata and
// a raw XML string, which simply forwards both to Parse together with lib.
func TemplateParse(lib *xmlmodels.Library) func(letter *xmlmodels.Meta, s string) string {
	render := func(letter *xmlmodels.Meta, s string) string {
		return Parse(lib, letter, s)
	}
	return render
}
// Parse walks the XML fragment s token by token, feeds the tokens into a
// LenzParseState and returns the result of its String method.
//
// An empty s yields "". If the XML decoder reports an error, processing stops
// and the error message is returned in place of the rendered text.
//
// lib supplies the person register used to resolve <hand ref="..."> elements;
// when lib is nil the hand is rendered as "N/A". letter is part of the
// signature but is not read by this function.
//
// The elements insertion, sidenote, note, nr, hand, line and page receive
// special treatment; every other start element and all character data are
// passed to Tokens.AppendDefaultElement. Comments, processing instructions and
// directives are dropped.
func Parse(lib *xmlmodels.Library, letter *xmlmodels.Meta, s string) string {
	if len(s) == 0 {
		return ""
	}

	// Sidenote attributes that are rendered as meta information, in output
	// order, together with the CSS class of the div wrapping each value.
	sidenoteMeta := []struct{ attr, class string }{
		{"page", "sidenote-page"},
		{"annotation", "sidenote-note"},
		{"pos", "sidenote-pos"},
	}

	ps := LenzParseState{CloseElement: true, PC: "1"}
	parser := xmlparsing.NewParser(s)

	for elem, err := range parser.Iterate() {
		if err != nil {
			return err.Error()
		}

		// Only start elements, end elements and character data are rendered.
		if elem.Type > xmlparsing.CharData {
			continue
		}

		if elem.Type == xmlparsing.EndElement {
			if elem.Name == "sidenote" {
				ps.LineBreak = true
			}
			// CloseElement is false when the matching start tag did not open
			// an output element (empty lines, pages); skip exactly one close.
			if ps.CloseElement {
				ps.Tokens.AppendEndElement()
			} else {
				ps.CloseElement = true
			}
			continue
		}

		switch elem.Name {
		case "insertion":
			ps.Tokens.AppendDefaultElement(elem)
			ps.Tokens.AppendDivElement("", "insertion-marker")
			ps.Tokens.AppendEndElement()

		case "sidenote":
			id := RandString(8)
			ps.Tokens.AppendDefaultElement(elem)
			ps.Break = false
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

			hasMeta := false
			for _, m := range sidenoteMeta {
				if elem.Attributes[m.attr] != "" {
					hasMeta = true
					break
				}
			}
			if !hasMeta {
				break
			}

			// The meta block is emitted twice: once into a separate note and
			// once inline into the running text.
			note := Note{Id: id}
			note.Tokens.AppendDivElement(id, "note-sidenote-meta")
			ps.Tokens.AppendDivElement(id, "inline-sidenote-meta")
			for _, m := range sidenoteMeta {
				val := elem.Attributes[m.attr]
				if val == "" {
					continue
				}
				note.Tokens.AppendDivElement("", m.class)
				note.Tokens.AppendText(val)
				note.Tokens.AppendEndElement()
				ps.Tokens.AppendDivElement("", m.class)
				ps.Tokens.AppendText(val)
				ps.Tokens.AppendEndElement()
			}
			note.Tokens.AppendEndElement() // sidenote-meta
			ps.Tokens.AppendEndElement()
			ps.AppendNote(note)

		case "note":
			id := RandString(8)
			ps.Tokens.AppendLink("#"+id, "nanchor-note")
			ps.Tokens.AppendEndElement()
			ps.Tokens.AppendDivElement(id, "note", "note-note")

		case "nr":
			// A missing or non-numeric extent counts as 1.
			extno, err := strconv.Atoi(elem.Attributes["extent"])
			if err != nil {
				extno = 1
			}
			ps.Tokens.AppendDefaultElement(elem)
			for i := 0; i < extno; i++ {
				ps.Tokens.AppendText("&nbsp;")
			}

		case "hand":
			id := RandString(8)
			hand := "N/A"
			if idno, err := strconv.Atoi(elem.Attributes["ref"]); err == nil && lib != nil {
				if person := lib.Persons.Item(idno); person != nil {
					hand = person.Name
				}
			}

			note := Note{Id: id}
			note.Tokens.AppendDivElement(id, "note-hand")
			note.Tokens.AppendText(hand)
			note.Tokens.AppendEndElement()
			ps.AppendNote(note)

			ps.Tokens.AppendDivElement(id, "inline-hand")
			ps.Tokens.AppendText(hand)
			ps.Tokens.AppendEndElement()
			// This div stays open; the </hand> end element closes it.
			ps.Tokens.AppendDivElement("", "hand")
			ps.Tokens.AppendCustomAttribute("aria-describedby", id)

		case "line":
			if val := elem.Attributes["type"]; val != "empty" {
				ps.LC += 1
				if ps.Break {
					ps.Tokens.AppendEmptyElement("br", ps.PC+"-"+strconv.Itoa(ps.LC))
				}
				ps.Tokens.AppendDefaultElement(elem) // This is for indents, must be closed
			} else {
				ps.Tokens.AppendEmptyElement("br", "", "empty")
				ps.CloseElement = false // Here indents make no sense, so we don't open an element
			}
			ps.LineBreak = true

		case "page":
			ps.PC = elem.Attributes["index"]
			ps.PageBreak = true
			ps.CloseElement = false

		default:
			// Leading-whitespace trimming never changes whether the text is
			// blank, so this is computed once for all checks below.
			isText := elem.Type == xmlparsing.CharData && strings.TrimSpace(elem.Data) != ""

			if isText {
				ps.Break = true
			}

			// First visible text after a page break (other than page "1"):
			// emit the page anchor and counter right before it.
			if isText && ps.PageBreak && ps.PC != "1" {
				ps.PageBreak = false
				note := Note{Id: ps.PC}
				quality := "outside"
				if !ps.LineBreak {
					quality = "inside"
				}
				ps.Tokens.AppendDivElement("", "eanchor-page", "eanchor-page-"+quality)
				ps.Tokens.AppendCustomAttribute("aria-describedby", ps.PC)
				ps.Tokens.AppendEndElement()
				ps.Tokens.AppendDivElement("", "page-counter", "page-"+quality)
				ps.Tokens.AppendText(ps.PC)
				ps.Tokens.AppendEndElement()
				note.Tokens.AppendDivElement(ps.PC, "page", "page-"+quality)
				note.Tokens.AppendText(ps.PC)
				note.Tokens.AppendEndElement()
				ps.Count = append(ps.Count, note)
				// strings.TrimLeft returns a new string; the result has to be
				// stored for the trimming to have any effect.
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
			}

			// First visible text after a line break starts without leading
			// whitespace.
			if isText && ps.LineBreak {
				elem.Data = strings.TrimLeft(elem.Data, " \t\n\r")
				ps.LineBreak = false
			}

			ps.Tokens.AppendDefaultElement(elem)
		}
	}

	return ps.String()
}

View File

@@ -53,7 +53,7 @@ func main() {
engine := templating.New(&views.LayoutFS, &views.RoutesFS)
engine.AddFuncs(lib.FuncMap())
engine.AddFunc("ParseGeneric", functions.Parse(lib))
engine.AddFunc("ParseGeneric", functions.TemplateParse(lib))
storage := memory.New(memory.Config{
GCInterval: 24 * time.Hour,
})

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

View File

@@ -30,9 +30,18 @@
</div>
<div class="text flex flex-row print:flex-col">
{{- Safe (ParseGeneric .text.Content) -}}
{{- Safe $model.text -}}
</div>
<div class="traditions mt-12 pt-3 border-t-gray-200 border-t-1 max-w-[90ch] print:border-none">
{{ template "_lettertrad" $model.meta -}}
</div>
<script type="module">
// WARNING: We need to wait for the fonts to settle before rendering anything
document.fonts.ready.then(() => {
if (window.alignSidenotes) {
window.alignSidenotes();
}
});
</script>

View File

@@ -37,8 +37,8 @@
{{ else if $i -}}
,
{{ end }}
{{- $person := Person $p.Reference -}}
{{- $person.Name -}}
{{- $person := Person $p.Reference }}
{{ $person.Name -}}
{{- end -}}
</div>
<div class="mx-3">
@@ -57,8 +57,25 @@
{{- if $i -}}
,
{{- end -}}
{{- $person := Person $p.Reference -}}
{{- $person.Name -}}
{{- $person := Person $p.Reference }}
{{ $person.Name -}}
{{- end -}}
{{ if and $sr.Received.Places (len $sr.Received.Places) }}
{{- range $i, $p := $sr.Received.Places -}}
{{- $place := Place $p.Reference }}
{{- if and $i (eq $i (Minus (len $sr.Received.Places) 1)) -}}
und
{{- end -}}
{{- if $i -}}
,
{{- end -}}
{{- if eq $i 0 -}}
&nbsp;({{- $place.Name -}}
{{- else -}}
{{ $place.Name -}}
{{- end -}}
{{- end -}}
)
{{- end -}}
</div>
{{- else -}}

View File

@@ -5,7 +5,7 @@
{{- (App $trad.Reference).Name -}}
</div>
<div class="tradition-text text hyphens-auto font-sans">
{{- Safe (ParseGeneric $trad.Content) -}}
{{- Safe (ParseGeneric $model $trad.Content) -}}
</div>
</div>
{{- end -}}

View File

@@ -158,28 +158,89 @@ class ScrollButton extends HTMLElement {
}
}
let positionedIntervals = [];
// Clears the shared list of occupied vertical intervals and then runs the
// three alignment passes (page counters, hand notes, sidenote meta) in order.
function alignSidenotes() {
  positionedIntervals = [];
  const passes = [
    [".count", ".page", ".eanchor-page"],
    [".notes", ".note-hand", ".hand"],
    [".notes", ".note-sidenote-meta", ".sidenote"],
  ];
  for (const [container, align, alignto] of passes) {
    _alignSidenotes(container, align, alignto);
  }
}
/**
 * Places every element matching `align` inside `container` at the vertical
 * position of the element in `.fulltext` that references it via
 * `aria-describedby`, pushing notes down so they do not overlap intervals
 * already recorded in `positionedIntervals`.
 *
 * @param {string} container CSS selector of the element holding the notes.
 * @param {string} align     CSS selector of the notes inside the container.
 * @param {string} alignto   CSS selector of the anchors inside `.fulltext`.
 */
function _alignSidenotes(container, align, alignto) {
  const fulltext = document.querySelector(".fulltext");
  const cont = document.querySelector(container);
  if (!cont) return;
  const notes = Array.from(cont.querySelectorAll(align));

  // Reset classes and inline styles
  notes.forEach((note) => {
    note.classList.remove("margin-note");
    note.style.top = "";
  });

  // Skip on print
  if (window.matchMedia("print").matches) return;

  const textRect = cont.getBoundingClientRect();
  const GUTTER = 0; // space in pixels between notes

  notes.forEach((note) => {
    const noteId = note.id;
    if (!noteId) return;

    // Pages without a .fulltext element have nothing to anchor to; treat that
    // like a missing anchor instead of throwing on a null reference.
    const anchor = fulltext
      ? fulltext.querySelector(`${alignto}[aria-describedby="${noteId}"]`)
      : null;
    if (!anchor) return;

    note.classList.add("margin-note");
    const anchorRect = anchor.getBoundingClientRect();
    const baseTop = anchorRect.top - textRect.top;
    const noteHeight = note.getBoundingClientRect().height;
    let top = baseTop;

    // Adjust to prevent overlap: moving below one interval may create a new
    // overlap with another, so repeat until a full pass finds no collision.
    let collision;
    do {
      collision = false;
      for (const interval of positionedIntervals) {
        if (top < interval.bottom && top + noteHeight > interval.top) {
          top = interval.bottom + GUTTER;
          collision = true;
        }
      }
    } while (collision);

    // Record this note's interval
    positionedIntervals.push({ top, bottom: top + noteHeight });
    note.style.top = `${top}px`;
  });

  notes.forEach((note) => {
    note.style.visibility = "visible";
  });
}
// INFO: these are global functions that should be executed ONCE when the page loads, not
// on every HTMX request.
function Startup() {
let pagedPreviewer = null;
const positionedIntervals = [];
// INFO: Generate a print preview of the page if the URL has ?print=true
if (new URL(window.location).searchParams.get("print") === "true") {
showPreview();
}
// INFO: Listeners for sidenotes
window.addEventListener("load", () => {
alignSidenotes();
});
window.addEventListener("resize", alignSidenotes);
if (htmx) {
window.addEventListener("htmx:afterSettle", (_) => {
alignSidenotes();
});
}
function showPreview() {
if (!pagedPreviewer) {
pagedPreviewer = new Previewer();
@@ -195,77 +256,11 @@ function Startup() {
window.location.reload();
});
}
function alignSidenotes() {
_alignSidenotes(".count", ".page", ".eanchor-page");
_alignSidenotes(".notes", ".note-hand", ".hand");
_alignSidenotes(".notes", ".note-sidenote-meta", ".sidenote");
}
function _alignSidenotes(container, align, alignto) {
const fulltext = document.querySelector(".fulltext");
const cont = document.querySelector(container);
if (!cont) return;
const notes = Array.from(cont.querySelectorAll(align));
// Reset classes and inline styles
notes.forEach((note) => {
note.classList.remove("margin-note");
note.style.top = "";
});
// Skip on print
if (window.matchMedia("print").matches) return;
const textRect = cont.getBoundingClientRect();
const GUTTER = 0; // space in pixels between notes
notes.forEach((note) => {
const noteId = note.id;
if (!noteId) return;
const anchor = fulltext.querySelector(`${alignto}[aria-describedby="${noteId}"]`);
if (!anchor) return;
note.classList.add("margin-note");
const anchorRect = anchor.getBoundingClientRect();
const baseTop = anchorRect.top - textRect.top;
const noteHeight = note.getBoundingClientRect().height;
let top = baseTop;
// Adjust to prevent overlap
let collision;
do {
collision = false;
for (const interval of positionedIntervals) {
const intervalTop = interval.top;
const intervalBottom = interval.bottom;
if (top < intervalBottom && top + noteHeight > intervalTop) {
console.log("Collision detected", {
top,
bottom: top + noteHeight,
intervalTop,
intervalBottom,
newTop: intervalBottom + GUTTER,
});
top = intervalBottom + GUTTER;
collision = true;
}
}
} while (collision);
// Record this note's interval
positionedIntervals.push({ top, bottom: top + noteHeight });
note.style.top = `${top}px`;
});
notes.forEach((note) => {
note.style.visibility = "visible";
});
}
}
customElements.define(SCROLL_BUTTON_ELEMENT, ScrollButton);
customElements.define(TOOLTIP_ELEMENT, ToolTip);
export { XSLTParseProcess, ScrollButton, Previewer, Startup };
window.alignSidenotes = alignSidenotes;
export { XSLTParseProcess, ScrollButton, Previewer, Startup, alignSidenotes };

View File

@@ -109,6 +109,8 @@
.text {
@apply font-serif relative;
--text-color-rgb: 53, 53, 53;
color: rgb(var(--text-color-rgb));
}
.text .count {
@@ -141,6 +143,8 @@
.text .i,
.text .subst,
.text .insertion,
.text .insertion-marker,
.text .ddel,
.text .del,
.text .fn,
.text .anchor {
@@ -220,7 +224,7 @@
}
.text .dul {
@apply underline decoration-double;
@apply underline decoration-double decoration-[1px];
}
.text .it {
@@ -255,10 +259,35 @@
.text .insertion::after {
@apply text-slate-700;
margin-left: -0.2em;
margin-left: -0.4ch;
content: "⌟";
}
.text .insertion-marker {
@apply text-nowrap;
}
.text .insertion-marker::before {
@apply text-slate-700 text-nowrap text-sm relative bottom-[-0.15rem] -ml-[0.4ch] pr-[0.4ch] inline-block;
}
.text .insertion.pos-left .insertion-marker::before {
content: "🠊";
}
.text .insertion.pos-right .insertion-marker::before {
content: "🠜";
}
.text .insertion.pos-top .insertion-marker::before {
@apply bottom-0 text-xs;
content: "🠟";
}
.text .insertion.pos-bottom .insertion-marker::before {
@apply bottom-0 text-xs;
content: "🠝";
}
.text .nr::before {
@apply text-slate-700;
content: "⸰";
@@ -279,26 +308,60 @@
}
.text .del {
@apply line-through;
@apply line-through relative;
}
.text .del .del::before {
content: "";
@apply absolute inset-x-0 top-1/2 h-px bg-black;
@apply absolute inset-x-0 top-[65%] h-[1px] bg-black w-full;
}
.text .ddel {
@apply line-through relative;
}
.text .ddel::before {
content: "";
@apply absolute inset-x-0 top-[65%] h-[1px] bg-black w-full;
}
.text .ddel {
@apply relative;
}
.text .ddel::before {
top: 55%;
}
.text .ddel::after {
top: 45%;
}
.text .sidenote {
@apply border-l-4 border-slate-200 pl-2 my-4;
}
.text .hand {
@apply inline text-blue-950 !font-didone text-[0.9rem];
@apply inline !font-didone text-[0.9rem];
/* darker blue hue */
--text-color-rgb: 0, 0, 39;
color: rgb(var(--text-color-rgb));
}
.text .er {
text-decoration: line-through;
text-decoration-thickness: 17px;
background-image: repeating-linear-gradient(
-45deg,
rgba(var(--text-color-rgb), 0.5),
transparent 1px,
transparent 6px
);
-webkit-box-decoration-break: clone;
box-decoration-break: clone;
color: transparent;
text-shadow: 0 0 rgb(var(--text-color-rgb));
}
.text .sidenote-page::before {

View File

@@ -1,132 +0,0 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// TokenType identifies which kind of XML token a Token was built from.
type TokenType int
// Token kinds, numbered from zero in declaration order.
const (
// StartElement marks an opening tag.
StartElement TokenType = iota
// EndElement marks a closing tag.
EndElement
// CharData marks character data between tags.
CharData
// Comment marks an XML comment.
Comment
// ProcInst marks a processing instruction.
ProcInst
// Directive marks an XML directive.
Directive
)
// Element is one entry of the stack of currently open elements kept by Iterate.
type Element struct {
// Name is the local name of the element.
Name string
// Attributes maps local attribute names to their values.
Attributes map[string]string
// CharData collects the character data seen while the element is on the stack.
CharData string
}
// Token is the simplified form of an xml.Token produced by Iterate.
type Token struct {
// Name is the local element name for start/end elements, the target for
// processing instructions, and a fixed label ("CharData", "Comment",
// "Directive") for the remaining kinds.
Name string
// Attributes is only set for start elements.
Attributes map[string]string
// Inner is the underlying token as returned by the decoder.
Inner xml.Token
// Type is the kind of token.
Type TokenType
// Data holds the text content of character data, comments, processing
// instructions and directives.
Data string
}
// TokenResult is the value yielded by Iterate for every decoded token.
type TokenResult[T any] struct {
// State is the initial state value handed to Iterate.
State T
// Token is the token that was just decoded.
Token Token
// Stack lists the elements open after the token was applied.
// NOTE(review): this appears to be the iterator's own slice, not a copy, so
// later tokens may change what an earlier result sees — confirm before retaining it.
Stack []Element
}
// Iterate decodes xmlData and yields one TokenResult per XML token, carrying
// initialState, the simplified token and the stack of currently open elements.
// A decoding error is yielded once with a nil result and ends the sequence;
// reaching the end of the input ends it silently.
func Iterate[T any](xmlData string, initialState T) iter.Seq2[*TokenResult[T], error] {
	decoder := xml.NewDecoder(strings.NewReader(xmlData))
	stack := []Element{}
	state := initialState

	return func(yield func(*TokenResult[T], error) bool) {
		for {
			raw, err := decoder.Token()
			if err != nil {
				if err != io.EOF {
					yield(nil, err)
				}
				return
			}

			var tok Token
			switch t := raw.(type) {
			case xml.StartElement:
				attrs := mapAttributes(t.Attr)
				stack = append(stack, Element{Name: t.Name.Local, Attributes: attrs, CharData: ""})
				tok = Token{Name: t.Name.Local, Attributes: attrs, Inner: t, Type: StartElement}

			case xml.EndElement:
				if depth := len(stack); depth > 0 {
					stack = stack[:depth-1]
				}
				tok = Token{Name: t.Name.Local, Inner: t, Type: EndElement}

			case xml.CharData:
				text := string(t)
				if text != "" {
					// Every open element accumulates the text, not just the innermost.
					for i := range stack {
						stack[i].CharData += text
					}
				}
				tok = Token{Name: "CharData", Inner: t, Data: text, Type: CharData}

			case xml.Comment:
				tok = Token{Name: "Comment", Inner: t, Data: string(t), Type: Comment}

			case xml.ProcInst:
				tok = Token{Name: t.Target, Inner: t, Data: string(t.Inst), Type: ProcInst}

			case xml.Directive:
				tok = Token{Name: "Directive", Inner: t, Data: string(t), Type: Directive}
			}

			if !yield(&TokenResult[T]{State: state, Token: tok, Stack: stack}, nil) {
				return
			}
		}
	}
}
// mapAttributes builds a lookup from each attribute's local name to its value.
// The namespace is ignored; when a local name occurs more than once the last
// value wins. The returned map is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	byName := map[string]string{}
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}

View File

@@ -1,6 +1,6 @@
package xmlmodels
import xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
import "github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
type RefElement struct {
Reference int `xml:"ref,attr"`

View File

@@ -12,7 +12,7 @@ import (
"sync"
"time"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
const (

View File

@@ -6,7 +6,7 @@ import (
"iter"
"slices"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
type Meta struct {
@@ -82,7 +82,7 @@ func (m Meta) SendRecieved() iter.Seq[SendRecievedPair] {
type Action struct {
Dates []Date `xml:"date"`
Places []RefElement `xml:"place"`
Places []RefElement `xml:"location"`
Persons []RefElement `xml:"person"`
}

View File

@@ -3,7 +3,7 @@ package xmlmodels
import (
"sync"
xmlparsing "github.com/Theodor-Springmann-Stiftung/lenz-web/xml"
"github.com/Theodor-Springmann-Stiftung/lenz-web/xmlparsing"
)
var lib *Library

210
xmlparsing/parser.go Normal file
View File

@@ -0,0 +1,210 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// Parser turns XML input into Tokens on demand and remembers every token it
// has produced, so iteration can be repeated or resumed from any index.
type Parser struct {
// Stack holds the start elements that are currently open; Token pushes on
// every start element and pops on every end element.
Stack []*Token
// LastCharData collects every character-data token produced by Token.
LastCharData []*Token
// pipeline holds all tokens produced so far, in document order.
pipeline []*Token
// decoder is the underlying XML decoder; nil when the parser was created
// from pre-parsed tokens.
decoder *xml.Decoder
}
// NewFromTokens creates a Parser that replays the given tokens. It has no
// decoder, so it never produces tokens beyond the ones passed in.
func NewFromTokens(tokens []*Token) *Parser {
	n := len(tokens)
	p := &Parser{pipeline: tokens}
	p.Stack = make([]*Token, 0, n)
	p.LastCharData = make([]*Token, 0, n)
	return p
}
// NewParser creates a Parser that decodes xmlData lazily as tokens are requested.
func NewParser(xmlData string) *Parser {
	reader := strings.NewReader(xmlData)
	return &Parser{decoder: xml.NewDecoder(reader)}
}
// GetStack returns the parser's stack of currently open start elements.
// The slice is returned as is, not copied.
func (p *Parser) GetStack() []*Token {
	open := p.Stack
	return open
}
// Pipeline returns all tokens produced so far, in document order.
// The slice is returned as is, not copied.
func (p *Parser) Pipeline() []*Token {
	produced := p.pipeline
	return produced
}
// PeekFrom returns a sequence that starts at the already produced token with
// the given index, continues through the rest of the buffered tokens and then
// keeps pulling fresh tokens from the decoder until the input ends.
//
// If index does not refer to a buffered token at the time of the call, the
// sequence yields a single (nil, nil) pair. A decoding error is yielded once
// with a nil token and ends the sequence.
func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
	buffered := index >= 0 && index < len(p.pipeline)
	if !buffered {
		return func(yield func(*Token, error) bool) {
			yield(nil, nil) // No tokens to yield
		}
	}

	return func(yield func(*Token, error) bool) {
		// Replay what has been parsed already.
		for pos := index; pos < len(p.pipeline); pos++ {
			if !yield(p.pipeline[pos], nil) {
				return
			}
		}

		// Then continue with tokens that have not been parsed yet.
		for {
			next, err := p.Token()
			switch {
			case err != nil:
				yield(nil, err)
				return
			case next == nil:
				// EOF
				return
			}
			if !yield(next, nil) {
				return
			}
		}
	}
}
// Reset replaces the stack of open elements with a fresh empty one. Buffered
// tokens and the decoder position are left untouched.
func (p *Parser) Reset() {
	p.Stack = make([]*Token, 0)
}
// Token decodes the next XML token, wraps it in a Token, appends it to the
// pipeline and returns it. It returns (nil, nil) when the parser has no decoder
// or the input is exhausted, and (nil, err) on a decoding error.
//
// While decoding it maintains the stack of open elements, records each start
// element as a child of the innermost open element and adds character data to
// every open element, unless those were already marked as parsed.
func (p *Parser) Token() (*Token, error) {
	if p.decoder == nil {
		return nil, nil // No more tokens to parse
	}

	begin := p.decoder.InputOffset()
	raw, err := p.decoder.Token()
	finish := p.decoder.InputOffset()
	switch {
	case err == io.EOF:
		return nil, nil
	case err != nil:
		return nil, err
	}

	tok := &Token{
		parser:      p,
		Index:       len(p.pipeline),
		Inner:       raw,
		StartOffset: begin + 1,
		EndOffset:   finish,
		Stack:       make([]*Token, len(p.Stack)),
	}
	// Snapshot of the open elements before this token is applied; only
	// pointers are copied.
	copy(tok.Stack, p.Stack)

	switch t := raw.(type) {
	case xml.StartElement:
		tok.Name = t.Name.Local
		tok.Attributes = mapAttributes(t.Attr)
		tok.Type = StartElement
		if depth := len(p.Stack); depth > 0 {
			if parent := p.Stack[depth-1]; !parent.childrenParsed {
				parent.children = append(parent.children, tok)
			}
		}
		p.Stack = append(p.Stack, tok)

	case xml.EndElement:
		if depth := len(p.Stack); depth > 0 {
			// The innermost open element is complete now.
			closing := p.Stack[depth-1]
			closing.childrenParsed = true
			closing.chardataParsed = true
			p.Stack = p.Stack[:depth-1]
		}
		tok.Name = t.Name.Local
		tok.Attributes = map[string]string{}
		tok.Type = EndElement

	case xml.CharData:
		text := string(t)
		if text != "" {
			for _, open := range p.Stack {
				if !open.chardataParsed {
					open.charData += text
				}
			}
		}
		tok.Data = text
		tok.Type = CharData
		p.LastCharData = append(p.LastCharData, tok)

	case xml.Comment:
		tok.Type = Comment
		tok.Data = string(t)

	case xml.ProcInst:
		tok.Name = t.Target
		tok.Data = string(t.Inst)
		tok.Type = ProcInst

	case xml.Directive:
		tok.Data = string(t)
		tok.Type = Directive
	}

	p.pipeline = append(p.pipeline, tok)
	return tok, nil
}
// Previous returns the buffered tokens that come before the token at index.
// It returns nil when index does not refer to a buffered token.
func (p *Parser) Previous(index int) (tokens []*Token) {
	if index >= 0 && index < len(p.pipeline) {
		tokens = p.pipeline[:index]
	}
	return tokens
}
// All drives the parser to the end of its input and returns every token
// produced. On a decoding error it returns nil and that error.
func (p *Parser) All() ([]*Token, error) {
	var failure error
	p.Iterate()(func(_ *Token, err error) bool {
		failure = err
		return err == nil
	})
	if failure != nil {
		return nil, failure
	}
	return p.pipeline, nil
}
// Iterate returns a sequence over all tokens of the parser: buffered tokens
// are replayed first, after which new tokens are decoded on demand. A decoding
// error is yielded once with a nil token and ends the sequence.
//
// The read position belongs to the returned sequence value, so ranging over
// the same value a second time continues where the previous loop stopped;
// call Iterate again to start from the first token.
func (p *Parser) Iterate() iter.Seq2[*Token, error] {
	var cursor int
	return func(yield func(*Token, error) bool) {
		for {
			var token *Token
			// INFO: cursor should be max. len(p.pipeline)
			if cursor < len(p.pipeline) {
				token = p.pipeline[cursor]
			} else {
				fresh, err := p.Token()
				if err != nil {
					yield(nil, err)
					return
				}
				if fresh == nil {
					return // EOF
				}
				token = fresh
			}

			cursor++
			if !yield(token, nil) {
				return
			}
		}
	}
}
// mapAttributes builds a lookup from each attribute's local name to its value.
// The namespace is ignored; when a local name occurs more than once the last
// value wins. The returned map is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	byName := map[string]string{}
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}

126
xmlparsing/token.go Normal file
View File

@@ -0,0 +1,126 @@
package xmlparsing
import (
"encoding/xml"
"iter"
"strings"
)
// TokenType identifies which kind of XML token a Token was built from.
type TokenType int
// Token kinds, numbered from zero in declaration order.
const (
// StartElement marks an opening tag.
StartElement TokenType = iota
// EndElement marks a closing tag.
EndElement
// CharData marks character data between tags.
CharData
// Comment marks an XML comment.
Comment
// ProcInst marks a processing instruction.
ProcInst
// Directive marks an XML directive.
Directive
)
// Token is one XML token produced by a Parser, together with enough context
// (open-element stack, offsets, pipeline index) to navigate from it.
type Token struct {
// Name is the local element name for start and end elements and the target
// for processing instructions; the parser leaves it empty otherwise.
Name string
// Attributes maps local attribute names to values for start elements; the
// parser sets an empty map on end elements.
Attributes map[string]string
// Inner is the underlying token as returned by the decoder.
Inner xml.Token
// Type is the kind of token.
Type TokenType
// Data holds the text of character data, comments, processing instructions
// and directives.
Data string
// Stack is a copy of the parser's open-element stack taken before this
// token was applied: a start element is not part of its own Stack, an end
// element still sees the element it closes.
Stack []*Token
// StartOffset is the decoder's input offset before the token was read, plus one.
StartOffset int64
// EndOffset is the decoder's input offset after the token was read.
EndOffset int64
// Index is the token's position in the parser's pipeline.
Index int
// charData caches the text collected for an element.
charData string
// children caches the child start elements of an element.
children []*Token
// parser is the Parser that produced the token.
parser *Parser
// childrenParsed reports that children is complete.
childrenParsed bool
// chardataParsed reports that charData is complete.
chardataParsed bool
}
// String currently always returns the empty string; no representation of the
// token is written yet.
func (t *Token) String() string {
	return ""
}
// Element returns the tokens that make up the element opened by t: t itself,
// everything nested inside it, and its matching end element. It returns nil if
// t is not a start element or has no parser.
//
// Tokens that have not been decoded yet are pulled from the parser. If the
// input ends or a decoding error occurs before the matching end element is
// found, the tokens collected up to that point are returned.
func (t *Token) Element() (tokens []*Token) {
	if t.Type != StartElement || t.parser == nil {
		return
	}

	// Nested elements may carry the same name as t, so count how many of them
	// are open instead of stopping at the first end tag with that name.
	depth := 0
	for token, err := range t.parser.PeekFrom(t.Index) {
		if err != nil || token == nil {
			return tokens
		}

		tokens = append(tokens, token)
		if token.Name != t.Name {
			continue
		}
		switch token.Type {
		case StartElement:
			depth++
		case EndElement:
			depth--
			if depth <= 0 {
				return tokens
			}
		}
	}

	return tokens
}
// Next returns the parser's token sequence beginning at this token's own
// index, i.e. the first token yielded is expected to be t itself.
func (t *Token) Next() iter.Seq2[*Token, error] {
	from := t.Index
	return t.parser.PeekFrom(from)
}
// Previous returns the tokens the parser produced before this one, or nil for
// the first token.
func (t *Token) Previous() (tokens []*Token) {
	if t.Index > 0 {
		tokens = t.parser.Previous(t.Index)
	}
	return tokens
}
// Children returns the start elements that are direct children of t. The
// result is cached on the token. For tokens that are not start elements the
// result is nil.
func (t *Token) Children() (tokens []*Token) {
	if t.childrenParsed {
		return t.children
	}

	tokens = t.Element()
	if len(tokens) == 0 {
		return
	}

	// Rebuild the list from scratch rather than appending: the parser may
	// already have recorded some or all children while streaming. The first
	// token is t itself (depth 0), so direct children are the start elements
	// encountered at depth 1; deeper descendants are skipped.
	var children []*Token
	depth := 0
	for _, token := range tokens {
		switch token.Type {
		case StartElement:
			if depth == 1 {
				children = append(children, token)
			}
			depth++
		case EndElement:
			depth--
		}
	}

	t.children = children
	t.childrenParsed = true
	return t.children
}
// CharData returns the text belonging to the token. For character data,
// comments, processing instructions and directives this is the token's Data.
// For a start element it is the concatenated character data of all tokens
// returned by Element, cached after the first computation. Other tokens
// yield "".
func (t *Token) CharData() string {
	switch t.Type {
	case CharData, ProcInst, Comment, Directive:
		return t.Data
	}

	if t.chardataParsed {
		return t.charData
	}

	span := t.Element()
	if len(span) == 0 {
		return ""
	}

	var text strings.Builder
	for _, token := range span {
		if token.Type == CharData {
			text.WriteString(token.Data)
		}
	}

	t.chardataParsed = true
	t.charData = text.String()
	return t.charData
}
// SubParser returns a Parser that replays only the tokens of the element
// opened by t, or nil if t is not a start element.
func (t *Token) SubParser() *Parser {
	if t.Type == StartElement {
		return NewFromTokens(t.Element())
	}
	return nil
}