// Mirror of https://github.com/Theodor-Springmann-Stiftung/lenz-web.git
// (synced 2026-03-21 13:55:30 +00:00; 306 lines, 5.7 KiB, Go).

package xmlmodels
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"strings"
|
|
)
|
|
|
|
// TokenType identifies the kind of a Token in a Line's token stream.
type TokenType int

// Token kinds, numbered from zero in declaration order.
const (
	// StartElement marks an opening tag.
	StartElement TokenType = iota
	// EndElement marks a closing tag.
	EndElement
	// CharData marks a run of text.
	CharData
)
|
|
|
|
// LineType classifies a Line.
type LineType int

// Line kinds. The numeric values are part of the existing contract and are
// kept exactly as they were: value 2 is unused because the historical alias
// Fist used to occupy that slot of the iota sequence.
const (
	Continuation LineType = iota // 0
	First                        // 1
	_                            // 2: intentionally skipped, see above
	Semantic                     // 3: Indent=0, still type="break"
	Indent                       // 4: Indent>0, type doesn't matter
	Empty                        // 5: no line content; after that, an empty line
)

// Fist is a backward-compatible alias for First, kept for a historical typo.
const Fist = First
|
|
|
|
type Token struct {
|
|
Type TokenType
|
|
Name string
|
|
Attrs map[string]string
|
|
Value string
|
|
// INFO: true means synthetic token without corresponding XML token.
|
|
Synth bool
|
|
}
|
|
|
|
type Line struct {
|
|
Type LineType
|
|
Indent int
|
|
AlignCtx bool
|
|
TabCtx bool
|
|
Text string
|
|
Tokens []Token
|
|
}
|
|
|
|
type Page struct {
|
|
Number int
|
|
Lines []Line
|
|
Sidenotes []Sidenote
|
|
}
|
|
|
|
type Sidenote struct {
|
|
Position string
|
|
Annotation string
|
|
Lines []Line
|
|
}
|
|
|
|
type lineAccumulator struct {
|
|
curLine *Line
|
|
openStack []Token
|
|
implicitType LineType
|
|
hasAnyLine bool
|
|
appendLine func(Line)
|
|
hasCharData bool
|
|
}
|
|
|
|
func newLineAccumulator(implicitType LineType, appendLine func(Line)) *lineAccumulator {
|
|
return &lineAccumulator{
|
|
implicitType: implicitType,
|
|
appendLine: appendLine,
|
|
}
|
|
}
|
|
|
|
func (a *lineAccumulator) setImplicitType(lt LineType) {
|
|
a.implicitType = lt
|
|
}
|
|
|
|
func (a *lineAccumulator) startLine(lt LineType, indent int) {
|
|
a.curLine = &Line{Type: lt, Indent: indent}
|
|
a.hasCharData = false
|
|
for _, st := range a.openStack {
|
|
a.curLine.Tokens = append(a.curLine.Tokens, Token{
|
|
Type: StartElement,
|
|
Name: st.Name,
|
|
Attrs: st.Attrs,
|
|
Synth: true,
|
|
})
|
|
}
|
|
}
|
|
|
|
func (a *lineAccumulator) ensureLine() {
|
|
if a.curLine != nil {
|
|
return
|
|
}
|
|
a.startLine(a.implicitType, 0)
|
|
if a.implicitType == First || a.implicitType == Continuation {
|
|
a.implicitType = Semantic
|
|
}
|
|
}
|
|
|
|
func (a *lineAccumulator) closeLine() {
|
|
if a.curLine == nil {
|
|
a.ensureLine()
|
|
}
|
|
a.trimRightWhitespace()
|
|
for i := len(a.openStack) - 1; i >= 0; i-- {
|
|
a.curLine.Tokens = append(a.curLine.Tokens, Token{
|
|
Type: EndElement,
|
|
Name: a.openStack[i].Name,
|
|
Synth: true,
|
|
})
|
|
}
|
|
a.applyContextFlags()
|
|
a.curLine.Text = lineTextFromTokens(a.curLine.Tokens)
|
|
a.appendLine(*a.curLine)
|
|
a.hasAnyLine = true
|
|
a.curLine = nil
|
|
}
|
|
|
|
func (a *lineAccumulator) handleLineMarker(se xml.StartElement) {
|
|
lt, indent, emitEmpty := parseLineMarker(se)
|
|
if a.curLine != nil {
|
|
a.closeLine()
|
|
}
|
|
if emitEmpty {
|
|
a.startLine(Empty, 0)
|
|
a.closeLine()
|
|
a.implicitType = Semantic
|
|
return
|
|
}
|
|
a.startLine(lt, indent)
|
|
a.implicitType = Semantic
|
|
}
|
|
|
|
func (a *lineAccumulator) appendStart(name string, attrs map[string]string) {
|
|
a.ensureLine()
|
|
a.curLine.Tokens = append(a.curLine.Tokens, Token{
|
|
Type: StartElement,
|
|
Name: name,
|
|
Attrs: attrs,
|
|
})
|
|
a.openStack = append(a.openStack, Token{
|
|
Type: StartElement,
|
|
Name: name,
|
|
Attrs: attrs,
|
|
})
|
|
}
|
|
|
|
func (a *lineAccumulator) appendEnd(name string) {
|
|
a.ensureLine()
|
|
a.curLine.Tokens = append(a.curLine.Tokens, Token{
|
|
Type: EndElement,
|
|
Name: name,
|
|
})
|
|
if len(a.openStack) == 0 {
|
|
return
|
|
}
|
|
if a.openStack[len(a.openStack)-1].Name == name {
|
|
a.openStack = a.openStack[:len(a.openStack)-1]
|
|
return
|
|
}
|
|
for i := len(a.openStack) - 1; i >= 0; i-- {
|
|
if a.openStack[i].Name == name {
|
|
a.openStack = append(a.openStack[:i], a.openStack[i+1:]...)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func (a *lineAccumulator) appendText(s string) {
|
|
a.ensureLine()
|
|
if !a.hasCharData {
|
|
s = trimLeftASCIISpace(s)
|
|
}
|
|
if s == "" {
|
|
return
|
|
}
|
|
a.curLine.Tokens = append(a.curLine.Tokens, Token{
|
|
Type: CharData,
|
|
Value: s,
|
|
})
|
|
a.hasCharData = true
|
|
}
|
|
|
|
func (a *lineAccumulator) isAtLineStart() bool {
|
|
if a.curLine == nil {
|
|
return true
|
|
}
|
|
for _, tok := range a.curLine.Tokens {
|
|
if tok.Type == StartElement && tok.Synth {
|
|
continue
|
|
}
|
|
return false
|
|
}
|
|
return true
|
|
}
|
|
|
|
func (a *lineAccumulator) trimRightWhitespace() {
|
|
if a.curLine == nil {
|
|
return
|
|
}
|
|
toks := a.curLine.Tokens
|
|
for {
|
|
lastCharIdx := -1
|
|
for i := len(toks) - 1; i >= 0; i-- {
|
|
if toks[i].Type == CharData {
|
|
lastCharIdx = i
|
|
break
|
|
}
|
|
}
|
|
if lastCharIdx < 0 {
|
|
break
|
|
}
|
|
trimmed := trimRightASCIISpace(toks[lastCharIdx].Value)
|
|
if trimmed == "" {
|
|
toks = append(toks[:lastCharIdx], toks[lastCharIdx+1:]...)
|
|
continue
|
|
}
|
|
toks[lastCharIdx].Value = trimmed
|
|
break
|
|
}
|
|
a.curLine.Tokens = toks
|
|
}
|
|
|
|
func lineTextFromTokens(tokens []Token) string {
|
|
var b strings.Builder
|
|
for _, tok := range tokens {
|
|
if tok.Type == CharData {
|
|
b.WriteString(tok.Value)
|
|
}
|
|
}
|
|
return b.String()
|
|
}
|
|
|
|
func (a *lineAccumulator) applyContextFlags() {
|
|
if a.curLine == nil {
|
|
return
|
|
}
|
|
for _, tok := range a.curLine.Tokens {
|
|
if tok.Type != StartElement {
|
|
continue
|
|
}
|
|
switch tok.Name {
|
|
case "align":
|
|
a.curLine.AlignCtx = true
|
|
case "tab":
|
|
a.curLine.TabCtx = true
|
|
}
|
|
if a.curLine.AlignCtx && a.curLine.TabCtx {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func parseBlockLines(dec *xml.Decoder, endLocalName string) ([]Line, error) {
|
|
lines := make([]Line, 0, 8)
|
|
acc := newLineAccumulator(First, func(line Line) {
|
|
lines = append(lines, line)
|
|
})
|
|
|
|
for {
|
|
tok, err := dec.Token()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
switch t := tok.(type) {
|
|
case xml.StartElement:
|
|
name := t.Name.Local
|
|
if name == "line" {
|
|
acc.handleLineMarker(t)
|
|
continue
|
|
}
|
|
if isTransparentWrapper(name) {
|
|
continue
|
|
}
|
|
acc.appendStart(name, attrsToMap(t.Attr))
|
|
|
|
case xml.EndElement:
|
|
name := t.Name.Local
|
|
if isTransparentWrapper(name) {
|
|
continue
|
|
}
|
|
if name == endLocalName {
|
|
if acc.curLine != nil {
|
|
acc.closeLine()
|
|
}
|
|
return lines, nil
|
|
}
|
|
if name == "line" {
|
|
continue
|
|
}
|
|
acc.appendEnd(name)
|
|
|
|
case xml.CharData:
|
|
s := string([]byte(t))
|
|
if isOnlyASCIISpace(s) {
|
|
if acc.isAtLineStart() {
|
|
continue
|
|
}
|
|
s = " "
|
|
}
|
|
acc.appendText(s)
|
|
}
|
|
}
|
|
}
|