mirror of
https://github.com/Theodor-Springmann-Stiftung/lenz-web.git
synced 2025-10-28 16:55:32 +00:00
211 lines
4.2 KiB
Go
211 lines
4.2 KiB
Go
package xmlparsing
|
|
|
|
import (
|
|
"encoding/xml"
|
|
"io"
|
|
"iter"
|
|
"strings"
|
|
)
|
|
|
|
type Parser struct {
|
|
Stack []*Token
|
|
LastCharData []*Token
|
|
pipeline []*Token
|
|
decoder *xml.Decoder
|
|
}
|
|
|
|
func NewFromTokens(tokens []*Token) *Parser {
|
|
return &Parser{
|
|
Stack: make([]*Token, 0, len(tokens)),
|
|
LastCharData: make([]*Token, 0, len(tokens)),
|
|
pipeline: tokens,
|
|
decoder: nil, // No decoder needed for pre-parsed tokens
|
|
}
|
|
}
|
|
|
|
func NewParser(xmlData string) *Parser {
|
|
return &Parser{
|
|
decoder: xml.NewDecoder(strings.NewReader(xmlData)),
|
|
}
|
|
}
|
|
|
|
func (p *Parser) GetStack() []*Token {
|
|
return p.Stack
|
|
}
|
|
|
|
func (p *Parser) Pipeline() []*Token {
|
|
return p.pipeline
|
|
}
|
|
|
|
func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
|
|
if index < 0 || index >= len(p.pipeline) {
|
|
return func(yield func(*Token, error) bool) {
|
|
yield(nil, nil) // No tokens to yield
|
|
return
|
|
}
|
|
}
|
|
|
|
return func(yield func(*Token, error) bool) {
|
|
for i := index; i < len(p.pipeline); i++ {
|
|
if !yield(p.pipeline[i], nil) {
|
|
return
|
|
}
|
|
}
|
|
|
|
for {
|
|
token, err := p.Token()
|
|
if err != nil {
|
|
yield(nil, err)
|
|
return
|
|
}
|
|
|
|
if token == nil {
|
|
// EOF
|
|
return
|
|
}
|
|
|
|
if !yield(token, nil) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func (p *Parser) Reset() {
|
|
p.Stack = []*Token{}
|
|
}
|
|
|
|
func (p *Parser) Token() (*Token, error) {
|
|
if p.decoder == nil {
|
|
return nil, nil // No more tokens to parse
|
|
}
|
|
|
|
start := p.decoder.InputOffset()
|
|
token, err := p.decoder.Token()
|
|
end := p.decoder.InputOffset()
|
|
if err == io.EOF {
|
|
return nil, nil
|
|
} else if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var customToken Token = Token{
|
|
parser: p,
|
|
Index: len(p.pipeline),
|
|
Inner: token,
|
|
StartOffset: start + 1,
|
|
EndOffset: end,
|
|
Stack: make([]*Token, len(p.Stack)),
|
|
}
|
|
|
|
// INFO: these are just pointers, so it should go fast
|
|
copy(customToken.Stack, p.Stack)
|
|
|
|
switch t := token.(type) {
|
|
case xml.StartElement:
|
|
attr := mapAttributes(t.Attr)
|
|
customToken.Name = t.Name.Local
|
|
customToken.Attributes = attr
|
|
customToken.Type = StartElement
|
|
if len(p.Stack) > 0 && !p.Stack[len(p.Stack)-1].childrenParsed {
|
|
p.Stack[len(p.Stack)-1].children = append(p.Stack[len(p.Stack)-1].children, &customToken)
|
|
}
|
|
p.Stack = append(p.Stack, &customToken)
|
|
|
|
case xml.EndElement:
|
|
if len(p.Stack) > 0 {
|
|
element := p.Stack[len(p.Stack)-1]
|
|
element.childrenParsed = true
|
|
element.chardataParsed = true
|
|
p.Stack = p.Stack[:len(p.Stack)-1]
|
|
}
|
|
customToken.Name = t.Name.Local
|
|
customToken.Attributes = map[string]string{}
|
|
customToken.Type = EndElement
|
|
|
|
case xml.CharData:
|
|
text := string(t)
|
|
if text != "" && len(p.Stack) > 0 {
|
|
for i := range p.Stack {
|
|
if !p.Stack[i].chardataParsed {
|
|
p.Stack[i].charData += text
|
|
}
|
|
}
|
|
}
|
|
customToken.Data = text
|
|
customToken.Type = CharData
|
|
p.LastCharData = append(p.LastCharData, &customToken)
|
|
|
|
case xml.Comment:
|
|
customToken.Type = Comment
|
|
customToken.Data = string(t)
|
|
|
|
case xml.ProcInst:
|
|
customToken.Name = t.Target
|
|
customToken.Data = string(t.Inst)
|
|
customToken.Type = ProcInst
|
|
|
|
case xml.Directive:
|
|
customToken.Data = string(t)
|
|
customToken.Type = Directive
|
|
}
|
|
|
|
p.pipeline = append(p.pipeline, &customToken)
|
|
return &customToken, nil
|
|
}
|
|
|
|
func (p *Parser) Previous(index int) (tokens []*Token) {
|
|
if index < 0 || index >= len(p.pipeline) {
|
|
return
|
|
}
|
|
|
|
return p.pipeline[:index]
|
|
}
|
|
|
|
func (p *Parser) All() ([]*Token, error) {
|
|
for _, err := range p.Iterate() {
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return p.pipeline, nil
|
|
}
|
|
|
|
func (p *Parser) Iterate() iter.Seq2[*Token, error] {
|
|
var cursor int
|
|
return func(yield func(*Token, error) bool) {
|
|
for {
|
|
var token *Token
|
|
// INFO: cursor should be max. len(p.pipeline)
|
|
if cursor >= len(p.pipeline) {
|
|
t, err := p.Token()
|
|
if err != nil {
|
|
yield(nil, err)
|
|
return
|
|
}
|
|
if t == nil {
|
|
return // EOF
|
|
}
|
|
|
|
token = t
|
|
} else {
|
|
token = p.pipeline[cursor]
|
|
}
|
|
|
|
cursor++
|
|
if !yield(token, nil) {
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// mapAttributes converts a list of xml.Attr into a map from attribute name
// to attribute value.
//
// Only the local part of each attribute name is used as key; the namespace
// is ignored. If several attributes share a local name, the last one in
// attrs wins. The returned map is never nil, even for an empty input.
func mapAttributes(attrs []xml.Attr) map[string]string {
	// The final size is bounded by the number of attributes, so size the map
	// once up front.
	attrMap := make(map[string]string, len(attrs))
	for _, attr := range attrs {
		attrMap[attr.Name.Local] = attr.Value
	}
	return attrMap
}
|