Mirror of https://github.com/Theodor-Springmann-Stiftung/lenz-web.git (synced 2025-12-16 06:25:31 +00:00)
Parsing upon deserialising
xmlparsing/parsed.go (168 lines, new file)
@@ -0,0 +1,168 @@
package xmlparsing

import (
	"iter"
	"strings"
)

// ParserHandler describes the callbacks a Parsed type invokes while walking
// through the XML token stream.
type ParserHandler[S any] interface {
	NewState() S
	OnOpenElement(*ParseState[S], *Token) error
	OnCloseElement(*ParseState[S], *Token) error
	OnText(*ParseState[S], *Token) error
	OnComment(*ParseState[S], *Token) error
}
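
// Illustrative sketch, not part of this commit: a minimal ParserHandler whose
// state is a pointer type, so callbacks can mutate it through ParseState.Data().
// The type and field names below are invented for the example; only the Token
// fields already used in this file (Type, Data, Index) are assumed to exist.
type textCollectorState struct {
	Texts []string
}

type textCollector struct{}

func (textCollector) NewState() *textCollectorState { return &textCollectorState{} }

func (textCollector) OnOpenElement(*ParseState[*textCollectorState], *Token) error { return nil }

func (textCollector) OnCloseElement(*ParseState[*textCollectorState], *Token) error { return nil }

func (textCollector) OnComment(*ParseState[*textCollectorState], *Token) error { return nil }

// OnText appends each reported text block to the accumulated state.
func (textCollector) OnText(s *ParseState[*textCollectorState], t *Token) error {
	s.Data().Texts = append(s.Data().Texts, t.Data)
	return nil
}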

// Parsed orchestrates converting raw XML into a handler-defined representation.
type Parsed[T ParserHandler[S], S any] struct {
	handler T
	state   ParseState[S]
	raw     string
}

// NewParsed builds a Parsed wrapper with the provided handler.
func NewParsed[T ParserHandler[S], S any](handler T) Parsed[T, S] {
	return Parsed[T, S]{handler: handler}
}

// ParseString feeds the handler with events generated from the supplied XML.
func (p *Parsed[T, S]) ParseString(xml string) error {
	p.raw = xml
	parser := NewParser(xml)
	state := ParseState[S]{
		state:   p.handler.NewState(),
		general: newGeneralState(parser),
	}

	for token, err := range parser.Iterate() {
		if err != nil {
			return err
		}
		if token == nil {
			continue
		}

		state.general.observe(token)

		switch token.Type {
		case StartElement:
			if err := p.handler.OnOpenElement(&state, token); err != nil {
				return err
			}
		case EndElement:
			if err := p.handler.OnCloseElement(&state, token); err != nil {
				return err
			}
		case CharData:
			// Skip whitespace-only character data to mimic encoding/xml behaviour.
			if strings.TrimSpace(token.Data) == "" {
				continue
			}
			if err := p.handler.OnText(&state, token); err != nil {
				return err
			}
		case Comment:
			if err := p.handler.OnComment(&state, token); err != nil {
				return err
			}
		default:
			// Other token types are ignored for now.
		}
	}

	p.state = state
	return nil
}
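
// Illustrative sketch, not part of this commit: wiring a handler (such as the
// textCollector example above) through Parsed. The XML literal and function
// name are invented for the example.
func exampleParse() ([]string, error) {
	parsed := NewParsed[textCollector, *textCollectorState](textCollector{})
	if err := parsed.ParseString(`<letter><p>Lieber Freund</p></letter>`); err != nil {
		return nil, err
	}
	// Data returns the handler-defined state accumulated during parsing.
	return parsed.Data().Texts, nil
}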

// Raw returns the unprocessed XML.
func (p *Parsed[T, S]) Raw() string {
	return p.raw
}

// State exposes the accumulated ParseState.
func (p *Parsed[T, S]) State() *ParseState[S] {
	return &p.state
}

// Data returns the handler-defined state value.
func (p *Parsed[T, S]) Data() S {
	return p.state.state
}

// Handler exposes the handler instance for downstream consumers.
func (p *Parsed[T, S]) Handler() *T {
	return &p.handler
}

// ParseState carries both handler-specific state and shared navigation helpers.
type ParseState[S any] struct {
	state   S
	general *GeneralState
}

// Data returns the handler-owned state.
func (p *ParseState[S]) Data() S {
	return p.state
}

// General exposes parser-wide helpers (tokens, peeking, etc.).
func (p *ParseState[S]) General() *GeneralState {
	return p.general
}

// GeneralState tracks all past tokens and enables look-back/peek helpers.
type GeneralState struct {
	tokens  []*Token
	parser  *Parser
	current *Token
}

func newGeneralState(parser *Parser) *GeneralState {
	return &GeneralState{
		parser: parser,
	}
}

func (g *GeneralState) observe(token *Token) {
	g.tokens = append(g.tokens, token)
	g.current = token
}

// Tokens returns all tokens seen so far.
func (g *GeneralState) Tokens() []*Token {
	return g.tokens
}

// Current returns the most recently processed token.
func (g *GeneralState) Current() *Token {
	return g.current
}

// Previous returns up to n previously processed tokens (latest first).
func (g *GeneralState) Previous(n int) []*Token {
	if n <= 0 || len(g.tokens) == 0 {
		return nil
	}

	if n > len(g.tokens) {
		n = len(g.tokens)
	}

	out := make([]*Token, 0, n)
	for i := 0; i < n; i++ {
		out = append(out, g.tokens[len(g.tokens)-1-i])
	}
	return out
}

// Peek exposes a cursor that yields upcoming tokens from the underlying parser.
func (g *GeneralState) Peek() iter.Seq2[*Token, error] {
	if g.current == nil {
		// Nothing has been observed yet, so there is no position to peek from;
		// yield a single nil token rather than touching the parser.
		return func(yield func(*Token, error) bool) {
			yield(nil, nil)
		}
	}
	return g.parser.PeekFrom(g.current.Index + 1)
}
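
// Illustrative sketch, not part of this commit: a handler callback can look
// ahead without consuming tokens, e.g. to check whether the current position is
// immediately followed by a comment. The helper name is invented for the
// example; it relies only on GeneralState.Peek and the Token fields above.
func nextIsComment[S any](s *ParseState[S]) (bool, error) {
	for token, err := range s.General().Peek() {
		if err != nil {
			return false, err
		}
		if token == nil {
			return false, nil
		}
		// Only the first peeked token matters here.
		return token.Type == Comment, nil
	}
	return false, nil
}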