Parsing upon deserialising

This commit is contained in:
Simon Martens
2025-11-14 15:29:51 +01:00
parent a46c171de7
commit 2e251f446f
9 changed files with 633 additions and 331 deletions

168
xmlparsing/parsed.go Normal file
View File

@@ -0,0 +1,168 @@
package xmlparsing
import (
"iter"
"strings"
)
// ParserHandler is the callback set that Parsed.ParseString drives while it
// iterates over the XML token stream. S is the handler-defined state type that
// is carried through the run inside a ParseState. A non-nil error returned from
// any callback stops ParseString, which returns that error.
type ParserHandler[S any] interface {
	// NewState produces the handler state used for one ParseString run.
	NewState() S
	// OnOpenElement is invoked for every StartElement token.
	OnOpenElement(*ParseState[S], *Token) error
	// OnCloseElement is invoked for every EndElement token.
	OnCloseElement(*ParseState[S], *Token) error
	// OnText is invoked for every CharData token whose data is not
	// whitespace-only.
	OnText(*ParseState[S], *Token) error
	// OnComment is invoked for every Comment token.
	OnComment(*ParseState[S], *Token) error
}
// Parsed couples a handler with the raw XML it was given and with the state
// the handler built up while ParseString walked that XML.
type Parsed[T ParserHandler[S], S any] struct {
	// handler receives the per-token callbacks.
	handler T
	// state is the ParseState stored by ParseString after a run without error.
	state ParseState[S]
	// raw is the XML string most recently passed to ParseString.
	raw string
}
// NewParsed returns a Parsed that dispatches to handler. Nothing is parsed
// here: the raw XML and the parse state keep their zero values until
// ParseString is called.
func NewParsed[T ParserHandler[S], S any](handler T) Parsed[T, S] {
	var parsed Parsed[T, S]
	parsed.handler = handler
	return parsed
}
// ParseString tokenises xml and forwards each token to the handler.
//
// The input is remembered as the raw XML before parsing starts. A fresh handler
// state is created for the run; it is stored on p only when the whole token
// stream has been consumed without error. If the parser or a handler callback
// reports an error, that error is returned immediately and the previously
// stored state is left untouched (the raw XML has already been replaced).
//
// Every non-nil token is recorded in the run's GeneralState before dispatch,
// including tokens for which no callback fires.
func (p *Parsed[T, S]) ParseString(xml string) error {
	p.raw = xml

	parser := NewParser(xml)
	run := ParseState[S]{
		state:   p.handler.NewState(),
		general: newGeneralState(parser),
	}

	for tok, iterErr := range parser.Iterate() {
		if iterErr != nil {
			return iterErr
		}
		if tok == nil {
			continue
		}

		// Record the token first so callbacks can already see it as current.
		run.general.observe(tok)

		var cbErr error
		switch tok.Type {
		case StartElement:
			cbErr = p.handler.OnOpenElement(&run, tok)
		case EndElement:
			cbErr = p.handler.OnCloseElement(&run, tok)
		case CharData:
			// Whitespace-only blocks are not reported, to mimic
			// encoding/xml behaviour.
			if strings.TrimSpace(tok.Data) != "" {
				cbErr = p.handler.OnText(&run, tok)
			}
		case Comment:
			cbErr = p.handler.OnComment(&run, tok)
		default:
			// Other token types are ignored for now.
		}
		if cbErr != nil {
			return cbErr
		}
	}

	p.state = run
	return nil
}
// Raw returns the XML string most recently handed to ParseString, exactly as
// it was supplied.
func (p Parsed[T, S]) Raw() string {
	return p.raw
}
// State returns a pointer to the ParseState held inside p. The pointer aliases
// the stored value, so changes made through it are visible on p.
func (p *Parsed[T, S]) State() *ParseState[S] {
	stored := &p.state
	return stored
}
// Data returns the handler-defined state value held in the stored ParseState.
func (p *Parsed[T, S]) Data() S {
	return p.state.Data()
}
// Handler returns a pointer to the handler instance stored in p, so downstream
// consumers work on the same handler that ParseString calls into.
func (p *Parsed[T, S]) Handler() *T {
	h := &p.handler
	return h
}
// ParseState is what handler callbacks receive: the handler's own state value
// together with the shared, parser-wide navigation helpers.
type ParseState[S any] struct {
	// state is the handler-defined value created by ParserHandler.NewState.
	state S
	// general holds the token history and peek helpers for the current run.
	general *GeneralState
}
// Data returns the handler-owned state value carried by this ParseState.
func (p *ParseState[S]) Data() S {
	return p.state
}
// General returns the shared GeneralState, which offers the parser-wide
// helpers (token history, current token, peeking).
func (p *ParseState[S]) General() *GeneralState {
	return p.general
}
// GeneralState keeps the history of tokens processed so far and gives access
// to the underlying parser for look-back and peek helpers.
type GeneralState struct {
	// tokens lists every observed token, oldest first.
	tokens []*Token
	// parser is the parser that Peek reads upcoming tokens from.
	parser *Parser
	// current is the token most recently passed to observe.
	current *Token
}
// newGeneralState returns an empty GeneralState bound to parser: no tokens
// recorded and no current token.
func newGeneralState(parser *Parser) *GeneralState {
	g := new(GeneralState)
	g.parser = parser
	return g
}
// observe records token as the current token and appends it to the history.
func (g *GeneralState) observe(token *Token) {
	g.current = token
	g.tokens = append(g.tokens, token)
}
// Tokens returns every token observed so far, oldest first. The returned slice
// is the internal history itself, not a copy.
func (g *GeneralState) Tokens() []*Token {
	return g.tokens
}
// Current returns the token most recently recorded, or nil when no token has
// been recorded yet.
func (g *GeneralState) Current() *Token {
	return g.current
}
// Previous returns the n most recently processed tokens, newest first. The
// most recent entry is the current token itself. If fewer than n tokens have
// been recorded, all of them are returned. The result is nil when n is not
// positive or no token has been recorded. The returned slice is freshly
// allocated, so modifying it does not affect the history.
func (g *GeneralState) Previous(n int) []*Token {
	total := len(g.tokens)
	if total == 0 || n <= 0 {
		return nil
	}

	count := n
	if total < count {
		count = total
	}

	recent := make([]*Token, count)
	for i := range recent {
		// Walk the history backwards from its last element.
		recent[i] = g.tokens[total-1-i]
	}
	return recent
}
// Peek returns a sequence of upcoming tokens obtained from the underlying
// parser via PeekFrom, starting at the index right after the current token's
// Index. When no token has been recorded yet, the returned sequence yields a
// single (nil, nil) pair and the parser is not consulted.
func (g *GeneralState) Peek() iter.Seq2[*Token, error] {
	if cur := g.current; cur != nil {
		return g.parser.PeekFrom(cur.Index + 1)
	}
	return func(yield func(*Token, error) bool) {
		yield(nil, nil)
	}
}