Restart Init

This commit is contained in:
Simon Martens
2026-02-18 13:41:44 +01:00
parent 938cdeb27b
commit 4f4288905d
2955 changed files with 4795 additions and 53375 deletions

View File

@@ -1,10 +1,5 @@
package xmlparsing
// ItemInfo records the provenance of a stored item.
type ItemInfo struct {
	// Source is the path of the file the item was read from.
	Source string
	// Parse is the metadata of the parse run that stored the item.
	Parse ParseMeta
}
// INFO: These are just root elements that hold the data of the XML files.
// They get discarded after a parse.
type XMLRootElement[T any] interface {

View File

@@ -1,15 +0,0 @@
package xmlparsing
import "sync"
// Library keeps a list of parse runs.
type Library struct {
	// pmux is not used by any code visible in this file.
	// NOTE(review): presumably intended to guard Parses — confirm.
	pmux sync.Mutex
	// Parses lists the recorded parse runs; Latest treats the last element
	// as the most recent one.
	Parses []ParseMeta
}
// Latest returns the last entry of l.Parses, or the zero ParseMeta when no
// parse has been recorded.
func (l *Library) Latest() ParseMeta {
	if n := len(l.Parses); n > 0 {
		return l.Parses[n-1]
	}
	return ParseMeta{}
}

View File

@@ -11,10 +11,6 @@ type IXMLItem interface {
Type() string
}
// ILibrary is the interface for types exposing a Parse method that takes a
// ParseMeta.
type ILibrary interface {
	// Parse receives the metadata of a parse run and reports failure through
	// the returned error.
	Parse(meta ParseMeta) error
}
// ResolvingMap maps a string key to the list of Resolved entries stored
// under it.
// NOTE(review): presumably the key is the ID of the referenced item — confirm.
type ResolvingMap[T IXMLItem] map[string][]Resolved[T]
type ReferenceResolver[T IXMLItem] interface {

View File

@@ -1,168 +0,0 @@
package xmlparsing
import (
"iter"
"strings"
)
// ParserHandler describes the callbacks a Parsed type invokes while walking
// through the XML token stream.
type ParserHandler[S any] interface {
	// NewState supplies the handler-specific state value; ParseString calls
	// it once at the start of every parse.
	NewState() S
	// OnOpenElement is called for every StartElement token.
	OnOpenElement(*ParseState[S], *Token) error
	// OnCloseElement is called for every EndElement token.
	OnCloseElement(*ParseState[S], *Token) error
	// OnText is called for CharData tokens whose data is not only whitespace.
	OnText(*ParseState[S], *Token) error
	// OnComment is called for every Comment token.
	OnComment(*ParseState[S], *Token) error
}
// Parsed orchestrates converting raw XML into a handler-defined representation.
type Parsed[T ParserHandler[S], S any] struct {
	// handler receives the callbacks during ParseString.
	handler T
	// state is the ParseState of the last ParseString call that returned nil.
	state ParseState[S]
	// raw is the XML string handed to the most recent ParseString call.
	raw string
}
// NewParsed returns a Parsed value that dispatches to handler; nothing is
// parsed until ParseString is called.
func NewParsed[T ParserHandler[S], S any](handler T) Parsed[T, S] {
	var parsed Parsed[T, S]
	parsed.handler = handler
	return parsed
}
// ParseString tokenizes xml and dispatches every token to the handler.
//
// The raw input is remembered immediately. The accumulated state is stored
// on p only when the whole document was processed without error; the first
// error from the tokenizer or from a handler callback aborts the parse and
// is returned as is.
func (p *Parsed[T, S]) ParseString(xml string) error {
	p.raw = xml

	parser := NewParser(xml)
	st := ParseState[S]{
		state:   p.handler.NewState(),
		general: newGeneralState(parser),
	}

	for tok, iterErr := range parser.Iterate() {
		if iterErr != nil {
			return iterErr
		}
		if tok == nil {
			continue
		}

		st.general.observe(tok)

		var err error
		switch tok.Type {
		case StartElement:
			err = p.handler.OnOpenElement(&st, tok)
		case EndElement:
			err = p.handler.OnCloseElement(&st, tok)
		case CharData:
			// Whitespace-only text is not reported, to mimic encoding/xml behaviour.
			if strings.TrimSpace(tok.Data) != "" {
				err = p.handler.OnText(&st, tok)
			}
		case Comment:
			err = p.handler.OnComment(&st, tok)
		}
		// Any other token type is ignored for now.
		if err != nil {
			return err
		}
	}

	p.state = st
	return nil
}
// Raw returns the XML string that was passed to the most recent ParseString
// call, exactly as received.
func (p Parsed[T, S]) Raw() string {
	return p.raw
}
// State returns a pointer to the ParseState stored by the last successful
// ParseString call.
func (p *Parsed[T, S]) State() *ParseState[S] {
	return &p.state
}
// Data returns the handler-defined state value held in the stored ParseState.
func (p *Parsed[T, S]) Data() S {
	return p.state.state
}
// Handler returns a pointer to the handler instance owned by p, for use by
// downstream consumers.
func (p *Parsed[T, S]) Handler() *T {
	return &p.handler
}
// ParseState passes both handler-specific state and shared navigation helpers.
type ParseState[S any] struct {
	// state is the value returned by the handler's NewState.
	state S
	// general holds the tokens observed so far and the parser producing them.
	general *GeneralState
}
// Data returns the handler-specific state value carried by this ParseState.
func (p *ParseState[S]) Data() S {
	return p.state
}
// General returns the shared GeneralState (observed tokens, peeking, etc.).
func (p *ParseState[S]) General() *GeneralState {
	return p.general
}
// GeneralState tracks all past tokens and enables look-back/peek helpers.
type GeneralState struct {
	// tokens holds every token passed to observe, oldest first.
	tokens []*Token
	// parser is the Parser used by Peek to look ahead.
	parser *Parser
	// current is the token most recently passed to observe; nil before the first one.
	current *Token
}
// newGeneralState returns an empty GeneralState bound to parser.
func newGeneralState(parser *Parser) *GeneralState {
	g := new(GeneralState)
	g.parser = parser
	return g
}
// observe marks token as the current token and appends it to the history.
func (g *GeneralState) observe(token *Token) {
	g.current = token
	g.tokens = append(g.tokens, token)
}
// Tokens returns the history of observed tokens, oldest first. The returned
// slice is the internal one, not a copy.
func (g *GeneralState) Tokens() []*Token {
	return g.tokens
}
// Current returns the token that was observed last, or nil when none has
// been observed yet.
func (g *GeneralState) Current() *Token {
	return g.current
}
// Previous returns the n most recently observed tokens, newest first. The
// current token is the first element. Fewer than n tokens are returned when
// the history is shorter; nil is returned for n <= 0 or an empty history.
func (g *GeneralState) Previous(n int) []*Token {
	total := len(g.tokens)
	if n <= 0 || total == 0 {
		return nil
	}
	recent := make([]*Token, min(n, total))
	for i := range recent {
		recent[i] = g.tokens[total-1-i]
	}
	return recent
}
// Peek returns a sequence positioned just after the current token, obtained
// from the parser's PeekFrom. Before any token has been observed the
// sequence yields a single (nil, nil) pair.
func (g *GeneralState) Peek() iter.Seq2[*Token, error] {
	if g.current != nil {
		return g.parser.PeekFrom(g.current.Index + 1)
	}
	return func(yield func(*Token, error) bool) {
		yield(nil, nil)
	}
}

View File

@@ -1,210 +0,0 @@
package xmlparsing
import (
"encoding/xml"
"io"
"iter"
"strings"
)
// Parser turns XML into a growing list of Tokens, either by decoding input
// on demand or by replaying a list of tokens it was given.
type Parser struct {
	// Stack holds the start elements that are currently open: Token pushes on
	// StartElement and pops on EndElement.
	Stack []*Token
	// LastCharData receives every CharData token produced by Token.
	LastCharData []*Token
	// pipeline holds all tokens produced so far (or supplied to NewFromTokens),
	// in document order.
	pipeline []*Token
	// decoder is the underlying XML decoder; nil for parsers built by NewFromTokens.
	decoder *xml.Decoder
}
// NewFromTokens returns a Parser that replays tokens instead of decoding XML.
// The tokens slice becomes the pipeline as is. No decoder is attached, so
// Token reports end of input immediately.
func NewFromTokens(tokens []*Token) *Parser {
	n := len(tokens)
	p := &Parser{pipeline: tokens}
	p.Stack = make([]*Token, 0, n)
	p.LastCharData = make([]*Token, 0, n)
	return p
}
// NewParser returns a Parser that decodes xmlData lazily, one token per
// call to Token.
func NewParser(xmlData string) *Parser {
	reader := strings.NewReader(xmlData)
	return &Parser{decoder: xml.NewDecoder(reader)}
}
// GetStack returns the stack of currently open start elements. The returned
// slice is the internal one, not a copy.
func (p *Parser) GetStack() []*Token {
	return p.Stack
}
// Pipeline returns every token produced so far, in document order. The
// returned slice is the internal one, not a copy.
func (p *Parser) Pipeline() []*Token {
	return p.pipeline
}
// PeekFrom returns a sequence of tokens starting at pipeline position index.
//
// Tokens that were already produced are replayed from the pipeline. Once
// those are exhausted, further tokens are pulled from the decoder via Token,
// which also appends them to the pipeline. The sequence ends at end of
// input, or after yielding (nil, err) for a decoding error.
//
// index == len(pipeline) is valid and means "start with the next token that
// has not been decoded yet"; this is the position GeneralState.Peek asks for
// while streaming. An index outside [0, len(pipeline)] produces a sequence
// that yields a single (nil, nil) pair.
func (p *Parser) PeekFrom(index int) iter.Seq2[*Token, error] {
	if index < 0 || index > len(p.pipeline) {
		return func(yield func(*Token, error) bool) {
			yield(nil, nil) // No tokens to yield
		}
	}

	return func(yield func(*Token, error) bool) {
		// len is re-read on every iteration because the consumer may make the
		// pipeline grow while it is being replayed.
		for i := index; i < len(p.pipeline); i++ {
			if !yield(p.pipeline[i], nil) {
				return
			}
		}

		for {
			token, err := p.Token()
			if err != nil {
				yield(nil, err)
				return
			}
			if token == nil {
				// EOF
				return
			}
			if !yield(token, nil) {
				return
			}
		}
	}
}
// Reset empties the stack of open elements. The pipeline and the decoder are
// left untouched.
func (p *Parser) Reset() {
	p.Stack = make([]*Token, 0)
}
// Token decodes the next XML token, wraps it in a *Token, appends it to the
// pipeline and returns it.
//
// It returns (nil, nil) at end of input and when the parser has no decoder
// (see NewFromTokens). Any other decoder error is returned unchanged.
//
// Side effects besides growing the pipeline:
//   - StartElement: the token is recorded as a child of the innermost open
//     element (unless that element's children are already final) and pushed
//     onto Stack.
//   - EndElement: the innermost open element is popped and its children and
//     character data are marked final.
//   - CharData: the text is appended to the accumulated character data of
//     every open element that is not final yet, and the token is appended to
//     LastCharData.
func (p *Parser) Token() (*Token, error) {
	if p.decoder == nil {
		return nil, nil // No more tokens to parse
	}

	start := p.decoder.InputOffset()
	token, err := p.decoder.Token()
	end := p.decoder.InputOffset()

	if err == io.EOF {
		return nil, nil
	}
	if err != nil {
		return nil, err
	}

	customToken := Token{
		parser: p,
		Index:  len(p.pipeline),
		// The byte slices inside a decoder token are only valid until the next
		// call to decoder.Token, so a copy is kept.
		Inner: xml.CopyToken(token),
		// NOTE(review): start + 1 presumably makes the offset 1-based — confirm.
		StartOffset: start + 1,
		EndOffset:   end,
		Stack:       make([]*Token, len(p.Stack)),
	}

	// INFO: these are just pointers, so it should go fast.
	// The snapshot is taken before the stack is modified below: a start
	// element does not see itself, an end element still sees the element it
	// closes.
	copy(customToken.Stack, p.Stack)

	switch t := token.(type) {
	case xml.StartElement:
		attr := mapAttributes(t.Attr)
		customToken.Name = t.Name.Local
		customToken.Attributes = attr
		customToken.Type = StartElement
		if len(p.Stack) > 0 && !p.Stack[len(p.Stack)-1].childrenParsed {
			p.Stack[len(p.Stack)-1].children = append(p.Stack[len(p.Stack)-1].children, &customToken)
		}
		p.Stack = append(p.Stack, &customToken)
	case xml.EndElement:
		if len(p.Stack) > 0 {
			element := p.Stack[len(p.Stack)-1]
			element.childrenParsed = true
			element.chardataParsed = true
			p.Stack = p.Stack[:len(p.Stack)-1]
		}
		customToken.Name = t.Name.Local
		customToken.Attributes = map[string]string{}
		customToken.Type = EndElement
	case xml.CharData:
		text := string(t)
		if text != "" && len(p.Stack) > 0 {
			for i := range p.Stack {
				if !p.Stack[i].chardataParsed {
					p.Stack[i].charData += text
				}
			}
		}
		customToken.Data = text
		customToken.Type = CharData
		p.LastCharData = append(p.LastCharData, &customToken)
	case xml.Comment:
		customToken.Type = Comment
		customToken.Data = string(t)
	case xml.ProcInst:
		customToken.Name = t.Target
		customToken.Data = string(t.Inst)
		customToken.Type = ProcInst
	case xml.Directive:
		customToken.Data = string(t)
		customToken.Type = Directive
	}

	p.pipeline = append(p.pipeline, &customToken)
	return &customToken, nil
}
// Previous returns the tokens that precede pipeline position index, in
// document order. It returns nil when index is outside the pipeline. The
// result shares storage with the pipeline.
func (p *Parser) Previous(index int) (tokens []*Token) {
	if index >= 0 && index < len(p.pipeline) {
		tokens = p.pipeline[:index]
	}
	return tokens
}
// All drains the parser and returns the complete pipeline. On the first
// decoding error it returns nil together with that error.
func (p *Parser) All() ([]*Token, error) {
	for _, iterErr := range p.Iterate() {
		if iterErr == nil {
			continue
		}
		return nil, iterErr
	}
	return p.pipeline, nil
}
// Iterate returns a sequence over all tokens of the document. Tokens already
// in the pipeline are replayed first; after that new tokens are decoded on
// demand. A decoding error is yielded as (nil, err) and ends the sequence.
//
// The read position lives outside the returned function, so ranging over
// the same sequence value a second time continues where the first pass
// stopped.
func (p *Parser) Iterate() iter.Seq2[*Token, error] {
	pos := 0

	return func(yield func(*Token, error) bool) {
		for {
			// INFO: pos should be at most len(p.pipeline).
			if pos < len(p.pipeline) {
				buffered := p.pipeline[pos]
				pos++
				if !yield(buffered, nil) {
					return
				}
				continue
			}

			fresh, err := p.Token()
			if err != nil {
				yield(nil, err)
				return
			}
			if fresh == nil {
				return // EOF
			}

			pos++
			if !yield(fresh, nil) {
				return
			}
		}
	}
}
// mapAttributes builds a lookup table from attribute local name to value.
// The namespace of an attribute is ignored; when two attributes share a
// local name the later one wins. The result is never nil.
func mapAttributes(attrs []xml.Attr) map[string]string {
	byName := map[string]string{}
	for i := range attrs {
		byName[attrs[i].Name.Local] = attrs[i].Value
	}
	return byName
}

View File

@@ -1,97 +0,0 @@
package xmlparsing
import (
"testing"
)
// TestState is the state value the tests in this file pass to Iterate.
type TestState struct {
	// ParsedElements collects the token names seen during iteration.
	ParsedElements []string
}
// TestIterate_ValidXML checks that a well-formed document containing an
// element, a comment, a processing instruction and a directive iterates
// without error and produces at least one token.
func TestIterate_ValidXML(t *testing.T) {
	doc := `<root>
<child attr="value">Text</child>
<!-- This is a comment -->
<?xml-stylesheet type="text/css" href="style.css"?>
<!DOCTYPE note>
</root>`

	state := TestState{}
	for result, err := range Iterate(doc, state) {
		switch {
		case err != nil:
			t.Fatalf("Unexpected error: %v", err)
		case result == nil:
			t.Fatal("Received nil token result")
		}
		state.ParsedElements = append(state.ParsedElements, result.Token.Name)
	}

	if len(state.ParsedElements) == 0 {
		t.Fatal("No elements were parsed")
	}
}
// TestIterate_InvalidXML checks that a document with mismatched tags
// surfaces an error at some point during iteration.
func TestIterate_InvalidXML(t *testing.T) {
	doc := `<root><child></root>`
	state := TestState{}

	var lastErr error
	for _, err := range Iterate(doc, state) {
		if err == nil {
			continue
		}
		lastErr = err
	}

	if lastErr == nil {
		t.Fatal("Expected error, but got nil")
	}
}
// TestIterate_EmptyXML checks that iterating over an empty document ends
// without reporting any error.
func TestIterate_EmptyXML(t *testing.T) {
	xmlData := ""
	state := TestState{}

	for _, err := range Iterate(xmlData, state) {
		if err != nil {
			// The old message referred to a nonexistent iter.ErrEnd and
			// contradicted the check: any error is a failure here.
			t.Fatalf("Unexpected error for empty input: %v", err)
		}
	}
}
// TestIterate_CharDataTracking counts the character-data results produced
// for a small document. Five are expected: the two text nodes plus the
// three whitespace runs between the tags.
func TestIterate_CharDataTracking(t *testing.T) {
	xmlData := `<root>
<child>First</child>
<child>Second</child>
</root>`

	state := TestState{}
	charDataCount := 0

	for tokenResult, err := range Iterate(xmlData, state) {
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		// Fail cleanly instead of panicking on a nil result.
		if tokenResult == nil {
			t.Fatal("Received nil token result")
		}
		if tokenResult.Token.Name == "CharData" {
			charDataCount++
		}
	}

	if charDataCount != 5 {
		t.Fatalf("Expected 5 CharData elements, got %d", charDataCount)
	}
}
// TestIterate_AttributeParsing checks that the attributes of the <child>
// start element are exposed with their values. The test fails if no such
// element is seen at all, so it cannot pass vacuously.
func TestIterate_AttributeParsing(t *testing.T) {
	xmlData := `<root>
<child attr1="value1" attr2="value2">Content</child>
</root>`

	state := TestState{}
	sawChild := false

	for tokenResult, err := range Iterate(xmlData, state) {
		if err != nil {
			t.Fatalf("Unexpected error: %v", err)
		}
		if tokenResult == nil {
			t.Fatal("Received nil token result")
		}
		if tokenResult.Token.Name == "child" && tokenResult.Token.Type == StartElement {
			sawChild = true
			if tokenResult.Token.Attributes["attr1"] != "value1" || tokenResult.Token.Attributes["attr2"] != "value2" {
				t.Fatalf("Incorrect attributes parsed: %v", tokenResult.Token.Attributes)
			}
		}
	}

	if !sawChild {
		t.Fatal("No <child> start element was produced")
	}
}

View File

@@ -1,126 +0,0 @@
package xmlparsing
import (
"encoding/xml"
"iter"
"strings"
)
// TokenType identifies the kind of a Token.
type TokenType int

// Token kinds. StartElement is the zero value of TokenType.
const (
	// StartElement is assigned to tokens created from xml.StartElement.
	StartElement TokenType = iota
	// EndElement is assigned to tokens created from xml.EndElement.
	EndElement
	// CharData is assigned to tokens created from xml.CharData.
	CharData
	// Comment is assigned to tokens created from xml.Comment.
	Comment
	// ProcInst is assigned to tokens created from xml.ProcInst.
	ProcInst
	// Directive is assigned to tokens created from xml.Directive.
	Directive
)
// Token is one XML token together with its position and context in the
// parser's pipeline.
type Token struct {
	// Name is the local element name for start/end elements and the target
	// for processing instructions.
	Name string
	// Attributes maps attribute local names to values; the parser sets it for
	// start elements and to an empty map for end elements.
	Attributes map[string]string
	// Inner is the token as returned by the encoding/xml decoder.
	Inner xml.Token
	// Type is the kind of the token.
	Type TokenType
	// Data is the text of character data, comments, directives and
	// processing instructions.
	Data string
	// Stack is a snapshot of the parser's open-element stack taken when the
	// token was created.
	Stack []*Token
	// StartOffset and EndOffset are derived from the decoder's input offset
	// before and after the token was read.
	StartOffset int64
	EndOffset int64
	// Index is the position of the token in the parser's pipeline.
	Index int
	// charData accumulates the text found inside a start element.
	charData string
	// children collects the start elements recorded as children of this one.
	children []*Token
	// parser is the Parser that produced the token.
	parser *Parser
	// childrenParsed and chardataParsed report that children and charData
	// are final.
	childrenParsed bool
	chardataParsed bool
}
// String currently always returns the empty string; no textual
// representation of the token is built.
func (t *Token) String() string {
	return ""
}
// Element returns all tokens of the element opened by t: t itself, every
// token inside it, and the end element that closes it. Tokens that have not
// been decoded yet are pulled from the parser.
//
// It returns nil when t is not a start element or has no parser. If
// decoding fails or the input ends early, the tokens collected so far are
// returned.
func (t *Token) Element() (tokens []*Token) {
	if t.Type != StartElement || t.parser == nil {
		return nil
	}

	// The closing tag is found by nesting depth, not by name: comparing names
	// alone would stop at the end tag of a nested element with the same name.
	depth := 0
	for token, err := range t.parser.PeekFrom(t.Index) {
		if err != nil || token == nil {
			return tokens
		}

		tokens = append(tokens, token)

		switch token.Type {
		case StartElement:
			depth++
		case EndElement:
			depth--
			if depth <= 0 {
				return tokens
			}
		}
	}

	return tokens
}
// Next returns the parser's token sequence starting at t's own pipeline
// position, i.e. the value of PeekFrom(t.Index).
func (t *Token) Next() iter.Seq2[*Token, error] {
	return t.parser.PeekFrom(t.Index)
}
// Previous returns the tokens that precede t in the parser's pipeline, in
// document order. The first token of a pipeline has no predecessors and
// yields nil.
func (t *Token) Previous() (tokens []*Token) {
	if t.Index > 0 {
		tokens = t.parser.Previous(t.Index)
	}
	return tokens
}
// Children returns the start elements that are direct children of t.
//
// When the parser has already closed t, the list it recorded is returned.
// Otherwise the element is read to its end first. If that read made the
// parser finalize the list, the parser's list is used; if not, the direct
// children are derived from the element's tokens by nesting depth. Either
// way t itself and deeper descendants are excluded, and children the parser
// already recorded are not added a second time.
//
// It returns nil when t is not a start element.
func (t *Token) Children() (tokens []*Token) {
	if t.childrenParsed {
		return t.children
	}

	elementTokens := t.Element()
	if len(elementTokens) == 0 {
		return nil
	}

	// Reading the element may have driven the parser past the closing tag,
	// in which case it has completed t.children itself.
	if t.childrenParsed {
		return t.children
	}

	direct := make([]*Token, 0, len(t.children))
	depth := 0
	for _, token := range elementTokens {
		switch token.Type {
		case StartElement:
			// depth 0 is t itself, depth 1 is a direct child.
			if depth == 1 {
				direct = append(direct, token)
			}
			depth++
		case EndElement:
			depth--
		}
	}

	t.children = direct
	t.childrenParsed = true
	return t.children
}
// CharData returns the text belonging to t. For character data, comments,
// processing instructions and directives this is the token's own Data. For
// an element it is the concatenation of all character data inside it, read
// on demand and cached. Tokens for which Element yields nothing return "".
func (t *Token) CharData() string {
	switch t.Type {
	case CharData, ProcInst, Comment, Directive:
		return t.Data
	}

	if t.chardataParsed {
		return t.charData
	}

	tokens := t.Element()
	if len(tokens) == 0 {
		return ""
	}

	var text strings.Builder
	for _, token := range tokens {
		if token.Type != CharData {
			continue
		}
		text.WriteString(token.Data)
	}

	t.chardataParsed = true
	t.charData = text.String()
	return t.charData
}
// SubParser returns a Parser that replays the tokens of the element opened
// by t, as returned by Element. It returns nil when t is not a start element.
func (t *Token) SubParser() *Parser {
	if t.Type == StartElement {
		return NewFromTokens(t.Element())
	}
	return nil
}

View File

@@ -2,42 +2,13 @@ package xmlparsing
import (
"iter"
"slices"
"sync"
"time"
)
// ParseSource names the kind of origin a parse run was started from.
type ParseSource int

// Parse origins. SourceUnknown is the zero value.
const (
	SourceUnknown ParseSource = iota
	Path
	Commit
)

// ParseMeta describes a single parse run.
type ParseMeta struct {
	Source  ParseSource
	BaseDir string
	Commit  string
	Date    time.Time
	// FailedPaths lists the paths that Failed reports as failed.
	FailedPaths []string
}

// Equals reports whether p and other describe the same parse run: same
// source, base directory, commit and instant. FailedPaths is not compared.
//
// The dates are compared with time.Time.Equal rather than ==, so the same
// instant is recognized even when the two values carry different Locations
// or monotonic clock readings.
func (p ParseMeta) Equals(other ParseMeta) bool {
	return p.Source == other.Source &&
		p.BaseDir == other.BaseDir &&
		p.Commit == other.Commit &&
		p.Date.Equal(other.Date)
}

// Failed reports whether path is listed in p.FailedPaths.
func (p ParseMeta) Failed(path string) bool {
	return slices.Contains(p.FailedPaths, path)
}
// An XMLParser is a struct that holds serialized XML data of a specific type. It combines multiple parses if a successful parse cannot serialize the data from a path.
// An XMLParser holds serialized XML data of a specific type.
type XMLParser[T IXMLItem] struct {
// INFO: map is type map[string]*T
Items sync.Map
// INFO: map is type [string]ItemInfo
Infos sync.Map
// INFO: Resolver is used to resolve references (back-links) between XML items.
Resolver Resolver[T]
@@ -51,18 +22,7 @@ func NewXMLParser[T IXMLItem]() *XMLParser[T] {
return &XMLParser[T]{Resolver: *NewResolver[T]()}
}
// INFO: To parse something, call Prepare, then Serialize, then Cleanup.
// Prepare and Cleanup are called once per parse. Serialize is called for
// every path and can be called concurrently.
//
// Prepare replaces the item array with an empty one whose capacity equals
// the previous length, and clears the resolver. Both happen under p.mu.
func (p *XMLParser[T]) Prepare() {
	p.mu.Lock()
	defer p.mu.Unlock()

	previous := len(p.array)
	p.array = make([]T, 0, previous)
	p.Resolver.Clear()
}
func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, latest ParseMeta) error {
func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string) error {
if err := UnmarshalFile(path, dataholder); err != nil {
return err
}
@@ -72,7 +32,6 @@ func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, late
for _, item := range newItems {
// INFO: Mostly it's just one ID, so the double loop is not that bad.
for _, id := range item.Keys() {
p.Infos.Store(id, ItemInfo{Source: path, Parse: latest})
p.Items.Store(id, &item)
}
@@ -85,43 +44,6 @@ func (p *XMLParser[T]) Serialize(dataholder XMLRootElement[T], path string, late
return nil
}
// INFO: Cleanup is called after all paths have been serialized.
//
// Every stored info whose parse does not equal latest is stale. A stale
// entry is deleted from Infos and Items unless its source path is marked as
// failed in latest. In that case the previously stored item is kept and
// added once to the array and to the resolver.
func (p *XMLParser[T]) Cleanup(latest ParseMeta) {
	var stale []any
	var kept []*T

	p.Infos.Range(func(key, value any) bool {
		info := value.(ItemInfo)
		if info.Parse.Equals(latest) {
			return true
		}
		if !latest.Failed(info.Source) {
			stale = append(stale, key)
			return true
		}

		loaded, ok := p.Items.Load(key)
		if !ok {
			return true
		}
		// An item can be stored under several keys; keep each pointer once.
		if ptr := loaded.(*T); !slices.Contains(kept, ptr) {
			kept = append(kept, ptr)
		}
		return true
	})

	for _, key := range stale {
		p.Infos.Delete(key)
		p.Items.Delete(key)
	}

	p.mu.Lock()
	defer p.mu.Unlock()

	for _, ptr := range kept {
		p.array = append(p.array, *ptr)
		p.addResolvable(*ptr)
	}
}
func (p *XMLParser[T]) addResolvable(item T) {
// INFO: If the item has a GetReferences method, we add the references to the resolver.
if rr, ok := any(item).(ReferenceResolver[T]); ok {
@@ -157,14 +79,6 @@ func (a *XMLParser[T]) String() (s string) {
return
}
// Info returns the ItemInfo stored for id, or the zero ItemInfo when nothing
// is stored under that id.
func (p *XMLParser[T]) Info(id string) ItemInfo {
	if stored, found := p.Infos.Load(id); found {
		return stored.(ItemInfo)
	}
	return ItemInfo{}
}
func (p *XMLParser[T]) Item(id any) *T {
item, ok := p.Items.Load(id)
if !ok {

View File

@@ -1 +0,0 @@
package xmlparsing