XML parsing overhaul

This commit is contained in:
Simon Martens
2024-11-22 00:35:27 +01:00
parent b93256c522
commit bc244fbad4
26 changed files with 507 additions and 352 deletions

View File

@@ -10,34 +10,40 @@ import (
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
)
type KGPZXML[T any] interface {
Append(data T) T
type XMLItem interface {
fmt.Stringer
GetIDs() []string
}
type XMLProvider[T KGPZXML[T]] struct {
mu sync.Mutex
paths []string
Items T
type XMLProvider[T XMLItem] struct {
Paths []string
Items sync.Map
mu sync.Mutex
}
type Library struct {
Agents *AgentProvider
Places *PlaceProvider
Works *WorkProvider
Categories *CategoryProvider
Issues *IssueProvider
Pieces *PieceProvider
Agents *XMLProvider[Agent]
Places *XMLProvider[Place]
Works *XMLProvider[Work]
Categories *XMLProvider[Category]
Issues *XMLProvider[Issue]
Pieces *XMLProvider[Piece]
}
// String returns a multi-line summary of the library: one labelled line per
// provider, each holding whatever that provider's own String method yields.
func (l *Library) String() string {
	const layout = "Agents: %s\nPlaces: %s\nWorks: %s\nCategories: %s\nIssues: %s\nPieces: %s\n"

	return fmt.Sprintf(
		layout,
		l.Agents.String(),
		l.Places.String(),
		l.Works.String(),
		l.Categories.String(),
		l.Issues.String(),
		l.Pieces.String(),
	)
}
func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library {
return &Library{
Agents: NewAgentProvider(agentpaths),
Places: NewPlaceProvider(placepaths),
Works: NewWorkProvider(workpaths),
Categories: NewCategoryProvider(categorypaths),
Issues: NewIssueProvider(issuepaths),
Pieces: NewPieceProvider(piecepaths),
Agents: &XMLProvider[Agent]{Paths: agentpaths},
Places: &XMLProvider[Place]{Paths: placepaths},
Works: &XMLProvider[Work]{Paths: workpaths},
Categories: &XMLProvider[Category]{Paths: categorypaths},
Issues: &XMLProvider[Issue]{Paths: issuepaths},
Pieces: &XMLProvider[Piece]{Paths: piecepaths},
}
}
@@ -47,84 +53,98 @@ func (l *Library) Serialize() {
go func() {
defer wg.Done()
err := l.Agents.Serialize()
if err != nil {
l.Agents = nil
lwg := sync.WaitGroup{}
for _, path := range l.Places.Paths {
lwg.Add(1)
go l.Places.Serialize(NewPlaceRoot(), path, &lwg)
}
lwg.Wait()
}()
go func() {
defer wg.Done()
err := l.Places.Serialize()
if err != nil {
l.Places = nil
lwg := sync.WaitGroup{}
for _, path := range l.Agents.Paths {
lwg.Add(1)
go l.Agents.Serialize(NewAgentRoot(), path, &lwg)
}
lwg.Wait()
}()
go func() {
defer wg.Done()
err := l.Works.Serialize()
if err != nil {
l.Works = nil
lwg := sync.WaitGroup{}
for _, path := range l.Categories.Paths {
lwg.Add(1)
go l.Categories.Serialize(NewCategoryRoot(), path, &lwg)
}
lwg.Wait()
}()
go func() {
defer wg.Done()
err := l.Categories.Serialize()
if err != nil {
l.Categories = nil
lwg := sync.WaitGroup{}
for _, path := range l.Works.Paths {
lwg.Add(1)
go l.Works.Serialize(NewWorkRoot(), path, &lwg)
}
lwg.Wait()
}()
go func() {
defer wg.Done()
err := l.Issues.Serialize()
if err != nil {
l.Issues = nil
lwg := sync.WaitGroup{}
for _, path := range l.Issues.Paths {
lwg.Add(1)
go l.Issues.Serialize(NewIssueRoot(), path, &lwg)
}
lwg.Wait()
}()
go func() {
defer wg.Done()
err := l.Pieces.Serialize()
if err != nil {
l.Pieces = nil
lwg := sync.WaitGroup{}
for _, path := range l.Pieces.Paths {
lwg.Add(1)
go l.Pieces.Serialize(NewPieceRoot(), path, &lwg)
}
lwg.Wait()
}()
wg.Wait()
}
// TODO: make Items into a sync.Map
func (p *XMLProvider[T]) Serialize() error {
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string, wg *sync.WaitGroup) error {
// Introduce goroutine for every path, locking on append:
var wg sync.WaitGroup
for _, path := range p.paths {
wg.Add(1)
go func(path string) {
defer wg.Done()
var data T
if err := UnmarshalFile(path, &data); err != nil {
return
}
p.mu.Lock()
defer p.mu.Unlock()
p.Items = p.Items.Append(data)
}(path)
if err := UnmarshalFile(path, dataholder); err != nil {
logging.Error(err, "Could not unmarshal file: "+path)
return err
}
for _, item := range dataholder.Children() {
// INFO: Mostly it's just one ID, so the double loop is not that bad.
for _, id := range item.GetIDs() {
p.Items.Store(id, item)
}
}
if wg != nil {
wg.Done()
}
wg.Wait()
return nil
}
func (a *XMLProvider[T]) String() string {
a.mu.Lock()
defer a.mu.Unlock()
return fmt.Sprintf("Items: %s", a.Items)
var s string
a.Items.Range(func(key, value interface{}) bool {
v := value.(T)
s += v.String()
return true
})
return s
}
func UnmarshalFile[T any](filename string, data *T) error {
func UnmarshalFile[T any](filename string, data T) error {
xmlFile, err := os.Open(filename)
if err != nil {
logging.Error(err, "Could not open file: "+filename)
@@ -138,7 +158,7 @@ func UnmarshalFile[T any](filename string, data *T) error {
logging.Error(err, "Could not read file: "+filename)
return err
}
err = xml.Unmarshal(byteValue, data)
err = xml.Unmarshal(byteValue, &data)
if err != nil {
logging.Error(err, "Could not unmarshal file: "+filename)
@@ -146,3 +166,44 @@ func UnmarshalFile[T any](filename string, data *T) error {
}
return nil
}
// Item looks up the entry stored under id. It returns nil when no entry
// exists; otherwise it returns a pointer to a local copy of the stored value,
// so writing through the pointer does not replace what the provider holds.
func (p *XMLProvider[T]) Item(id string) *T {
	if stored, found := p.Items.Load(id); found {
		typed := stored.(T)
		return &typed
	}
	return nil
}
// Find walks every stored entry and collects the values for which fn reports
// true. The result is nil when nothing matches.
//
// Entries are visited per key, so a value stored under several keys is tested
// (and, if it matches, returned) once per key. sync.Map.Range does not promise
// any particular order.
func (p *XMLProvider[T]) Find(fn func(T) bool) []T {
	var matches []T
	p.Items.Range(func(_, raw any) bool {
		if candidate := raw.(T); fn(candidate) {
			matches = append(matches, candidate)
		}
		return true // keep iterating over the whole map
	})
	return matches
}
// FindKey walks every stored entry and collects the values whose key
// satisfies fn. Keys are asserted to be strings. The result is nil when no
// key matches; sync.Map.Range does not promise any particular order.
func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T {
	var matches []T
	p.Items.Range(func(rawKey, rawValue any) bool {
		if id := rawKey.(string); fn(id) {
			matches = append(matches, rawValue.(T))
		}
		return true // keep iterating over the whole map
	})
	return matches
}
// All returns the value of every stored entry, one element per key. The
// result is nil when the provider holds nothing; sync.Map.Range does not
// promise any particular order.
func (p *XMLProvider[T]) All() []T {
	var everything []T
	collect := func(_, raw any) bool {
		everything = append(everything, raw.(T))
		return true // never stop early
	}
	p.Items.Range(collect)
	return everything
}