// kgpz_web/providers/xmlprovider/xmlprovider.go

package xmlprovider

import (
	"encoding/xml"
	"fmt"
	"io"
	"os"
	"slices"
	"sync"
	"time"

	"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
)

type XMLItem interface {
	fmt.Stringer
	GetIDs() []string
}

type Collection[T XMLItem] struct {
	Collection []T
	lock       sync.Mutex
}

type XMLProvider[T XMLItem] struct {
	Paths []string
	// INFO: map of type map[string]T.
	Items sync.Map
	// INFO: map of type map[string]ItemInfo.
	// It keeps information about the parsing status of the items.
	Infos sync.Map

	mu     sync.Mutex
	failed []string
}
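// Example (illustrative, not part of the original file): after loading a hypothetical
// file "data/agents.xml" that contains an item with ID "a0001", the two maps hold
//
//	Items["a0001"] -> the parsed T value
//	Infos["a0001"] -> ItemInfo{Source: "data/agents.xml", Date: "2025-01-02", Commit: "<sha>"}
//
// so an item can be looked up by ID and its provenance checked without rereading any file.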
func (p *XMLProvider[T]) Prepare() {
	p.mu.Lock()
	defer p.mu.Unlock()
	p.failed = make([]string, 0)
}
// Serialize parses the XML file at path into dataholder and indexes its
// children under each of their IDs, recording source file, date and commit.
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error {
	date := time.Now().Format("2006-01-02")
	// TODO: introduce a goroutine for every path, locking on append
	// (see the sketch after this function).
	if err := UnmarshalFile(path, dataholder); err != nil {
		logging.Error(err, "Could not unmarshal file: "+path)
		logging.ParseMessages.ParseErrors <- logging.ParseMessage{MessageType: logging.ErrorMessage, Message: "Could not unmarshal file: " + path}
		p.mu.Lock()
		defer p.mu.Unlock()
		p.failed = append(p.failed, path)
		return err
	}

	for _, item := range dataholder.Children() {
		// INFO: Mostly it's just one ID, so the double loop is not that bad.
		for _, id := range item.GetIDs() {
			p.Infos.Store(id, ItemInfo{Source: path, Date: date, Commit: commit})
			p.Items.Store(id, item)
		}
	}
	return nil
}
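// The note in Serialize mentions introducing a goroutine for every path, locking on
// append. A minimal sketch of that idea (not part of the original file) could look
// like this; newRoot is a hypothetical factory that yields a fresh root element per
// file, since decoding into a shared one would race.
func (p *XMLProvider[T]) LoadAll(newRoot func() XMLRootElement[T], commit string) {
	var wg sync.WaitGroup
	for _, path := range p.Paths {
		wg.Add(1)
		go func(path string) {
			defer wg.Done()
			// Serialize logs errors itself and records the failed path under p.mu.
			_ = p.Serialize(newRoot(), path, commit)
		}(path)
	}
	wg.Wait()
}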
func (a *XMLProvider[T]) String() string {
	var s string
	a.Items.Range(func(key, value interface{}) bool {
		v := value.(T)
		s += v.String()
		return true
	})
	return s
}
// UnmarshalFile reads the file at filename and unmarshals its XML contents into data.
func UnmarshalFile[T any](filename string, data T) error {
	xmlFile, err := os.Open(filename)
	if err != nil {
		logging.Error(err, "Could not open file: "+filename)
		return err
	}
	defer xmlFile.Close()

	logging.Info("Deserialization: " + filename)
	byteValue, err := io.ReadAll(xmlFile)
	if err != nil {
		logging.Error(err, "Could not read file: "+filename)
		return err
	}

	err = xml.Unmarshal(byteValue, &data)
	if err != nil {
		logging.Error(err, "Could not unmarshal file: "+filename)
		return err
	}
	return nil
}
func (p *XMLProvider[T]) Item(id string) *T {
	item, ok := p.Items.Load(id)
	if !ok {
		return nil
	}
	i := item.(T)
	return &i
}
func (p *XMLProvider[T]) Find(fn func(T) bool) []T {
	var items []T
	p.Items.Range(func(key, value interface{}) bool {
		if fn(value.(T)) {
			items = append(items, value.(T))
		}
		return true
	})
	return items
}
func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T {
	var items []T
	p.Items.Range(func(key, value interface{}) bool {
		if fn(key.(string)) {
			items = append(items, value.(T))
		}
		return true
	})
	return items
}
// INFO: Avoid this except when iterating over the collection multiple times
// (three times or more); see the example below. Maps are slow to iterate, but
// most call sites only need a single pass, which Range covers without
// allocating a slice.
func (p *XMLProvider[T]) Everything() []T {
	var items []T
	p.Items.Range(func(key, value interface{}) bool {
		items = append(items, value.(T))
		return true
	})
	return items
}
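// Illustrative use of the trade-off above (process and walk are hypothetical helpers):
// a single pass goes through Range directly,
//
//	p.Items.Range(func(_, value interface{}) bool {
//		process(value.(T))
//		return true
//	})
//
// while repeated passes are better served by one Everything() snapshot:
//
//	all := p.Everything()
//	for i := 0; i < 3; i++ {
//		walk(all)
//	}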
// TODO: How do we tell that an item was deleted, rather than merely failing to serialize?
// -> We compare the file paths of failed serializations with the source paths of the items:
//   - If the item's source is not among the failed serializations, the item was deleted.
//   - If the item's source is among the failed serializations, we cannot tell whether it
//     was deleted or not, so we keep it.
//
// Consequence: if all serializations completed, every stale item is cleaned up.
func (p *XMLProvider[T]) Cleanup(commit string) {
	todelete := make([]string, 0)
	p.Infos.Range(func(key, value interface{}) bool {
		info := value.(ItemInfo)
		if info.Commit != commit {
			if !slices.Contains(p.failed, info.Source) {
				todelete = append(todelete, key.(string))
			}
		}
		return true
	})

	for _, key := range todelete {
		p.Infos.Delete(key)
		p.Items.Delete(key)
	}
}
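// Usage sketch (illustrative; Person and PersonRoot are hypothetical types implementing
// XMLItem and XMLRootElement[Person] respectively):
//
//	p := &XMLProvider[Person]{Paths: paths}
//	p.Prepare()
//	for _, path := range p.Paths {
//		_ = p.Serialize(&PersonRoot{}, path, commit)
//	}
//	// Items whose (successfully parsed) source file no longer lists them keep an older
//	// commit and are removed; items from files that failed to parse are kept.
//	p.Cleanup(commit)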