Added time & date of the last parse to each item's info (ItemInfo.Parse)

This commit is contained in:
Simon Martens
2024-12-02 16:08:37 +01:00
parent e6b844cae1
commit 3dbbe6629c
3 changed files with 63 additions and 45 deletions

View File

@@ -2,6 +2,5 @@ package xmlprovider
// ItemInfo records where a stored item came from and which parse produced it.
// NOTE(review): this listing appears to show removed and added diff lines side by
// side; Date and Commit look like the older fields that Parse replaces — confirm
// against the actual file.
type ItemInfo struct {
// Source is the path of the file the item was unmarshalled from.
Source string
// Date was filled with the day of the parse, formatted as "2006-01-02".
Date string
// Commit was filled with the commit string passed to Serialize.
Commit string
// Parse points to the metadata (commit and time) of the parse run that stored the item.
Parse *ParseMeta
}

View File

@@ -42,12 +42,12 @@ func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, iss
func (l *Library) Serialize(commit string) {
wg := sync.WaitGroup{}
l.Prepare()
l.Prepare(commit)
for _, path := range l.Places.Paths {
wg.Add(1)
go func() {
l.Places.Serialize(NewPlaceRoot(), path, commit)
l.Places.Serialize(NewPlaceRoot(), path)
wg.Done()
}()
}
@@ -55,7 +55,7 @@ func (l *Library) Serialize(commit string) {
for _, path := range l.Agents.Paths {
wg.Add(1)
go func() {
l.Agents.Serialize(NewAgentRoot(), path, commit)
l.Agents.Serialize(NewAgentRoot(), path)
wg.Done()
}()
}
@@ -63,7 +63,7 @@ func (l *Library) Serialize(commit string) {
for _, path := range l.Categories.Paths {
wg.Add(1)
go func() {
l.Categories.Serialize(NewCategoryRoot(), path, commit)
l.Categories.Serialize(NewCategoryRoot(), path)
wg.Done()
}()
}
@@ -71,7 +71,7 @@ func (l *Library) Serialize(commit string) {
for _, path := range l.Works.Paths {
wg.Add(1)
go func() {
l.Works.Serialize(NewWorkRoot(), path, commit)
l.Works.Serialize(NewWorkRoot(), path)
wg.Done()
}()
}
@@ -79,7 +79,7 @@ func (l *Library) Serialize(commit string) {
for _, path := range l.Issues.Paths {
wg.Add(1)
go func() {
l.Issues.Serialize(NewIssueRoot(), path, commit)
l.Issues.Serialize(NewIssueRoot(), path)
wg.Done()
}()
}
@@ -87,7 +87,7 @@ func (l *Library) Serialize(commit string) {
for _, path := range l.Pieces.Paths {
wg.Add(1)
go func() {
l.Pieces.Serialize(NewPieceRoot(), path, commit)
l.Pieces.Serialize(NewPieceRoot(), path)
wg.Done()
}()
}
@@ -101,13 +101,13 @@ func (l *Library) Serialize(commit string) {
// TODO: Prepare resets the list of failed parses for a new parse.
// We need to set the logs accordingly.
func (l *Library) Prepare() {
l.Agents.Prepare()
l.Places.Prepare()
l.Works.Prepare()
l.Categories.Prepare()
l.Issues.Prepare()
l.Pieces.Prepare()
// Prepare forwards commit to the Prepare method of each provider held by the
// library: agents, places, works, categories, issues and pieces.
func (l *Library) Prepare(commit string) {
l.Agents.Prepare(commit)
l.Places.Prepare(commit)
l.Works.Prepare(commit)
l.Categories.Prepare(commit)
l.Issues.Prepare(commit)
l.Pieces.Prepare(commit)
}
func (l *Library) Cleanup(commit string) {

View File

@@ -12,6 +12,11 @@ import (
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
)
// ParseMeta describes a single parse run of a provider.
type ParseMeta struct {
// Commit is the commit string that was handed to Prepare for this run.
Commit string
// Date is the point in time at which Prepare registered the run.
Date time.Time
}
type XMLItem interface {
fmt.Stringer
GetIDs() []string
@@ -22,6 +27,7 @@ type Collection[T XMLItem] struct {
lock sync.Mutex
}
// An XMLProvider is a struct that holds serialized XML data of a specific type. It combines data from multiple parses if a subsequent parse cannot serialize the data from a path.
type XMLProvider[T XMLItem] struct {
Paths []string
// INFO: map is type [string]T
@@ -32,16 +38,24 @@ type XMLProvider[T XMLItem] struct {
mu sync.Mutex
failed []string
parses []ParseMeta
}
func (p *XMLProvider[T]) Prepare() {
// INFO: A parse consists of three steps: Prepare, then Serialize, then Cleanup.
// Serialize can be called concurrently.
//
// Prepare empties the list of failed paths and registers a new parse run for
// the given commit, stamped with the current time. It holds p.mu while doing so.
func (p *XMLProvider[T]) Prepare(commit string) {
	p.mu.Lock()
	defer p.mu.Unlock()

	run := ParseMeta{Commit: commit, Date: time.Now()}
	p.parses = append(p.parses, run)
	p.failed = []string{}
}
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error {
date := time.Now().Format("2006-01-02")
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) error {
if len(p.parses) == 0 {
logging.Error(fmt.Errorf("No commit set"), "No commit set")
return fmt.Errorf("No commit set")
}
// Introduce goroutine for every path, locking on append:
if err := UnmarshalFile(path, dataholder); err != nil {
logging.Error(err, "Could not unmarshal file: "+path)
@@ -55,7 +69,7 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st
for _, item := range dataholder.Children() {
// INFO: Mostly it's just one ID, so the double loop is not that bad.
for _, id := range item.GetIDs() {
p.Infos.Store(id, ItemInfo{Source: path, Date: date, Commit: commit})
p.Infos.Store(id, ItemInfo{Source: path, Parse: &p.parses[len(p.parses)-1]})
p.Items.Store(id, item)
}
}
@@ -63,6 +77,33 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st
return nil
}
// Cleanup removes entries that were not refreshed by the most recent parse.
//
// An entry is removed from both Infos and Items when the commit recorded in
// its ItemInfo differs from the commit of the last registered parse AND its
// source path is not listed in p.failed. Entries whose source is in p.failed
// are kept. If no parse has been registered, an error is logged and nothing
// is removed. The whole operation runs while holding p.mu.
func (p *XMLProvider[T]) Cleanup() {
	p.mu.Lock()
	defer p.mu.Unlock()

	count := len(p.parses)
	if count == 0 {
		logging.Error(fmt.Errorf("Trying to cleanup an empty XMLProvider."))
		return
	}
	current := p.parses[count-1].Commit

	// Collect the keys first and delete afterwards, instead of deleting from
	// inside the Range callback.
	var stale []string
	p.Infos.Range(func(k, v any) bool {
		meta := v.(ItemInfo)
		if meta.Parse.Commit == current {
			return true
		}
		if slices.Contains(p.failed, meta.Source) {
			return true
		}
		stale = append(stale, k.(string))
		return true
	})

	for _, id := range stale {
		p.Infos.Delete(id)
		p.Items.Delete(id)
	}
}
func (a *XMLProvider[T]) String() string {
var s string
a.Items.Range(func(key, value interface{}) bool {
@@ -129,7 +170,9 @@ func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T {
}
// INFO: Do not use this, except when iterating over a collection multiple times (three times or more).
// Maps are slow to iterate, but many of the Iterations can only be done once.
// Maps are slow to iterate, but many of the iterations can only be done once, so it doesn't matter for a
// few thousand objects. We prefer to lookup objects by key and have multiple meaningful keys; along with
// sensible caching rules to keep the application responsive.
func (p *XMLProvider[T]) Everything() []T {
var items []T
p.Items.Range(func(key, value interface{}) bool {
@@ -138,27 +181,3 @@ func (p *XMLProvider[T]) Everything() []T {
})
return items
}
// Cleanup removes every entry whose recorded commit differs from the given
// commit, unless the entry's source path is listed in p.failed.
//
// TODO: how do we tell that an item was deleted rather than merely not serialized?
// -> We compare the file paths of failed serializations with the file paths of the items.
//   - If the item's file is not among the failed serializations, the item was deleted.
//   - If the item's file is among the failed serializations, we cannot tell whether it
//     was deleted, so we keep it.
//
// Consequence: if all serializations completed, everything stale is cleaned up.
func (p *XMLProvider[T]) Cleanup(commit string) {
	var stale []string
	p.Infos.Range(func(k, v any) bool {
		if meta := v.(ItemInfo); meta.Commit != commit && !slices.Contains(p.failed, meta.Source) {
			stale = append(stale, k.(string))
		}
		return true
	})

	for _, id := range stale {
		p.Infos.Delete(id)
		p.Items.Delete(id)
	}
}