Added time & date of the last parse to the item metadata

Simon Martens
2024-12-02 16:08:37 +01:00
parent e6b844cae1
commit 3dbbe6629c
3 changed files with 63 additions and 45 deletions

View File

@@ -2,6 +2,5 @@ package xmlprovider
 type ItemInfo struct {
     Source string
-    Date   string
-    Commit string
+    Parse  *ParseMeta
 }
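
For orientation, a minimal sketch of how the reshaped ItemInfo is meant to be read: instead of carrying its own Date/Commit strings, an item now points at the ParseMeta of the run that produced it (ParseMeta is added further down in this commit). The helper below is illustrative only, not part of the repository, and is written as if it lived inside package xmlprovider.

    // Hypothetical helper: report which commit and parse run an item came from.
    // Assumes info.Parse may be nil for items stored before any parse metadata existed.
    func lastParsedAt(info ItemInfo) (commit string, date time.Time) {
        if info.Parse == nil {
            return "", time.Time{}
        }
        return info.Parse.Commit, info.Parse.Date
    }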

View File

@@ -42,12 +42,12 @@ func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, iss
 func (l *Library) Serialize(commit string) {
     wg := sync.WaitGroup{}
-    l.Prepare()
+    l.Prepare(commit)
     for _, path := range l.Places.Paths {
         wg.Add(1)
         go func() {
-            l.Places.Serialize(NewPlaceRoot(), path, commit)
+            l.Places.Serialize(NewPlaceRoot(), path)
             wg.Done()
         }()
     }
@@ -55,7 +55,7 @@ func (l *Library) Serialize(commit string) {
     for _, path := range l.Agents.Paths {
         wg.Add(1)
         go func() {
-            l.Agents.Serialize(NewAgentRoot(), path, commit)
+            l.Agents.Serialize(NewAgentRoot(), path)
             wg.Done()
         }()
     }
@@ -63,7 +63,7 @@ func (l *Library) Serialize(commit string) {
     for _, path := range l.Categories.Paths {
         wg.Add(1)
         go func() {
-            l.Categories.Serialize(NewCategoryRoot(), path, commit)
+            l.Categories.Serialize(NewCategoryRoot(), path)
             wg.Done()
         }()
     }
@@ -71,7 +71,7 @@ func (l *Library) Serialize(commit string) {
     for _, path := range l.Works.Paths {
         wg.Add(1)
         go func() {
-            l.Works.Serialize(NewWorkRoot(), path, commit)
+            l.Works.Serialize(NewWorkRoot(), path)
             wg.Done()
         }()
     }
@@ -79,7 +79,7 @@ func (l *Library) Serialize(commit string) {
     for _, path := range l.Issues.Paths {
         wg.Add(1)
         go func() {
-            l.Issues.Serialize(NewIssueRoot(), path, commit)
+            l.Issues.Serialize(NewIssueRoot(), path)
             wg.Done()
         }()
     }
@@ -87,7 +87,7 @@ func (l *Library) Serialize(commit string) {
     for _, path := range l.Pieces.Paths {
         wg.Add(1)
         go func() {
-            l.Pieces.Serialize(NewPieceRoot(), path, commit)
+            l.Pieces.Serialize(NewPieceRoot(), path)
             wg.Done()
         }()
     }
@@ -101,13 +101,13 @@ func (l *Library) Serialize(commit string) {
 // TODO: Prepare resets the list of failed parses for a new parse.
 // We need to set the logs accordingly.
-func (l *Library) Prepare() {
-    l.Agents.Prepare()
-    l.Places.Prepare()
-    l.Works.Prepare()
-    l.Categories.Prepare()
-    l.Issues.Prepare()
-    l.Pieces.Prepare()
+func (l *Library) Prepare(commit string) {
+    l.Agents.Prepare(commit)
+    l.Places.Prepare(commit)
+    l.Works.Prepare(commit)
+    l.Categories.Prepare(commit)
+    l.Issues.Prepare(commit)
+    l.Pieces.Prepare(commit)
 }

 func (l *Library) Cleanup(commit string) {
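
Taken together, the Library changes move the commit to Prepare, which records it once per run (together with the timestamp, via ParseMeta); the per-path Serialize calls no longer carry it, and Library.Cleanup keeps its commit parameter. A hedged caller-side sketch of the intended order, assuming Serialize waits on its WaitGroup before returning (the wg is visible above, the wait itself is outside these hunks); reload is a placeholder name, not part of the repository.

    // Illustrative only; written as if alongside the Library code.
    func reload(l *Library, commit string) {
        l.Serialize(commit) // internally calls l.Prepare(commit) before spawning the per-path goroutines
        l.Cleanup(commit)   // signature unchanged in this commit; the provider-level Cleanup below does the dropping
    }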

View File

@@ -12,6 +12,11 @@ import (
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging" "github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
) )
type ParseMeta struct {
Commit string
Date time.Time
}
type XMLItem interface { type XMLItem interface {
fmt.Stringer fmt.Stringer
GetIDs() []string GetIDs() []string
@@ -22,6 +27,7 @@ type Collection[T XMLItem] struct {
     lock sync.Mutex
 }

+// An XMLProvider holds serialized XML data of a specific type. It combines multiple parses if the latest parse could not serialize the data from a path.
 type XMLProvider[T XMLItem] struct {
     Paths []string
     // INFO: map is type [string]T
@@ -32,16 +38,24 @@ type XMLProvider[T XMLItem] struct {
     mu     sync.Mutex
     failed []string
+    parses []ParseMeta
 }

-func (p *XMLProvider[T]) Prepare() {
+// INFO: To parse something, we call Prepare, then Serialize, then Cleanup.
+// Serialize can be called concurrently.
+func (p *XMLProvider[T]) Prepare(commit string) {
     p.mu.Lock()
     defer p.mu.Unlock()
     p.failed = make([]string, 0)
+    p.parses = append(p.parses, ParseMeta{Commit: commit, Date: time.Now()})
 }

-func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error {
-    date := time.Now().Format("2006-01-02")
+func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) error {
+    if len(p.parses) == 0 {
+        logging.Error(fmt.Errorf("No commit set"), "No commit set")
+        return fmt.Errorf("No commit set")
+    }
     // Introduce goroutine for every path, locking on append:
     if err := UnmarshalFile(path, dataholder); err != nil {
         logging.Error(err, "Could not unmarshal file: "+path)
@@ -55,7 +69,7 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st
     for _, item := range dataholder.Children() {
         // INFO: Mostly it's just one ID, so the double loop is not that bad.
         for _, id := range item.GetIDs() {
-            p.Infos.Store(id, ItemInfo{Source: path, Date: date, Commit: commit})
+            p.Infos.Store(id, ItemInfo{Source: path, Parse: &p.parses[len(p.parses)-1]})
             p.Items.Store(id, item)
         }
     }
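
The two INFO comments above describe the intended cycle: Prepare once per run, Serialize per path (possibly concurrently), then Cleanup. Below is a minimal sketch of that cycle for a single provider, written as if inside package xmlprovider; the repository actually drives this per collection from Library.Serialize, and the root constructor here stands in for NewPlaceRoot, NewAgentRoot, and the like.

    // Illustrative only; error handling omitted for brevity.
    func parseAll[T XMLItem](p *XMLProvider[T], newRoot func() XMLRootElement[T], commit string) {
        p.Prepare(commit) // records ParseMeta{Commit, Date} and resets the failed paths
        var wg sync.WaitGroup
        for _, path := range p.Paths {
            wg.Add(1)
            go func(path string) {
                defer wg.Done()
                p.Serialize(newRoot(), path) // safe to run concurrently per path
            }(path)
        }
        wg.Wait()
        p.Cleanup() // drops items the latest run no longer produced, unless their file failed
    }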
@@ -63,6 +77,33 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st
     return nil
 }

+func (p *XMLProvider[T]) Cleanup() {
+    p.mu.Lock()
+    defer p.mu.Unlock()
+    if len(p.parses) == 0 {
+        logging.Error(fmt.Errorf("Trying to cleanup an empty XMLProvider."))
+        return
+    }
+
+    lastcommit := p.parses[len(p.parses)-1].Commit
+    todelete := make([]string, 0)
+    p.Infos.Range(func(key, value interface{}) bool {
+        info := value.(ItemInfo)
+        if info.Parse.Commit != lastcommit {
+            if !slices.Contains(p.failed, info.Source) {
+                todelete = append(todelete, key.(string))
+            }
+        }
+        return true
+    })
+
+    for _, key := range todelete {
+        p.Infos.Delete(key)
+        p.Items.Delete(key)
+    }
+}
+
 func (a *XMLProvider[T]) String() string {
     var s string
     a.Items.Range(func(key, value interface{}) bool {
@@ -129,7 +170,9 @@ func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T {
 }

 // INFO: Do not use this, except when iterating over a collection multiple times (three times or more).
-// Maps are slow to iterate, but many of the Iterations can only be done once.
+// Maps are slow to iterate, but many of the iterations can only be done once, so it doesn't matter for a
+// few thousand objects. We prefer to look up objects by key and have multiple meaningful keys, along with
+// sensible caching rules to keep the application responsive.
 func (p *XMLProvider[T]) Everything() []T {
     var items []T
     p.Items.Range(func(key, value interface{}) bool {
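
As the extended comment above says, keyed access is preferred over walking the whole map. A small illustration using FindKey, whose signature appears in the hunk header; the predicate, the prefix-based key scheme, and the strings import are assumptions for the sketch, not something this commit defines.

    // Illustrative only: filter by key instead of materializing Everything().
    func withKeyPrefix[T XMLItem](p *XMLProvider[T], prefix string) []T {
        return p.FindKey(func(key string) bool {
            return strings.HasPrefix(key, prefix)
        })
    }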
@@ -138,27 +181,3 @@ func (p *XMLProvider[T]) Everything() []T {
     })
     return items
 }
-
-// TODO: how to find that the item was deleted, and couldn't just be serialized?
-// -> We compare filepaths of failed serializations with filepaths of the items.
-//   - If the item is not in the failed serializations, it was deleted.
-//   - If the item is in the failed serializations, we don't know if it was deleted or not, and we keep it.
-//
-// Consequence: If all serializations completed, we cleanup everything.
-func (p *XMLProvider[T]) Cleanup(commit string) {
-    todelete := make([]string, 0)
-    p.Infos.Range(func(key, value interface{}) bool {
-        info := value.(ItemInfo)
-        if info.Commit != commit {
-            if !slices.Contains(p.failed, info.Source) {
-                todelete = append(todelete, key.(string))
-            }
-        }
-        return true
-    })
-
-    for _, key := range todelete {
-        p.Infos.Delete(key)
-        p.Items.Delete(key)
-    }
-}
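
The removed TODO comment above explains why Cleanup keeps some stale items: a missing item only proves deletion if its source file parsed successfully in the latest run. The new Cleanup applies the same rule, just keyed on the last recorded ParseMeta instead of a passed-in commit. Below is a hedged restatement of the keep/drop decision as a standalone predicate; shouldDrop is not part of the repository, and the nil check is an extra safeguard of this sketch.

    // Illustrative only: an item is dropped when it was tagged by an older parse
    // run AND its source file did not fail in the latest run.
    func shouldDrop(info ItemInfo, lastCommit string, failed []string) bool {
        stale := info.Parse != nil && info.Parse.Commit != lastCommit
        return stale && !slices.Contains(failed, info.Source)
    }

So an item whose file failed to unmarshal keeps its stale ParseMeta and survives Cleanup, matching the reasoning in the removed comment.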