From 3dbbe6629c1d170782ff346cdfe4a0de608b15ad Mon Sep 17 00:00:00 2001 From: Simon Martens Date: Mon, 2 Dec 2024 16:08:37 +0100 Subject: [PATCH] Added time & date of last parse to the thing --- providers/xmlprovider/item.go | 3 +- providers/xmlprovider/library.go | 28 +++++----- providers/xmlprovider/xmlprovider.go | 77 +++++++++++++++++----------- 3 files changed, 63 insertions(+), 45 deletions(-) diff --git a/providers/xmlprovider/item.go b/providers/xmlprovider/item.go index 7a0a472..1fef1ef 100644 --- a/providers/xmlprovider/item.go +++ b/providers/xmlprovider/item.go @@ -2,6 +2,5 @@ package xmlprovider type ItemInfo struct { Source string - Date string - Commit string + Parse *ParseMeta } diff --git a/providers/xmlprovider/library.go b/providers/xmlprovider/library.go index 957e0ef..b670a08 100644 --- a/providers/xmlprovider/library.go +++ b/providers/xmlprovider/library.go @@ -42,12 +42,12 @@ func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, iss func (l *Library) Serialize(commit string) { wg := sync.WaitGroup{} - l.Prepare() + l.Prepare(commit) for _, path := range l.Places.Paths { wg.Add(1) go func() { - l.Places.Serialize(NewPlaceRoot(), path, commit) + l.Places.Serialize(NewPlaceRoot(), path) wg.Done() }() } @@ -55,7 +55,7 @@ func (l *Library) Serialize(commit string) { for _, path := range l.Agents.Paths { wg.Add(1) go func() { - l.Agents.Serialize(NewAgentRoot(), path, commit) + l.Agents.Serialize(NewAgentRoot(), path) wg.Done() }() } @@ -63,7 +63,7 @@ func (l *Library) Serialize(commit string) { for _, path := range l.Categories.Paths { wg.Add(1) go func() { - l.Categories.Serialize(NewCategoryRoot(), path, commit) + l.Categories.Serialize(NewCategoryRoot(), path) wg.Done() }() } @@ -71,7 +71,7 @@ func (l *Library) Serialize(commit string) { for _, path := range l.Works.Paths { wg.Add(1) go func() { - l.Works.Serialize(NewWorkRoot(), path, commit) + l.Works.Serialize(NewWorkRoot(), path) wg.Done() }() } @@ -79,7 +79,7 @@ func 
(l *Library) Serialize(commit string) { for _, path := range l.Issues.Paths { wg.Add(1) go func() { - l.Issues.Serialize(NewIssueRoot(), path, commit) + l.Issues.Serialize(NewIssueRoot(), path) wg.Done() }() } @@ -87,7 +87,7 @@ func (l *Library) Serialize(commit string) { for _, path := range l.Pieces.Paths { wg.Add(1) go func() { - l.Pieces.Serialize(NewPieceRoot(), path, commit) + l.Pieces.Serialize(NewPieceRoot(), path) wg.Done() }() } @@ -101,13 +101,13 @@ func (l *Library) Serialize(commit string) { // TODO: Prepare resets the list of failed parses for a new parse. // We need to set the logs accordingly. -func (l *Library) Prepare() { - l.Agents.Prepare() - l.Places.Prepare() - l.Works.Prepare() - l.Categories.Prepare() - l.Issues.Prepare() - l.Pieces.Prepare() +func (l *Library) Prepare(commit string) { + l.Agents.Prepare(commit) + l.Places.Prepare(commit) + l.Works.Prepare(commit) + l.Categories.Prepare(commit) + l.Issues.Prepare(commit) + l.Pieces.Prepare(commit) } func (l *Library) Cleanup(commit string) { diff --git a/providers/xmlprovider/xmlprovider.go b/providers/xmlprovider/xmlprovider.go index 75b90dc..cefd0f8 100644 --- a/providers/xmlprovider/xmlprovider.go +++ b/providers/xmlprovider/xmlprovider.go @@ -12,6 +12,11 @@ import ( "github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging" ) +type ParseMeta struct { + Commit string + Date time.Time +} + type XMLItem interface { fmt.Stringer GetIDs() []string @@ -22,6 +27,7 @@ type Collection[T XMLItem] struct { lock sync.Mutex } +// An XMLProvider is a struct that holds serialized XML data of a specific type. It combines multiple parses IF a succeeded parse can not serialize the data from a path. 
type XMLProvider[T XMLItem] struct { Paths []string // INFO: map is type [string]T @@ -32,16 +38,24 @@ type XMLProvider[T XMLItem] struct { mu sync.Mutex failed []string + parses []ParseMeta } -func (p *XMLProvider[T]) Prepare() { +// INFO: To parse something, we call Prepare, then Serialize, then Cleanup. +// Serialize can be called concurrently. +func (p *XMLProvider[T]) Prepare(commit string) { p.mu.Lock() defer p.mu.Unlock() p.failed = make([]string, 0) + p.parses = append(p.parses, ParseMeta{Commit: commit, Date: time.Now()}) } -func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error { - date := time.Now().Format("2006-01-02") +func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) error { + if len(p.parses) == 0 { + logging.Error(fmt.Errorf("No commit set"), "No commit set") + return fmt.Errorf("No commit set") + } + // Introduce goroutine for every path, locking on append: if err := UnmarshalFile(path, dataholder); err != nil { logging.Error(err, "Could not unmarshal file: "+path) @@ -55,7 +69,7 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st for _, item := range dataholder.Children() { // INFO: Mostly it's just one ID, so the double loop is not that bad. 
for _, id := range item.GetIDs() { - p.Infos.Store(id, ItemInfo{Source: path, Date: date, Commit: commit}) + p.Infos.Store(id, ItemInfo{Source: path, Parse: &p.parses[len(p.parses)-1]}) p.Items.Store(id, item) } } @@ -63,6 +77,33 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st return nil } +func (p *XMLProvider[T]) Cleanup() { + p.mu.Lock() + defer p.mu.Unlock() + + if len(p.parses) == 0 { + logging.Error(fmt.Errorf("Trying to cleanup an empty XMLProvider.")) + return + } + + lastcommit := p.parses[len(p.parses)-1].Commit + todelete := make([]string, 0) + p.Infos.Range(func(key, value interface{}) bool { + info := value.(ItemInfo) + if info.Parse.Commit != lastcommit { + if !slices.Contains(p.failed, info.Source) { + todelete = append(todelete, key.(string)) + } + } + return true + }) + + for _, key := range todelete { + p.Infos.Delete(key) + p.Items.Delete(key) + } +} + func (a *XMLProvider[T]) String() string { var s string a.Items.Range(func(key, value interface{}) bool { @@ -129,7 +170,9 @@ func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T { } // INFO: Do not use this, except when iterating over a collection multiple times (three times or more). -// Maps are slow to iterate, but many of the Iterations can only be done once. +// Maps are slow to iterate, but many of the Iterations can only be done once, so it doesn't matter for a +// few thousand objects. We prefer to look up objects by key and have multiple meaningful keys; along with +// sensible caching rules to keep the application responsive. func (p *XMLProvider[T]) Everything() []T { var items []T p.Items.Range(func(key, value interface{}) bool { @@ -138,27 +181,3 @@ func (p *XMLProvider[T]) Everything() []T { }) return items } - -// TODO: how to find that the item was deleted, and couldn't just be serialized? -// -> We compare filepaths of failed serializations with filepaths of the items. -// - If the item is not in the failed serializations, it was deleted. 
-// - If the item is in the failed serializations, we don't know if it was deleted or not, and we keep it. -// -// Consequence: If all serializations completed, we cleanup everything. -func (p *XMLProvider[T]) Cleanup(commit string) { - todelete := make([]string, 0) - p.Infos.Range(func(key, value interface{}) bool { - info := value.(ItemInfo) - if info.Commit != commit { - if !slices.Contains(p.failed, info.Source) { - todelete = append(todelete, key.(string)) - } - } - return true - }) - - for _, key := range todelete { - p.Infos.Delete(key) - p.Items.Delete(key) - } -}