Reworked init method of KGPZ

Simon Martens
2025-01-10 01:14:54 +01:00
parent b65da464bb
commit 54ce05a67c
4 changed files with 73 additions and 86 deletions

View File

@@ -1,7 +1,6 @@
 package app
 
 import (
-	"os"
 	"sync"
 
 	"github.com/Theodor-Springmann-Stiftung/kgpz_web/controllers"
@@ -17,6 +16,7 @@ import (
 // INFO: this holds all the stuff specific to the KGPZ application
 // It implements Map(*fiber.App) error, so it can be used as a MuxProvider
 // It also implements Funcs() map[string]interface{} to map funcs to a template engine
+// It is meant to be constructed once and then used as a singleton.
 
 const (
 	ASSETS_URL_PREFIX = "/assets"
@@ -38,53 +38,62 @@ const (
 )
 
 type KGPZ struct {
-	// GMU is only here to prevent concurrent pulls
-	// or file system operations while parsing
-	gmu sync.Mutex
+	// INFO: We need to prevent concurrent reads and writes to the fs here since
+	// - Git is accessing the FS
+	// - The Library is accessing the FS
+	// So we need to prevent concurrent pulls and serializations
+	// This is what fsmu is for. IT IS NOT FOR SETTING Config, Repo. GND or Library.
+	// Those are only set once during initalization and construction.
+	fsmu sync.Mutex
 
 	Config  *providers.ConfigProvider
 	Repo    *providers.GitProvider
 	GND     *gnd.GNDProvider
 	Library *xmlmodels.Library
 }
 
-func (k *KGPZ) Init() {
-	if k.Config.Debug {
-		// NOTE: validity checks done wrong, but speeding up dev mode:
-		// In dev mode we expect the folder to be a valid repository
-		if _, err := os.Stat(k.Config.FolderPath); err != nil {
-			k.initRepo()
-		} else {
-			go k.initRepo()
-		}
-		k.Serialize()
-		k.InitGND()
-		k.Enrich()
-		return
-	}
-
-	k.initRepo()
-	k.Serialize()
-	k.InitGND()
-	k.Enrich()
-}
-
-func NewKGPZ(config *providers.ConfigProvider) *KGPZ {
+func NewKGPZ(config *providers.ConfigProvider) (*KGPZ, error) {
 	helpers.AssertNonNil(config, "Config is nil")
 	if err := config.Validate(); err != nil {
 		helpers.Assert(err, "Error validating config")
 	}
 
-	return &KGPZ{Config: config}
+	kgpz := &KGPZ{Config: config}
+	err := kgpz.Init()
+	if err != nil {
+		return nil, err
+	}
+	return kgpz, nil
 }
 
-func (k *KGPZ) InitGND() {
-	if k.GND == nil {
-		k.GND = gnd.NewGNDProvider()
-	}
-
-	if err := k.GND.ReadCache(k.Config.GNDPath); err != nil {
-		logging.Error(err, "Error reading GND cache")
-	}
+func (k *KGPZ) Init() error {
+	if gp, err := providers.NewGitProvider(
+		k.Config.Config.GitURL,
+		k.Config.Config.FolderPath,
+		k.Config.Config.GitBranch); err != nil {
+		logging.Error(err, "Error initializing GitProvider. Continuing without Git.")
+	} else {
+		k.Repo = gp
+	}
+
+	if err := k.Serialize(); err != nil {
+		logging.Error(err, "Error parsing XML.")
+		return err
+	}
+
+	if err := k.initGND(); err != nil {
+		logging.Error(err, "Error reading GND-Cache. Continuing.")
+	}
+
+	go k.Enrich()
+	go k.Pull()
+	return nil
+}
+
+func (k *KGPZ) initGND() error {
+	k.GND = gnd.NewGNDProvider()
+	return k.GND.ReadCache(k.Config.GNDPath)
 }
 
 func (k *KGPZ) Routes(srv *fiber.App) error {
@@ -134,15 +143,13 @@ func (k *KGPZ) Funcs() map[string]interface{} {
 }
 
 func (k *KGPZ) Enrich() error {
-	if k.GND == nil {
-		k.InitGND()
-	}
-
 	if k.Library == nil || k.Library.Agents == nil {
 		return nil
 	}
 
 	go func() {
+		k.fsmu.Lock()
+		defer k.fsmu.Unlock()
 		data := xmlmodels.AgentsIntoDataset(k.Library.Agents)
 		k.GND.FetchPersons(data)
 		k.GND.WriteCache(k.Config.GNDPath)
@@ -151,11 +158,11 @@ func (k *KGPZ) Enrich() error {
 	return nil
 }
 
-func (k *KGPZ) Serialize() {
+func (k *KGPZ) Serialize() error {
 	// TODO: this is error handling from hell
 	// Preventing pulling and serializing at the same time
-	k.gmu.Lock()
-	defer k.gmu.Unlock()
+	k.fsmu.Lock()
+	defer k.fsmu.Unlock()
 
 	commit := ""
 	source := xmlprovider.Path
@@ -168,7 +175,8 @@ func (k *KGPZ) Serialize() {
 		k.Library = xmlmodels.NewLibrary()
 	}
 
-	k.Library.Parse(source, k.Config.FolderPath, commit)
+	err := k.Library.Parse(source, k.Config.FolderPath, commit)
+	return err
 }
 
 func (k *KGPZ) IsDebug() bool {
@@ -176,44 +184,22 @@ func (k *KGPZ) IsDebug() bool {
 }
 
 func (k *KGPZ) Pull() {
-	go func() {
-		logging.Info("Pulling Repository...")
-		k.gmu.Lock()
-		if k.Repo == nil {
-			k.gmu.Unlock()
-			return
-		}
-
-		err, changed := k.Repo.Pull()
-		logging.Error(err, "Error pulling GitProvider")
-		// Need to unlock here to prevent deadlock, since Serialize locks the same mutex
-		k.gmu.Unlock()
-
-		if changed {
-			logging.ObjDebug(&k.Repo, "Remote changed. Reparsing")
-			k.Serialize()
-		}
-	}()
-}
-
-func (k *KGPZ) initRepo() {
-	gp, err := providers.NewGitProvider(k.Config.Config.GitURL, k.Config.Config.FolderPath, k.Config.Config.GitBranch)
-	// TODO: what to do if the repo can't be initialized?
-	// What to do if the data can't be read?
-	// Handle in Serialize --> it musttry to initialize the repository if files are missing.
-	if err != nil {
-		logging.Error(err, "Error initializing GitProvider")
-		return
-	}
-
-	k.gmu.Lock()
-	k.Repo = gp
-	k.gmu.Unlock()
-	k.Pull()
-	logging.ObjDebug(&k.Repo, "GitProvider initialized")
+	if k.Repo == nil {
+		return
+	}
+
+	logging.Info("Pulling Repository...")
+	k.fsmu.Lock()
+	err, changed := k.Repo.Pull()
+	logging.Error(err, "Error pulling GitProvider")
+	k.fsmu.Unlock()
+
+	if changed {
+		logging.ObjDebug(&k.Repo, "Remote changed. Reparsing")
+		k.Serialize()
+		k.Enrich()
+	}
 }
 
 func (k *KGPZ) Shutdown() {

View File

@@ -47,9 +47,10 @@ func Init(cfg *providers.ConfigProvider) (*App, error) {
 		logging.SetInfo()
 	}
 
-	kgpz := app.NewKGPZ(cfg)
-	// TODO: this must return an error on failure
-	kgpz.Init()
+	kgpz, err := app.NewKGPZ(cfg)
+	if err != nil {
+		panic(err)
+	}
 
 	engine := Engine(kgpz, cfg)
 	server := server.Create(cfg, engine)

View File

@@ -8,8 +8,9 @@ import (
 )
 
 type Resolver[T XMLItem] struct {
-	index map[string]map[string][]Resolved[T] // Map[typeName][refID] -> []*T
-	mu    sync.RWMutex                        // Synchronization for thread safety
+	// INFO: map[type][ID]
+	index map[string]map[string][]Resolved[T]
+	mu    sync.RWMutex
 }
 
 func NewResolver[T XMLItem]() *Resolver[T] {

View File

@@ -60,8 +60,8 @@ func NewXMLProvider[T XMLItem]() *XMLProvider[T] {
 func (p *XMLProvider[T]) Prepare() {
 	p.mu.Lock()
 	defer p.mu.Unlock()
 	// INFO: We take 1000 here as to not reallocate the memory as mutch.
-	p.Array = make([]T, 0, 1000)
+	p.Array = make([]T, 0, len(p.Array))
 	p.Resolver.Clear()
 }
@@ -72,8 +72,6 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string, la
 		return err
 	}
 
-	p.mu.Lock()
-	defer p.mu.Unlock()
-
 	newItems := dataholder.Children()
 	for _, item := range newItems {
@@ -86,6 +84,8 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string, la
 		p.addResolvable(item)
 	}
 
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
 	p.Array = append(p.Array, newItems...)
 	return nil
 }
@@ -94,9 +94,6 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string, la
 // It deletes all items that have not been parsed in the last commit,
 // and whose filepath has not been marked as failed.
 func (p *XMLProvider[T]) Cleanup(latest ParseMeta) {
-	p.mu.Lock()
-	defer p.mu.Unlock()
-
 	todelete := make([]string, 0)
 	toappend := make([]*T, 0)
 	p.Infos.Range(func(key, value interface{}) bool {
@@ -122,6 +119,8 @@ func (p *XMLProvider[T]) Cleanup(latest ParseMeta) {
 		p.Items.Delete(key)
 	}
 
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
 	for _, item := range toappend {
 		p.Array = append(p.Array, *item)
 		p.addResolvable(*item)