Speed up startup

Simon Martens
2024-11-10 19:05:06 +01:00
parent a81e78c0fd
commit bdd4eeab26
14 changed files with 463 additions and 208 deletions

.gitignore vendored

@@ -3,3 +3,4 @@ data_git/
cache_geo/
cache_gnd/
config.json
kgpz_web


@@ -3,5 +3,5 @@
"git_branch": "main",
"webhook_endpoint": "/webhook",
"webhook_secret": "secret",
"debug": true
"debug": false
}


@@ -5,6 +5,14 @@ import (
"os"
)
func Panic(err error, msg string) {
fmt.Println(msg)
if err != nil {
fmt.Println("Error: ", err)
}
os.Exit(1)
}
func MaybePanic(err error, msg string) {
if err == nil {
return


@@ -2,19 +2,25 @@ package helpers
import "fmt"
func LogOnDebug[T fmt.Stringer](object T, msg string) {
func LogOnDebug[T fmt.Stringer](object *T, msg string) {
if msg != "" {
fmt.Println(msg)
}
fmt.Println(object)
if object != nil {
fmt.Println(*object)
}
}
func LogOnErr[T fmt.Stringer](object T, err error, msg string) {
func LogOnErr[T fmt.Stringer](object *T, err error, msg string) {
if err != nil {
if msg != "" {
fmt.Println(msg)
}
fmt.Println(object)
if object != nil {
fmt.Println(*object)
}
fmt.Println("Error: ", err)
}
}


@@ -1,8 +1,10 @@
package main
import (
"fmt"
"os"
"path/filepath"
"sync"
"githib.com/Theodor-Springmann-Stiftung/kgpz_web/helpers"
"githib.com/Theodor-Springmann-Stiftung/kgpz_web/providers"
@@ -10,24 +12,39 @@ import (
// 1. Check if folder exists
// - If not, clone the repo, if possible or throw if error
// 2. If folder exists, try to pull the repo, and if successful:
// 2. If the folder exists, we try to serialize -- and spawn a goroutine to pull.
// Upon pulling, we read in the current state of the repository, even if it's up to date.
// -> If the repo was changed we execute a callback and parse again.
// -> If pulling fails, we retry after a certain amount of time.
// Still, we can continue if serialization succeeds.
// -> If serialization fails, we throw an error and log it. We keep trying to pull in the background.
// - setup commit date & hash
// - Setup GitHub webhook if set
// 3. Serialize XML DATA
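A minimal sketch of the startup flow these comments describe; startup, retryInterval and the polling loop are illustrative and not part of this commit:

// Sketch only: clone or read the repo, serialize what is on disk, then keep
// pulling in the background and re-serialize whenever the repo changed.
func startup(k *KGPZ, retryInterval time.Duration) {
    k.InitRepo()  // clones or reads the repo; logs errors instead of panicking
    k.Serialize() // panics only if no data can be read at all
    go func() {
        for {
            if k.Repo != nil {
                if err, changed := k.Repo.Pull(); err == nil && changed {
                    k.Serialize() // re-read the XML after a successful pull
                }
            }
            time.Sleep(retryInterval) // retry / poll after a certain amount of time
        }
    }()
}
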
const (
AGENTS_PATH = "XML/akteure.xml"
PLACES_PATH = "XML/orte.xml"
WORKS_PATH = "XML/werke.xml"
CATEGORIES_PATH = "XML/kategorien.xml"
ISSUES_DIR = "XML/stuecke/"
PIECES_DIR = "XML/beitraege/"
)
type Library struct {
smu sync.Mutex
Agents *providers.AgentProvider
Places *providers.PlaceProvider
Works *providers.WorkProvider
Categories *providers.CategoryProvider
Issues *providers.IssueProvider
Pieces *providers.PieceProvider
}
type KGPZ struct {
Config *providers.ConfigProvider
Repo *providers.GitProvider
Agents *providers.AgentProvider
Places *providers.PlaceProvider
Works *providers.WorkProvider
Library
}
func NewKGPZ(config *providers.ConfigProvider) *KGPZ {
@@ -46,86 +63,208 @@ func (k *KGPZ) IsDebug() bool {
return k.Config.Debug
}
func (k *KGPZ) Pull() {
go func(k *KGPZ) {
if k.Repo == nil {
return
}
err, changed := k.Repo.Pull()
if err != nil {
helpers.LogOnErr(&k.Repo, err, "Error pulling repo")
}
if changed {
if k.IsDebug() {
helpers.LogOnDebug(&k.Repo, "GitProvider changed")
}
// Locking is handled in Serialize()
k.Serialize()
}
}(k)
}
func (k *KGPZ) InitRepo() {
gp := providers.NewGitProvider(k.Config.Config.GitURL, k.Config.Config.FolderPath, k.Config.Config.GitBranch)
// If folder exists try to pull, otherwise clone:
// TODO: there is no need to panic if the clone can't be done, just log the errors
// The code will panic if the XML data can't be parsed.
if gp != nil {
if _, err := os.Stat(k.Config.FolderPath); os.IsNotExist(err) {
err := gp.Clone()
gp, err := providers.NewGitProvider(k.Config.Config.GitURL, k.Config.Config.FolderPath, k.Config.Config.GitBranch)
if err != nil {
helpers.LogOnErr(gp, err, "Error cloning repo")
helpers.LogOnErr(&gp, err, "Error creating GitProvider")
return
}
fmt.Println("InitRepo")
k.Repo = gp
k.Pull()
if k.IsDebug() {
helpers.LogOnDebug(&gp, "GitProvider")
}
}
// This panics if the data can't be read and no data has been read before
func (k *KGPZ) Serialize() {
k.smu.Lock()
defer k.smu.Unlock()
// TODO: maybe don't panic if a webhook can be set up; we only need to check the requirements when starting the server
// TODO: do this in parallel goroutines using a waitgroup
agents := k.InitAgents()
if agents == nil && k.Agents != nil {
helpers.LogOnErr(&k.Agents, nil, "Error initializing agents, keeping old state")
} else if agents == nil {
helpers.Panic(nil, "Error initializing agents")
} else {
err := gp.Pull()
if err != nil {
helpers.LogOnErr(gp, err, "Error pulling repo")
}
k.Agents = agents
}
if err := gp.Validate(); err != nil {
helpers.LogOnErr(gp, err, "Error validating repo")
gp = nil
places := k.InitPlaces()
if places == nil && k.Places != nil {
helpers.LogOnErr(&k.Places, nil, "Error initializing places, keeping old state")
} else if places == nil {
helpers.Panic(nil, "Error initializing places")
} else {
k.Places = places
}
if k.IsDebug() && gp != nil {
helpers.LogOnDebug(gp, "GitProvider")
works := k.InitWorks()
if works == nil && k.Works != nil {
helpers.LogOnErr(&k.Works, nil, "Error initializing works, keeping old state")
} else if works == nil {
helpers.Panic(nil, "Error initializing works")
} else {
k.Works = works
}
categories := k.InitCategories()
if categories == nil && k.Categories != nil {
helpers.LogOnErr(&k.Categories, nil, "Error initializing categories, keeping old state")
} else if categories == nil {
helpers.Panic(nil, "Error initializing categories")
} else {
k.Categories = categories
}
issues := k.InitIssues()
if issues == nil && k.Issues != nil {
helpers.LogOnErr(&k.Issues, nil, "Error initializing issues, keeping old state")
} else if issues == nil {
helpers.Panic(nil, "Error initializing issues")
} else {
k.Issues = issues
}
pieces := k.InitPieces()
if pieces == nil && k.Pieces != nil {
helpers.LogOnErr(&k.Pieces, nil, "Error initializing pieces, keeping old state")
} else if pieces == nil {
helpers.Panic(nil, "Error initializing pieces")
} else {
k.Pieces = pieces
}
}
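A hedged sketch of the TODO above (running the Init* calls in parallel goroutines with a waitgroup): swapProvider is an illustrative helper, not part of this commit; it only captures the keep-old-state / panic / swap blocks that Serialize repeats six times.

// Sketch only: run one init function per goroutine and apply the same
// keep-old-state / panic / swap logic to the resulting provider.
func swapProvider[T any](wg *sync.WaitGroup, old **T, init func() *T, what string) {
    wg.Add(1)
    go func() {
        defer wg.Done()
        fresh := init()
        switch {
        case fresh == nil && *old != nil:
            fmt.Println("Error initializing " + what + ", keeping old state")
        case fresh == nil:
            helpers.Panic(nil, "Error initializing "+what)
        default:
            *old = fresh
        }
    }()
}

// Usage inside Serialize(), still guarded by k.smu:
//   var wg sync.WaitGroup
//   swapProvider(&wg, &k.Agents, k.InitAgents, "agents")
//   swapProvider(&wg, &k.Places, k.InitPlaces, "places")
//   ...
//   wg.Wait()
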
func (k *KGPZ) InitAgents() {
func (k *KGPZ) InitAgents() *providers.AgentProvider {
ap := providers.NewAgentProvider([]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)})
if ap != nil {
if err := ap.Load(); err != nil {
helpers.LogOnErr(ap, err, "Error loading agents")
helpers.LogOnErr(&ap, err, "Error loading agents")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(ap, "AgentProvider")
}
helpers.LogOnDebug(&ap, "AgentProvider")
}
return ap
}
func (k *KGPZ) InitPlaces() {
func (k *KGPZ) InitPlaces() *providers.PlaceProvider {
pp := providers.NewPlaceProvider([]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)})
if pp != nil {
if err := pp.Load(); err != nil {
helpers.LogOnErr(pp, err, "Error loading places")
helpers.LogOnErr(&pp, err, "Error loading places")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(pp, "PlaceProvider")
}
helpers.LogOnDebug(&pp, "PlaceProvider")
}
return pp
}
func (k *KGPZ) InitWorks() {
func (k *KGPZ) InitWorks() *providers.WorkProvider {
wp := providers.NewWorkProvider([]string{filepath.Join(k.Config.FolderPath, WORKS_PATH)})
if wp != nil {
if err := wp.Load(); err != nil {
helpers.LogOnErr(wp, err, "Error loading works")
helpers.LogOnErr(&wp, err, "Error loading works")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(wp, "WorkProvider")
}
helpers.LogOnDebug(&wp, "WorkProvider")
}
return wp
}
func (k *KGPZ) InitCategories() {
func (k *KGPZ) InitCategories() *providers.CategoryProvider {
cp := providers.NewCategoryProvider([]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)})
if cp != nil {
if err := cp.Load(); err != nil {
helpers.LogOnErr(cp, err, "Error loading categories")
helpers.LogOnErr(&cp, err, "Error loading categories")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(cp, "CategoryProvider")
helpers.LogOnDebug(&cp, "CategoryProvider")
}
return cp
}
func (k *KGPZ) InitIssues() *providers.IssueProvider {
files, err := getXMLFiles(filepath.Join(k.Config.FolderPath, ISSUES_DIR))
if err != nil {
helpers.MaybePanic(err, "Error getting issues files")
}
cp := providers.NewIssueProvider(*files)
if err := cp.Load(); err != nil {
helpers.LogOnErr(&cp, err, "Error loading issues")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(&cp, "IssueProvider")
}
return cp
}
func (k *KGPZ) InitPieces() *providers.PieceProvider {
files, err := getXMLFiles(filepath.Join(k.Config.FolderPath, PIECES_DIR))
if err != nil {
helpers.MaybePanic(err, "Error getting pieces files")
return nil
}
cp := providers.NewPieceProvider(*files)
if err := cp.Load(); err != nil {
helpers.LogOnErr(&cp, err, "Error loading pieces")
return nil
}
if k.IsDebug() {
helpers.LogOnDebug(&cp, "PieceProvider")
}
return cp
}
func getXMLFiles(path string) (*[]string, error) {
if _, err := os.Stat(path); os.IsNotExist(err) {
return nil, err
}
matches, err := filepath.Glob(filepath.Join(path, "*.xml"))
return &matches, err
}
func main() {
@@ -136,8 +275,5 @@ func main() {
kgpz := NewKGPZ(cfg)
kgpz.InitRepo()
kgpz.InitAgents()
kgpz.InitPlaces()
kgpz.InitWorks()
kgpz.InitCategories()
kgpz.Serialize()
}


@@ -11,11 +11,11 @@ type AgentProvider struct {
type Agent struct {
XMLName xml.Name `xml:"akteur"`
ID string `xml:"id,attr"`
Names []string `xml:"name"`
SortName string `xml:"sortiername"`
Life string `xml:"lebensdaten"`
GND string `xml:"gnd"`
Identifier
AnnotationNote
}


@@ -15,9 +15,10 @@ type Categories struct {
}
type Category struct {
ID string `xml:"id,attr"`
XMLName xml.Name `xml:"kategorie"`
Names []string `xml:"name"`
SortName string `xml:"sortiername"`
Identifier
AnnotationNote
}


@@ -13,9 +13,12 @@ import (
var InvalidBranchError = errors.New("The currently checked out branch does not match the requested branch. Please checkout the correct branch first.")
var InvalidStateError = errors.New("The GitProvider is not in a valid state. Fix the issues or continue without Git data.")
var NoURLProvidedError = errors.New("No URL provided for GitProvider.")
var NoPathProvidedError = errors.New("No path or branch provided for GitProvider.")
// NOTE: GitProvider does not open the files, it can only
// - clone the repo, given an URL
// NOTE: GitProvider does not open XML files, it can only
// - read in information from the repo, given a path
// - clone the repo, given an URL & a path
// - pull the repo, given a path
// In case of success in either case it updates the commit hash and date and closes the repo again.
// The Files are opened and serialized by the FSProvider, which operates on the same file path.
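A small usage sketch of the behaviour this NOTE describes; the URL, path, and the syncData wrapper are placeholders, not code from this commit:

// Sketch only: reads an existing checkout, or clones it first if the path is missing.
func syncData(url, path, branch string) error {
    gp, err := NewGitProvider(url, path, branch)
    if err != nil {
        return err // e.g. NoURLProvidedError, NoPathProvidedError, or a go-git error
    }
    fmt.Println(gp.Commit, gp.Date) // set on every successful Read/Clone/Pull
    if err, changed := gp.Pull(); err == nil && changed {
        fmt.Println("repo changed; re-serialize the XML from", gp.Path)
    }
    return nil
}
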
@@ -29,33 +32,67 @@ type GitProvider struct {
Date time.Time
}
func NewGitProvider(url string, path string, branch string) *GitProvider {
if branch == "" || url == "" || path == "" {
return nil
func NewGitProvider(url string, path string, branch string) (*GitProvider, error) {
// TODO: check if directory is empty
// TODO: force clone
if _, err := os.Stat(path); err == nil {
return GitProviderFromPath(path, branch)
}
return &GitProvider{URL: url, Path: path, Branch: branch}
return GitProviderFromURL(url, path, branch)
}
func (g *GitProvider) Pull() error {
func GitProviderFromPath(path string, branch string) (*GitProvider, error) {
if branch == "" || path == "" {
return nil, NoPathProvidedError
}
gp := GitProvider{Path: path, Branch: branch}
if err := gp.Read(); err != nil {
return nil, err
}
return &gp, nil
}
func GitProviderFromURL(url string, path string, branch string) (*GitProvider, error) {
if url == "" {
return nil, NoURLProvidedError
}
if branch == "" || path == "" {
return nil, NoPathProvidedError
}
gp := GitProvider{URL: url, Path: path, Branch: branch}
if err := gp.Clone(); err != nil {
return nil, err
}
return &gp, nil
}
// Returns true if the repo was updated remotely, false otherwise
func (g *GitProvider) Pull() (error, bool) {
g.mu.Lock()
defer g.mu.Unlock()
branch := plumbing.NewBranchReferenceName(g.Branch)
repo, err := git.PlainOpen(g.Path)
if err != nil {
return err
return err, false
}
wt, err := repo.Worktree()
if err != nil {
return err
return err, false
}
if err := wt.Checkout(&git.CheckoutOptions{
Branch: branch,
Force: true,
}); err != nil {
return err
return err, false
}
if err := wt.Pull(&git.PullOptions{
@@ -63,14 +100,20 @@ func (g *GitProvider) Pull() error {
ReferenceName: branch,
Progress: os.Stdout,
}); err != nil && err != git.NoErrAlreadyUpToDate {
return err
return err, false
} else if err == git.NoErrAlreadyUpToDate {
return nil, false
}
defer wt.Clean(&git.CleanOptions{Dir: true})
return g.setValues(repo)
return g.setValues(repo), true
}
func (g *GitProvider) Clone() error {
if g.URL == "" {
return NoURLProvidedError
}
g.mu.Lock()
defer g.mu.Unlock()
@@ -106,7 +149,7 @@ func (g *GitProvider) Clone() error {
// Implement String Interface
func (g *GitProvider) String() string {
return fmt.Sprintf("GitProvider{URL: %s, Path: %s, Branch: %s, Commit: %s, Date: %s}", g.URL, g.Path, g.Branch, g.Commit, g.Date)
return fmt.Sprintf("GitProvider\nURL: %s\nPath: %s\nBranch: %s\nCommit: %s\nDate: %s\n", g.URL, g.Path, g.Branch, g.Commit, g.Date)
}
func (g *GitProvider) setValues(repo *git.Repository) error {
@@ -124,10 +167,9 @@ func (g *GitProvider) setValues(repo *git.Repository) error {
g.Commit = commit.Hash.String()
g.Date = commit.Author.When
return err
return nil
}
// WARNING: this expects the repo to be in a certain state and is intended to be used in tests.
func (g *GitProvider) Read() error {
g.mu.Lock()
defer g.mu.Unlock()
@@ -137,6 +179,47 @@ func (g *GitProvider) Read() error {
return err
}
if err := g.ValidateBranch(repo); err != nil {
branch := plumbing.NewBranchReferenceName(g.Branch)
wt, err := repo.Worktree()
if err != nil {
return err
}
defer wt.Clean(&git.CleanOptions{Dir: true})
if err := wt.Checkout(&git.CheckoutOptions{
Branch: branch,
Force: true,
}); err != nil {
return err
}
if err := g.ValidateBranch(repo); err != nil {
return err
}
}
return g.setValues(repo)
}
func (g *GitProvider) Validate() error {
repo, err := git.PlainOpen(g.Path)
if err != nil {
return err
}
if err := g.ValidateBranch(repo); err != nil {
return err
}
if err := g.ValidateCommit(); err != nil {
return err
}
return nil
}
func (g *GitProvider) ValidateBranch(repo *git.Repository) error {
head, err := repo.Head()
if err != nil {
return err
@@ -147,13 +230,10 @@ func (g *GitProvider) Read() error {
return InvalidBranchError
}
return g.setValues(repo)
return nil
}
func (g *GitProvider) Validate() error {
g.mu.Lock()
defer g.mu.Unlock()
func (g *GitProvider) ValidateCommit() error {
if g.Commit == "" || g.Date.IsZero() {
return InvalidStateError
}


@@ -15,26 +15,30 @@ type Issues struct {
}
type Issue struct {
ID string `xml:"id,attr"`
XMLName xml.Name `xml:"stueck"`
Number IssueNumber `xml:"nummer"`
Datum KGPZDate `xml:"datum"`
Von string `xml:"von"`
Bis string `xml:"bis"`
Additionals []Additional `xml:"beilage"`
Identifier
AnnotationNote
}
type IssueNumber struct {
XMLName xml.Name `xml:"nummer"`
Value string `xml:",chardata"`
Value
Corrected string `xml:"korrigiert,attr"`
}
type KGPZDate struct {
XMLName xml.Name `xml:"datum"`
When string `xml:"when,attr"`
NotBefore string `xml:"notBefore,attr"`
NotAfter string `xml:"notAfter,attr"`
From string `xml:"from,attr"`
To string `xml:"to,attr"`
Value
}
type Additional struct {


@@ -15,7 +15,7 @@ type Pieces struct {
}
type Piece struct {
ID string `xml:"id,attr"`
XMLName xml.Name `xml:"beitrag"`
IssueRefs []IssueRef `xml:"stueck"`
PlaceRefs []PlaceRef `xml:"ort"`
CategoryRefs []CategoryRef `xml:"kategorie"`
@@ -25,50 +25,10 @@ type Piece struct {
AdditionalRef []AdditionalRef `xml:"beilage"`
Incipit []string `xml:"incipit"`
Title []string `xml:"titel"`
Identifier
AnnotationNote
}
type AdditionalRef struct {
Datum string `xml:"datum,attr"`
Nr string `xml:"nr,attr"`
Von string `xml:"von,attr"`
Bis string `xml:"bis,attr"`
Value string `xml:",chardata"`
}
type IssueRef struct {
Datum string `xml:"datum,attr"`
Nr string `xml:"nr,attr"`
Von string `xml:"von,attr"`
Bis string `xml:"bis,attr"`
Value string `xml:",chardata"`
Category string `xml:"kat,attr"`
}
type PlaceRef struct {
Ref string `xml:"ref,attr"`
Value string `xml:",chardata"`
Category string `xml:"kat,attr"`
}
type CategoryRef struct {
Ref string `xml:"ref,attr"`
Value string `xml:",chardata"`
}
type WorkRef struct {
Ref string `xml:"ref,attr"`
Value string `xml:",chardata"`
Category string `xml:"kat,attr"`
Page string `xml:"s,attr"`
}
type PieceRef struct {
Ref string `xml:"ref,attr"`
Category string `xml:"kat,attr"`
Value string `xml:",chardata"`
}
func (p Pieces) Append(data Pieces) Pieces {
p.Piece = append(p.Piece, data.Piece...)
return p


@@ -15,10 +15,11 @@ type Places struct {
}
type Place struct {
ID string `xml:"id,attr"`
XMLName xml.Name `xml:"ort"`
Names []string `xml:"name"`
SortName string `xml:"sortiername"`
Geo string `xml:"geonames"`
Identifier
AnnotationNote
}


@@ -15,24 +15,14 @@ type Works struct {
}
type Work struct {
ID string `xml:"id,attr"`
XMLName xml.Name `xml:"werk"`
URLs []URL `xml:"url"`
Citation []string `xml:"zitation"`
Akteur []AgentRef `xml:"akteur"`
Identifier
AnnotationNote
}
type AgentRef struct {
Ref string `xml:"ref,attr"`
Category string `xml:"Kat,attr"`
Value string `xml:",chardata"`
}
type URL struct {
Address string `xml:"address,attr"`
Value string `xml:",chardata"`
}
func (w Works) Append(data Works) Works {
w.Work = append(w.Work, data.Work...)
return w


@@ -1,48 +1,55 @@
package providers
import (
"encoding/xml"
"fmt"
"io"
"os"
"sync"
)
import "encoding/xml"
type KGPZXML[T any] interface {
Append(data T) T
fmt.Stringer
type AgentRef struct {
XMLName xml.Name `xml:"akteur"`
Reference
}
type XMLProvider[T KGPZXML[T]] struct {
mu sync.Mutex
paths []string
Items T
type URL struct {
XMLName xml.Name `xml:"url"`
Address string `xml:"address,attr"`
Value
}
func (p *XMLProvider[T]) Load() error {
var wg sync.WaitGroup
for _, path := range p.paths {
wg.Add(1)
go func(path string) {
defer wg.Done()
var data T
if err := UnmarshalFile(path, &data); err != nil {
fmt.Println(err)
return
}
p.mu.Lock()
p.Items = p.Items.Append(data)
p.mu.Unlock()
}(path)
}
wg.Wait()
return nil
type AdditionalRef struct {
XMLName xml.Name `xml:"beilage"`
Reference
Datum string `xml:"datum,attr"`
Nr string `xml:"nr,attr"`
Von string `xml:"von,attr"`
Bis string `xml:"bis,attr"`
}
func (a *XMLProvider[T]) String() string {
a.mu.Lock()
defer a.mu.Unlock()
return fmt.Sprintf("Items: %s", a.Items)
type IssueRef struct {
XMLName xml.Name `xml:"stueck"`
Reference
Datum string `xml:"datum,attr"`
Nr string `xml:"nr,attr"`
Von string `xml:"von,attr"`
Bis string `xml:"bis,attr"`
}
type PlaceRef struct {
XMLName xml.Name `xml:"ort"`
Reference
}
type CategoryRef struct {
XMLName xml.Name `xml:"kategorie"`
Reference
}
type WorkRef struct {
XMLName xml.Name `xml:"werk"`
Reference
Page string `xml:"s,attr"`
}
type PieceRef struct {
XMLName xml.Name `xml:"beitrag"`
Reference
}
type AnnotationNote struct {
@@ -50,16 +57,17 @@ type AnnotationNote struct {
Notes []string `xml:"vermerk"`
}
func UnmarshalFile[T any](filename string, data *T) error {
xmlFile, err := os.Open(filename)
if err != nil {
fmt.Println(err)
return err
}
fmt.Println("Successfully opened " + filename)
defer xmlFile.Close()
byteValue, _ := io.ReadAll(xmlFile)
xml.Unmarshal(byteValue, data)
return nil
type Identifier struct {
ID string `xml:"id,attr"`
}
type Reference struct {
Ref string `xml:"ref,attr"`
Category string `xml:"kat,attr"`
Unsicher bool `xml:"unsicher,attr"`
Value
}
type Value struct {
Value string `xml:",chardata"`
}

providers/xmlprovider.go (new file, 60 lines)

@@ -0,0 +1,60 @@
package providers
import (
"encoding/xml"
"fmt"
"io"
"os"
"sync"
)
type KGPZXML[T any] interface {
Append(data T) T
fmt.Stringer
}
type XMLProvider[T KGPZXML[T]] struct {
mu sync.Mutex
paths []string
Items T
}
func (p *XMLProvider[T]) Load() error {
var wg sync.WaitGroup
for _, path := range p.paths {
wg.Add(1)
go func(path string) {
defer wg.Done()
var data T
if err := UnmarshalFile(path, &data); err != nil {
fmt.Println(err)
return
}
p.mu.Lock()
p.Items = p.Items.Append(data)
p.mu.Unlock()
}(path)
}
wg.Wait()
return nil
}
func (a *XMLProvider[T]) String() string {
a.mu.Lock()
defer a.mu.Unlock()
return fmt.Sprintf("Items: %s", a.Items)
}
func UnmarshalFile[T any](filename string, data *T) error {
xmlFile, err := os.Open(filename)
if err != nil {
fmt.Println(err)
return err
}
fmt.Println("Successfully opened " + filename)
defer xmlFile.Close()
byteValue, err := io.ReadAll(xmlFile)
if err != nil {
return err
}
return xml.Unmarshal(byteValue, data)
}
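
To show how the concrete providers are meant to sit on top of this generic type, here is a hedged sketch; the embedding, the constructor body, and loadPieces are assumptions in the style of the call sites above, not code from this commit.

// Sketch only: Pieces satisfies KGPZXML through its Append method (plus a
// String method assumed to live elsewhere), so a provider just embeds
// XMLProvider with the XML file paths it should read.
type PieceProvider struct {
    XMLProvider[Pieces]
}

func NewPieceProvider(paths []string) *PieceProvider {
    return &PieceProvider{XMLProvider: XMLProvider[Pieces]{paths: paths}}
}

// Load unmarshals every file in its own goroutine and merges the results
// under the provider's mutex via Pieces.Append.
func loadPieces(paths []string) (*PieceProvider, error) {
    pp := NewPieceProvider(paths)
    if err := pp.Load(); err != nil {
        return nil, err
    }
    return pp, nil
}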