mirror of https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-29 17:15:31 +00:00

Added functionality to clean up cached XML collections
@@ -1,5 +1,7 @@
 package logging
 
+// BUG: logging happens without a manual flush, so the messages come from all threads at the same time.
+
 import (
 	"fmt"
 	"log/slog"
@@ -14,7 +14,6 @@ type Agent struct {
 	Org bool `xml:"org,attr"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 func (a Agent) String() string {
@@ -11,7 +11,6 @@ type Category struct {
 	SortName string `xml:"sortiername"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 func (c Category) String() string {
@@ -15,7 +15,6 @@ type Issue struct {
 	Additionals []Additional `xml:"beilage"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 type Nummer struct {
@@ -1,19 +1,7 @@
 package xmlprovider
 
-type SerializedItem struct {
+type ItemInfo struct {
 	Source string
 	Date   string
 	Commit string
 }
-
-func (si SerializedItem) SetSource(s string) {
-	si.Source = s
-}
-
-func (si SerializedItem) SetDate(d string) {
-	si.Date = d
-}
-
-func (si SerializedItem) SetCommit(c string) {
-	si.Commit = c
-}
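Note on the removed setters: they were declared on value receivers, so each call mutated a copy of the struct and the assignment never reached the stored item; keeping the parse metadata in a separate ItemInfo record side-steps that. A minimal standalone sketch of the pitfall (the Meta type and method names below are illustrative, not from the repository):

	package main

	import "fmt"

	type Meta struct{ Source string }

	// Value receiver: m is a copy, the caller's struct is unchanged.
	func (m Meta) SetSourceByValue(s string) { m.Source = s }

	// Pointer receiver: the caller's struct is updated.
	func (m *Meta) SetSourceByPointer(s string) { m.Source = s }

	func main() {
		m := Meta{}
		m.SetSourceByValue("a.xml")
		fmt.Println(m.Source) // prints an empty string
		m.SetSourceByPointer("a.xml")
		fmt.Println(m.Source) // prints "a.xml"
	}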
providers/xmlprovider/library.go (new file, 120 lines)
@@ -0,0 +1,120 @@
+package xmlprovider
+
+import (
+	"fmt"
+	"sync"
+)
+
+type Library struct {
+	Agents     *XMLProvider[Agent]
+	Places     *XMLProvider[Place]
+	Works      *XMLProvider[Work]
+	Categories *XMLProvider[Category]
+	Issues     *XMLProvider[Issue]
+	Pieces     *XMLProvider[Piece]
+}
+
+func (l *Library) String() string {
+	return fmt.Sprintf("Agents: %s\nPlaces: %s\nWorks: %s\nCategories: %s\nIssues: %s\nPieces: %s\n",
+		l.Agents.String(), l.Places.String(), l.Works.String(), l.Categories.String(), l.Issues.String(), l.Pieces.String())
+}
+
+func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library {
+	return &Library{
+		Agents:     &XMLProvider[Agent]{Paths: agentpaths},
+		Places:     &XMLProvider[Place]{Paths: placepaths},
+		Works:      &XMLProvider[Work]{Paths: workpaths},
+		Categories: &XMLProvider[Category]{Paths: categorypaths},
+		Issues:     &XMLProvider[Issue]{Paths: issuepaths},
+		Pieces:     &XMLProvider[Piece]{Paths: piecepaths},
+	}
+}
+
+func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) {
+	l.Agents.Paths = agentpaths
+	l.Places.Paths = placepaths
+	l.Works.Paths = workpaths
+	l.Categories.Paths = categorypaths
+	l.Issues.Paths = issuepaths
+	l.Pieces.Paths = piecepaths
+}
+
+func (l *Library) Serialize(commit string) {
+	wg := sync.WaitGroup{}
+
+	l.Prepare()
+
+	for _, path := range l.Places.Paths {
+		wg.Add(1)
+		go func() {
+			l.Places.Serialize(NewPlaceRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	for _, path := range l.Agents.Paths {
+		wg.Add(1)
+		go func() {
+			l.Agents.Serialize(NewAgentRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	for _, path := range l.Categories.Paths {
+		wg.Add(1)
+		go func() {
+			l.Categories.Serialize(NewCategoryRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	for _, path := range l.Works.Paths {
+		wg.Add(1)
+		go func() {
+			l.Works.Serialize(NewWorkRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	for _, path := range l.Issues.Paths {
+		wg.Add(1)
+		go func() {
+			l.Issues.Serialize(NewIssueRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	for _, path := range l.Pieces.Paths {
+		wg.Add(1)
+		go func() {
+			l.Pieces.Serialize(NewPieceRoot(), path, commit)
+			wg.Done()
+		}()
+	}
+
+	wg.Wait()
+
+	go func() {
+		l.Cleanup(commit)
+	}()
+}
+
+// TODO: Prepare resets the list of failed parses for a new parse.
+// We need to set the logs accordingly.
+func (l *Library) Prepare() {
+	l.Agents.Prepare()
+	l.Places.Prepare()
+	l.Works.Prepare()
+	l.Categories.Prepare()
+	l.Issues.Prepare()
+	l.Pieces.Prepare()
+}
+
+func (l *Library) Cleanup(commit string) {
+	l.Agents.Cleanup(commit)
+	l.Places.Cleanup(commit)
+	l.Works.Cleanup(commit)
+	l.Categories.Cleanup(commit)
+	l.Issues.Cleanup(commit)
+	l.Pieces.Cleanup(commit)
+}
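One thing to watch in the Serialize fan-out above: each goroutine closes over the loop variable `path`. From Go 1.22 on every iteration gets its own `path`, but on earlier toolchains all goroutines of a loop share a single variable and can end up serializing the same file repeatedly. A common defensive shape, shown only as a sketch (same identifiers as the diff, not the committed code):

	for _, path := range l.Places.Paths {
		wg.Add(1)
		go func(path string) { // pass the loop variable explicitly for pre-1.22 toolchains
			defer wg.Done()
			l.Places.Serialize(NewPlaceRoot(), path, commit)
		}(path)
	}

Note also that Cleanup(commit) is started in a detached goroutine after wg.Wait(), so callers of Library.Serialize may return before the cleanup pass has finished.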
@@ -22,7 +22,6 @@ type Piece struct {
 	Title []string `xml:"titel"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 func (p Piece) String() string {
@@ -12,7 +12,6 @@ type Place struct {
 	Geo string `xml:"geonames"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 func (p Place) String() string {
@@ -13,7 +13,6 @@ type Work struct {
 	Akteur []AgentRef `xml:"akteur"`
 	Identifier
 	AnnotationNote
-	SerializedItem
 }
 
 type Citation struct {
@@ -5,6 +5,7 @@ import (
 	"fmt"
 	"io"
 	"os"
+	"slices"
 	"sync"
 	"time"
 
@@ -14,9 +15,6 @@ import (
 type XMLItem interface {
 	fmt.Stringer
 	GetIDs() []string
-	SetSource(string)
-	SetDate(string)
-	SetCommit(string)
 }
 
 type Collection[T XMLItem] struct {
@@ -28,94 +26,18 @@ type XMLProvider[T XMLItem] struct {
 	Paths []string
 	// INFO: map is type [string]T
 	Items sync.Map
+	// INFO: map is type [string]ItemInfo
+	// It keeps information about parsing status of the items.
+	Infos sync.Map
+
+	mu     sync.Mutex
+	failed []string
 }
 
-type Library struct {
-	Agents     *XMLProvider[Agent]
-	Places     *XMLProvider[Place]
-	Works      *XMLProvider[Work]
-	Categories *XMLProvider[Category]
-	Issues     *XMLProvider[Issue]
-	Pieces     *XMLProvider[Piece]
-}
-
-func (l *Library) String() string {
-	return fmt.Sprintf("Agents: %s\nPlaces: %s\nWorks: %s\nCategories: %s\nIssues: %s\nPieces: %s\n",
-		l.Agents.String(), l.Places.String(), l.Works.String(), l.Categories.String(), l.Issues.String(), l.Pieces.String())
-}
-
-func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library {
-	return &Library{
-		Agents:     &XMLProvider[Agent]{Paths: agentpaths},
-		Places:     &XMLProvider[Place]{Paths: placepaths},
-		Works:      &XMLProvider[Work]{Paths: workpaths},
-		Categories: &XMLProvider[Category]{Paths: categorypaths},
-		Issues:     &XMLProvider[Issue]{Paths: issuepaths},
-		Pieces:     &XMLProvider[Piece]{Paths: piecepaths},
-	}
-}
-
-func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) {
-	l.Agents.Paths = agentpaths
-	l.Places.Paths = placepaths
-	l.Works.Paths = workpaths
-	l.Categories.Paths = categorypaths
-	l.Issues.Paths = issuepaths
-	l.Pieces.Paths = piecepaths
-}
-
-func (l *Library) Serialize(commit string) {
-	wg := sync.WaitGroup{}
-
-	for _, path := range l.Places.Paths {
-		wg.Add(1)
-		go func() {
-			l.Places.Serialize(NewPlaceRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	for _, path := range l.Agents.Paths {
-		wg.Add(1)
-		go func() {
-			l.Agents.Serialize(NewAgentRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	for _, path := range l.Categories.Paths {
-		wg.Add(1)
-		go func() {
-			l.Categories.Serialize(NewCategoryRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	for _, path := range l.Works.Paths {
-		wg.Add(1)
-		go func() {
-			l.Works.Serialize(NewWorkRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	for _, path := range l.Issues.Paths {
-		wg.Add(1)
-		go func() {
-			l.Issues.Serialize(NewIssueRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	for _, path := range l.Pieces.Paths {
-		wg.Add(1)
-		go func() {
-			l.Pieces.Serialize(NewPieceRoot(), path, commit)
-			wg.Done()
-		}()
-	}
-
-	wg.Wait()
-}
+func (p *XMLProvider[T]) Prepare() {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.failed = make([]string, 0)
 }
 
 func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error {
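For context on the new Infos field: sync.Map is untyped, so the "[string]ItemInfo" shape mentioned in the comment is only a convention, and any read needs a type assertion. A sketch of how a caller might look up the parse metadata for an item ID under that assumption (the InfoFor helper is illustrative and not part of this commit):

	// InfoFor returns the recorded parse metadata for an item ID, if any.
	func (p *XMLProvider[T]) InfoFor(id string) (ItemInfo, bool) {
		v, ok := p.Infos.Load(id)
		if !ok {
			return ItemInfo{}, false
		}
		info, ok := v.(ItemInfo) // stored as ItemInfo in Serialize
		return info, ok
	}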
@@ -124,14 +46,16 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit st
 	if err := UnmarshalFile(path, dataholder); err != nil {
 		logging.Error(err, "Could not unmarshal file: "+path)
 		logging.ParseMessages.ParseErrors <- logging.ParseMessage{MessageType: logging.ErrorMessage, Message: "Could not unmarshal file: " + path}
+		p.mu.Lock()
+		defer p.mu.Unlock()
+		p.failed = append(p.failed, path)
 		return err
 	}
 
 	for _, item := range dataholder.Children() {
-		item.SetSource(path)
-		item.SetDate(date)
-		item.SetCommit(commit)
 		// INFO: Mostly it's just one ID, so the double loop is not that bad.
 		for _, id := range item.GetIDs() {
+			p.Infos.Store(id, ItemInfo{Source: path, Date: date, Commit: commit})
 			p.Items.Store(id, item)
 		}
 	}
@@ -214,3 +138,27 @@ func (p *XMLProvider[T]) Everything() []T {
 	})
 	return items
 }
+
+// TODO: How do we tell that an item was deleted, rather than that its file simply failed to serialize?
+// -> We compare the file paths of failed serializations with the file paths of the items.
+//    - If the item's source is not among the failed serializations, the item was deleted.
+//    - If the item's source is among the failed serializations, we cannot tell, so we keep it.
+//
+// Consequence: if all serializations completed, we clean up every stale item.
+func (p *XMLProvider[T]) Cleanup(commit string) {
+	todelete := make([]string, 0)
+	p.Infos.Range(func(key, value interface{}) bool {
+		info := value.(ItemInfo)
+		if info.Commit != commit {
+			if !slices.Contains(p.failed, info.Source) {
+				todelete = append(todelete, key.(string))
+			}
+		}
+		return true
+	})
+
+	for _, key := range todelete {
+		p.Infos.Delete(key)
+		p.Items.Delete(key)
+	}
+}
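To make the deletion rule above concrete: an entry is removed only if it was last written by an older commit and its source file did not fail in the current parse run. A minimal standalone sketch of that decision (the shouldDelete helper and the file names are illustrative, not from the repository; requires Go 1.21+ for the slices package):

	package main

	import (
		"fmt"
		"slices"
	)

	type ItemInfo struct{ Source, Date, Commit string }

	// shouldDelete mirrors the rule in Cleanup: drop an entry only if it is stale
	// (written by an older commit) and its source file parsed cleanly this run.
	func shouldDelete(info ItemInfo, currentCommit string, failed []string) bool {
		return info.Commit != currentCommit && !slices.Contains(failed, info.Source)
	}

	func main() {
		failed := []string{"data/broken.xml"}
		fmt.Println(shouldDelete(ItemInfo{Source: "data/agents.xml", Commit: "abc"}, "def", failed)) // true: stale, file parsed fine
		fmt.Println(shouldDelete(ItemInfo{Source: "data/broken.xml", Commit: "abc"}, "def", failed)) // false: file failed, keep the item
		fmt.Println(shouldDelete(ItemInfo{Source: "data/agents.xml", Commit: "def"}, "def", failed)) // false: written by the current commit
	}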
reset.sh (new executable file, 3 lines)
@@ -0,0 +1,3 @@
+#!/bin/bash
+rm -rf ./cache_gnd
+rm -rf ./data_git
@@ -112,6 +112,7 @@ func (s *Server) Start() {
 		// TODO: Error handler, which sadly, is global:
 		ErrorHandler: fiber.DefaultErrorHandler,
 		// WARNING: The app must be run in a console, since this uses environment variables:
+		// It is not trivial to turn this on, since we need to mark goroutines that can be started only once.
 		// Prefork: true,
 		StreamRequestBody: false,
 		WriteTimeout:      REQUEST_TIMEOUT,
@@ -156,7 +157,7 @@ func (s *Server) Start() {
 
 	srv.Get("/:year?", controllers.GetYear(s.kgpz))
 	srv.Get("/:year/:issue/:page?", controllers.GetIssue(s.kgpz))
-	srv.Get("/:year/:issue/beilage/:subpage?", controllers.GetIssue(s.kgpz))
+	srv.Get("/:year/:issue/beilage/:page?", controllers.GetIssue(s.kgpz))
 
 	s.runner(srv)