diff --git a/app/kgpz.go b/app/kgpz.go index 42e9b5d..097aef6 100644 --- a/app/kgpz.go +++ b/app/kgpz.go @@ -9,17 +9,7 @@ import ( "github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging" "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers" "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/gnd" - "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider" -) - -const ( - AGENTS_PATH = "XML/akteure.xml" - PLACES_PATH = "XML/orte.xml" - WORKS_PATH = "XML/werke.xml" - CATEGORIES_PATH = "XML/kategorien.xml" - - ISSUES_DIR = "XML/stuecke/" - PIECES_DIR = "XML/beitraege/" + "github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels" ) type KGPZ struct { @@ -29,7 +19,7 @@ type KGPZ struct { Config *providers.ConfigProvider Repo *providers.GitProvider GND *gnd.GNDProvider - Library *xmlprovider.Library + Library *xmlmodels.Library } func (k *KGPZ) Init() { @@ -81,10 +71,8 @@ func (k *KGPZ) Enrich() error { return nil } - // TODO: Library locking is never needed, since the library items, once set, are never changed - // We only need to check if set go func() { - data := gnd.ProviderIntoDataset(k.Library.Agents) + data := xmlmodels.AgentsIntoDataset(k.Library.Agents) k.GND.FetchPersons(data) k.GND.WriteCache(k.Config.GNDPath) }() @@ -110,22 +98,9 @@ func (k *KGPZ) Serialize() { helpers.Assert(err, "Error getting pieces") if k.Library == nil { - k.Library = xmlprovider.NewLibrary( - []string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)}, - []string{filepath.Join(k.Config.FolderPath, PLACES_PATH)}, - []string{filepath.Join(k.Config.FolderPath, WORKS_PATH)}, - []string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)}, - *issues, - *pieces) - } else { - k.Library.SetPaths( - []string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)}, - []string{filepath.Join(k.Config.FolderPath, PLACES_PATH)}, - []string{filepath.Join(k.Config.FolderPath, WORKS_PATH)}, - []string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)}, - *issues, - *pieces) + k.Library = xmlmodels.NewLibrary(k.Config.FolderPath) } + k.Library.Serialize(commit) } @@ -177,13 +152,3 @@ func (k *KGPZ) initRepo() { func (k *KGPZ) Shutdown() { k.Repo.Wait() } - -func getXMLFiles(path string) (*[]string, error) { - if _, err := os.Stat(path); os.IsNotExist(err) { - return nil, err - } - - matches, err := filepath.Glob(filepath.Join(path, "*.xml")) - - return &matches, err -} diff --git a/config.dev.json b/config.dev.json index 9465f5b..b527ace 100644 --- a/config.dev.json +++ b/config.dev.json @@ -4,5 +4,5 @@ "webhook_endpoint": "/webhook", "webhook_secret": "secret", "debug": true, - "watch": true + "watch": false } diff --git a/providers/gnd/helpers.go b/providers/gnd/helpers.go index 038500e..dc32594 100644 --- a/providers/gnd/helpers.go +++ b/providers/gnd/helpers.go @@ -1,17 +1,5 @@ package gnd -import "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider" - type GNDData struct { ID, GND string } - -func ProviderIntoDataset(provider *xmlprovider.XMLProvider[xmlprovider.Agent]) []GNDData { - provider.Lock() - defer provider.Unlock() - var data []GNDData - for _, agent := range provider.Array { - data = append(data, GNDData{ID: agent.ID, GND: agent.GND}) - } - return data -} diff --git a/providers/xmlprovider/helpers.go b/providers/xmlprovider/helpers.go index 8d653b8..061aa0c 100644 --- a/providers/xmlprovider/helpers.go +++ b/providers/xmlprovider/helpers.go @@ -4,6 +4,7 @@ import ( "encoding/xml" "io" "os" + "path/filepath" "github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging" ) @@ -30,3 +31,13 @@ func UnmarshalFile[T any](filename string, data T) error { } return nil } + +func XMLFilesForPath(path string) ([]string, error) { + if _, err := os.Stat(path); os.IsNotExist(err) { + return nil, err + } + + matches, err := filepath.Glob(filepath.Join(path, "*.xml")) + + return matches, err +} diff --git a/providers/xmlprovider/item.go b/providers/xmlprovider/item.go index ef8ae15..cd1b5af 100644 --- a/providers/xmlprovider/item.go +++ b/providers/xmlprovider/item.go @@ -5,6 +5,8 @@ type ItemInfo struct { Parse *ParseMeta } -type KeyedItem struct { - keys []string +// INFO: These are just root elements that hold the data of the XML files. +// They get discarded after a parse. +type XMLRootElement[T any] interface { + Children() []T } diff --git a/providers/xmlprovider/library.go b/providers/xmlprovider/library.go deleted file mode 100644 index 279aa4b..0000000 --- a/providers/xmlprovider/library.go +++ /dev/null @@ -1,147 +0,0 @@ -package xmlprovider - -import ( - "fmt" - "sync" -) - -type Library struct { - amu sync.Mutex - Agents *XMLProvider[Agent] - Places *XMLProvider[Place] - Works *XMLProvider[Work] - Categories *XMLProvider[Category] - Issues *XMLProvider[Issue] - Pieces *XMLProvider[Piece] -} - -func (l *Library) String() string { - return fmt.Sprintf("Agents: %s\nPlaces: %s\nWorks: %s\nCategories: %s\nIssues: %s\nPieces: %s\n", - l.Agents.String(), l.Places.String(), l.Works.String(), l.Categories.String(), l.Issues.String(), l.Pieces.String()) -} - -// INFO: this is the only place where the providers are created. There is no need for locking on access. -func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library { - return &Library{ - Agents: &XMLProvider[Agent]{Paths: agentpaths}, - Places: &XMLProvider[Place]{Paths: placepaths}, - Works: &XMLProvider[Work]{Paths: workpaths}, - Categories: &XMLProvider[Category]{Paths: categorypaths}, - Issues: &XMLProvider[Issue]{Paths: issuepaths}, - Pieces: &XMLProvider[Piece]{Paths: piecepaths}, - } -} - -func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) { - l.amu.Lock() - defer l.amu.Unlock() - l.Agents.Paths = agentpaths - l.Places.Paths = placepaths - l.Works.Paths = workpaths - l.Categories.Paths = categorypaths - l.Issues.Paths = issuepaths - l.Pieces.Paths = piecepaths -} - -func (l *Library) Serialize(commit string) { - wg := sync.WaitGroup{} - - l.Prepare(commit) - - for _, path := range l.Places.Paths { - wg.Add(1) - go func() { - l.Places.Serialize(NewPlaceRoot(), path) - wg.Done() - }() - } - - for _, path := range l.Agents.Paths { - wg.Add(1) - go func() { - l.Agents.Serialize(NewAgentRoot(), path) - wg.Done() - }() - } - - for _, path := range l.Categories.Paths { - wg.Add(1) - go func() { - l.Categories.Serialize(NewCategoryRoot(), path) - wg.Done() - }() - } - - for _, path := range l.Works.Paths { - wg.Add(1) - go func() { - l.Works.Serialize(NewWorkRoot(), path) - wg.Done() - }() - } - - for _, path := range l.Issues.Paths { - wg.Add(1) - go func() { - l.Issues.Serialize(NewIssueRoot(), path) - wg.Done() - }() - } - - for _, path := range l.Pieces.Paths { - wg.Add(1) - go func() { - l.Pieces.Serialize(NewPieceRoot(), path) - wg.Done() - }() - } - - wg.Wait() - l.Cleanup() -} - -func (l *Library) Prepare(commit string) { - l.Agents.Prepare(commit) - l.Places.Prepare(commit) - l.Works.Prepare(commit) - l.Categories.Prepare(commit) - l.Issues.Prepare(commit) - l.Pieces.Prepare(commit) -} - -func (l *Library) Cleanup() { - wg := sync.WaitGroup{} - wg.Add(6) - - go func() { - l.Agents.Cleanup() - wg.Done() - }() - - go func() { - l.Places.Cleanup() - wg.Done() - }() - - go func() { - l.Works.Cleanup() - wg.Done() - }() - - go func() { - l.Categories.Cleanup() - wg.Done() - }() - - go func() { - l.Issues.Cleanup() - wg.Done() - }() - - go func() { - l.Pieces.Cleanup() - wg.Done() - }() - - wg.Wait() -} diff --git a/providers/xmlprovider/xmlprovider.go b/providers/xmlprovider/xmlprovider.go index d05562a..c04e0fb 100644 --- a/providers/xmlprovider/xmlprovider.go +++ b/providers/xmlprovider/xmlprovider.go @@ -21,7 +21,6 @@ type XMLItem interface { // An XMLProvider is a struct that holds holds serialized XML data of a specific type. It combines multiple parses IF a succeeded parse can not serialize the data from a path. type XMLProvider[T XMLItem] struct { - Paths []string // INFO: map is type map[string]*T Items sync.Map // INFO: map is type [string]ItemInfo @@ -60,15 +59,17 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) er } p.mu.Lock() + defer p.mu.Unlock() + if len(p.parses) == 0 { logging.Error(fmt.Errorf("No commit set"), "No commit set") return fmt.Errorf("No commit set") } - commit := &p.parses[len(p.parses)-1] - p.Array = append(p.Array, dataholder.Children()...) - p.mu.Unlock() - for _, item := range dataholder.Children() { + commit := &p.parses[len(p.parses)-1] + newItems := dataholder.Children() + + for _, item := range newItems { // INFO: Mostly it's just one ID, so the double loop is not that bad. for _, id := range item.Keys() { p.Infos.Store(id, ItemInfo{Source: path, Parse: commit}) @@ -76,6 +77,7 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) er } } + p.Array = append(p.Array, newItems...) return nil } diff --git a/providers/xmlprovider/xmlrefs.go b/providers/xmlprovider/xmlrefs.go deleted file mode 100644 index 852e15e..0000000 --- a/providers/xmlprovider/xmlrefs.go +++ /dev/null @@ -1,50 +0,0 @@ -package xmlprovider - -import "encoding/xml" - -type AgentRef struct { - XMLName xml.Name `xml:"akteur"` - Reference -} - -type AdditionalRef struct { - XMLName xml.Name `xml:"beilage"` - Reference // Ist nicht im Schema - Datum string `xml:"datum,attr"` - Nr int `xml:"nr,attr"` - AdditionalNo int `xml:"beilage,attr"` - Von int `xml:"von,attr"` - Bis int `xml:"bis,attr"` -} - -type IssueRef struct { - XMLName xml.Name `xml:"stueck"` - Reference // Ist nicht im Schema - DateAttributes - Nr int `xml:"nr,attr"` - Von int `xml:"von,attr"` - Bis int `xml:"bis,attr"` - Beilage int `xml:"beilage,attr"` -} - -type PlaceRef struct { - XMLName xml.Name `xml:"ort"` - Reference -} - -type CategoryRef struct { - XMLName xml.Name `xml:"kategorie"` - Reference -} - -type WorkRef struct { - XMLName xml.Name `xml:"werk"` - Reference - Page string `xml:"s,attr"` -} - -type PieceRef struct { - XMLName xml.Name `xml:"beitrag"` - Page string `xml:"s,attr"` - Reference -} diff --git a/server/server.go b/server/server.go index 96b3203..8bca666 100644 --- a/server/server.go +++ b/server/server.go @@ -13,13 +13,14 @@ import ( "github.com/gofiber/fiber/v2" "github.com/gofiber/fiber/v2/middleware/cache" + "github.com/gofiber/fiber/v2/middleware/etag" "github.com/gofiber/fiber/v2/middleware/logger" "github.com/gofiber/fiber/v2/middleware/recover" "github.com/gofiber/storage/memory/v2" ) const ( - // INFO: This timeout is stupid. Uploads can take a long time, others might not. It's messy. + // INFO: This timeout is stupid. Uploads can take a long time, other routes might not. It's messy. REQUEST_TIMEOUT = 16 * time.Second SERVER_TIMEOUT = 16 * time.Second @@ -91,6 +92,7 @@ func (s *Server) Engine(e *templating.Engine) { s.Start() } +// TODO: There is no error handler func (s *Server) Start() { s.mu.Lock() defer s.mu.Unlock() @@ -133,6 +135,7 @@ func (s *Server) Start() { srv.Use(recover.New()) + srv.Use(ASSETS_URL_PREFIX, etag.New()) srv.Use(ASSETS_URL_PREFIX, static(&views.StaticFS)) // TODO: Dont cache static assets, bc storage gets huge diff --git a/viewmodels/agent_view.go b/viewmodels/agent_view.go index 1a4ffd0..04b0764 100644 --- a/viewmodels/agent_view.go +++ b/viewmodels/agent_view.go @@ -5,7 +5,7 @@ import ( "slices" "strings" - "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider" + "github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels" ) type AgentsListView struct { @@ -16,22 +16,28 @@ type AgentsListView struct { } type AgentView struct { - xmlprovider.Agent + xmlmodels.Agent Works []WorkByAgent Pieces []PieceByAgent } type WorkByAgent struct { - xmlprovider.Work - Reference xmlprovider.AgentRef + xmlmodels.Work + Pieces []PieceByWork + Reference xmlmodels.AgentRef } type PieceByAgent struct { - xmlprovider.Piece - Reference xmlprovider.AgentRef + xmlmodels.Piece + Reference xmlmodels.AgentRef } -func AgentsView(letterorid string, lib *xmlprovider.Library) *AgentsListView { +type PieceByWork struct { + xmlmodels.Piece + Reference xmlmodels.WorkRef +} + +func AgentsView(letterorid string, lib *xmlmodels.Library) *AgentsListView { res := AgentsListView{Search: letterorid, Agents: make(map[string]AgentView)} av := make(map[string]bool) @@ -56,7 +62,7 @@ func AgentsView(letterorid string, lib *xmlprovider.Library) *AgentsListView { } } - // TODO: We won't need to lock the library if we take down all routes during parsing + // TODO: We won't need to lock the library if we take down the server during parsing lib.Works.Lock() for _, w := range lib.Works.Array { if ref, ok := w.ReferencesAgent(letterorid); ok { @@ -76,6 +82,15 @@ func AgentsView(letterorid string, lib *xmlprovider.Library) *AgentsListView { res.Agents[ref.Ref] = entry } } + + // PERF: This is really slow: resolve all backlinks after parse? + for _, a := range res.Agents { + for _, w := range a.Works { + if ref, ok := p.ReferencesWork(w.ID); ok { + w.Pieces = append(w.Pieces, PieceByWork{Piece: p, Reference: *ref}) + } + } + } } lib.Pieces.Unlock() diff --git a/viewmodels/issue_view.go b/viewmodels/issue_view.go index c490ab3..15ecb53 100644 --- a/viewmodels/issue_view.go +++ b/viewmodels/issue_view.go @@ -6,13 +6,13 @@ import ( "slices" "github.com/Theodor-Springmann-Stiftung/kgpz_web/functions" - "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider" + "github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels" ) type PieceByIssue struct { - xmlprovider.Piece + xmlmodels.Piece // TODO: this is a bit hacky, but it refences the page number of the piece in the issue - Reference xmlprovider.IssueRef + Reference xmlmodels.IssueRef } type PiecesByPage struct { @@ -22,12 +22,12 @@ type PiecesByPage struct { // TODO: Next & Prev type IssueVM struct { - xmlprovider.Issue + xmlmodels.Issue Pieces PiecesByPage AdditionalPieces PiecesByPage } -func NewSingleIssueView(y string, no string, lib *xmlprovider.Library) (*IssueVM, error) { +func NewSingleIssueView(y string, no string, lib *xmlmodels.Library) (*IssueVM, error) { issue := lib.Issues.Item(no + "-" + y) if issue == nil { return nil, fmt.Errorf("No issue found for %v-%v", y, no) @@ -48,7 +48,7 @@ func NewSingleIssueView(y string, no string, lib *xmlprovider.Library) (*IssueVM return &sivm, nil } -func PiecesForIsssue(lib *xmlprovider.Library, issue xmlprovider.Issue) (PiecesByPage, PiecesByPage, error) { +func PiecesForIsssue(lib *xmlmodels.Library, issue xmlmodels.Issue) (PiecesByPage, PiecesByPage, error) { year := issue.Datum.When.Year ppi := PiecesByPage{Items: make(map[int][]PieceByIssue)} diff --git a/viewmodels/year_view.go b/viewmodels/year_view.go index 80b9f0c..4cf9718 100644 --- a/viewmodels/year_view.go +++ b/viewmodels/year_view.go @@ -7,10 +7,10 @@ import ( "sort" "github.com/Theodor-Springmann-Stiftung/kgpz_web/functions" - "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider" + "github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels" ) -type IssuesByMonth map[int][]xmlprovider.Issue +type IssuesByMonth map[int][]xmlmodels.Issue func (ibm *IssuesByMonth) Sort() { for _, issues := range *ibm { @@ -26,7 +26,7 @@ type YearVM struct { Issues IssuesByMonth } -func YearView(year int, lib *xmlprovider.Library) (*YearVM, error) { +func YearView(year int, lib *xmlmodels.Library) (*YearVM, error) { issues := make(IssuesByMonth, 12) years := make(map[int]bool) diff --git a/views/routes/akteure/body.gohtml b/views/routes/akteure/body.gohtml index 13d53a9..639e547 100644 --- a/views/routes/akteure/body.gohtml +++ b/views/routes/akteure/body.gohtml @@ -11,7 +11,7 @@ {{ $letter }} -