kgpz_web/app/kgpz.go

package app
import (
"os"
"path/filepath"
"strings"
"sync"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/controllers"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/geonames"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/gnd"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/pictures"
searchprovider "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/search"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
"github.com/gofiber/fiber/v2"
"github.com/gofiber/fiber/v2/middleware/compress"
"github.com/gofiber/fiber/v2/middleware/etag"
)
// KGPZ also implements Funcs() map[string]interface{} to expose template functions to the template engine.
// It is meant to be constructed once and then used as a singleton.
const (
IMG_PREFIX = "/img/"
PICTURES_PREFIX = "/static/pictures/"
EDITION_URL = "/edition/"
PRIVACY_URL = "/datenschutz/"
CONTACT_URL = "/kontakt/"
CITATION_URL = "/zitation/"
SEARCH_URL = "/suche/"
FILTER_URL = "/filter"
INDEX_URL = "/jahrgang/1764"
YEAR_OVERVIEW_URL = "/jahrgang/:year"
PLACE_OVERVIEW_URL = "/ort/:place?"
AGENTS_OVERVIEW_URL = "/akteure/:letterorid"
CATEGORY_OVERVIEW_URL = "/kategorie/:category?/:year?"
PIECE_URL = "/beitrag/:id"
PIECE_PAGE_URL = "/beitrag/:id/:page"
PAGE_JUMP_URL = "/jump/:year/:page"
PAGE_JUMP_FORM_URL = "/jump"
ISSSUE_URL = "/:year/:issue/:page?"
ADDITIONS_URL = "/:year/:issue/beilage/:page?"
)
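// Route-pattern sketch for the URL constants above (fiber v2 syntax: ":name" is a
// required parameter, ":name?" an optional one). The concrete URLs below are purely
// illustrative, not taken from the edition's data:
//
//    /jahrgang/1764       -> YEAR_OVERVIEW_URL   (:year = "1764")
//    /ort/Leipzig         -> PLACE_OVERVIEW_URL  (:place = "Leipzig")
//    /beitrag/abc/3       -> PIECE_PAGE_URL      (:id = "abc", :page = "3")
//    /1764/12/5           -> ISSSUE_URL          (:year, :issue, optional :page)
//    /1764/12/beilage/2   -> ADDITIONS_URL       (supplement pages of an issue)
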
// GitUpdateCallback is called when git data changes
type GitUpdateCallback func(commit string, date string, url string)
type KGPZ struct {
// INFO: We need to prevent concurrent reads and writes to the fs here since
// - Git is accessing the FS
// - The Library is accessing the FS
// So we need to prevent concurrent pulls and serializations
// This is what fsmu is for. IT IS NOT FOR SETTING Config, Repo, GND or Library.
// Those are only set once during initialization and construction.
fsmu sync.Mutex
Config *providers.ConfigProvider
Repo *providers.GitProvider
GND *gnd.GNDProvider
Geonames *geonames.GeonamesProvider
Pictures *pictures.PicturesProvider
Library *xmlmodels.Library
Search *searchprovider.SearchProvider
// Callback for when git data is updated
gitUpdateCallback GitUpdateCallback
}
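// Concurrency sketch for fsmu (above): every code path that touches the git checkout on
// disk takes this one mutex, so a pull can never interleave with XML parsing or cache
// writes. A minimal sketch of the pattern, using only what this file already does:
//
//    k.fsmu.Lock()
//    err, changed := k.Repo.Pull()   // git writes to the working tree
//    k.fsmu.Unlock()
//
//    k.fsmu.Lock()
//    k.Library.Parse(...)            // the parser reads the same working tree
//    k.fsmu.Unlock()
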
func NewKGPZ(config *providers.ConfigProvider) (*KGPZ, error) {
helpers.AssertNonNil(config, "Config is nil")
if err := config.Validate(); err != nil {
helpers.Assert(err, "Error validating config")
}
kgpz := &KGPZ{Config: config}
err := kgpz.Init()
if err != nil {
return nil, err
}
return kgpz, nil
}
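// Usage sketch for NewKGPZ (above). The config constructor shown here,
// providers.NewConfigProvider, is an assumption about the providers package and is not
// defined in this file; the fiber calls are standard fiber v2 API:
//
//    cfg, err := providers.NewConfigProvider("config.json") // hypothetical constructor
//    if err != nil { /* handle */ }
//    app, err := NewKGPZ(cfg)
//    if err != nil { /* handle */ }
//    srv := fiber.New()
//    _ = app.Pre(srv)
//    _ = app.Routes(srv)
//    _ = srv.Listen(":8080")
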
func (k *KGPZ) Pre(srv *fiber.App) error {
// Check if folder exists and if yes, serve image files from it
if _, err := os.Stat(k.Config.Config.ImgPath); err == nil {
fs := os.DirFS(k.Config.Config.ImgPath)
srv.Use(IMG_PREFIX, compress.New(compress.Config{
Level: compress.LevelBestSpeed,
}), etag.New(), helpers.StaticHandler(&fs))
} else {
logging.Info("Image folder not found. Skipping image serving.")
}
// Serve newspaper pictures from pictures directory
picturesPath := k.Config.Config.PicturesPath
if _, err := os.Stat(picturesPath); err == nil {
picturesFS := os.DirFS(picturesPath)
srv.Use(PICTURES_PREFIX, compress.New(compress.Config{
Level: compress.LevelBestSpeed,
}), etag.New(), helpers.StaticHandler(&picturesFS))
logging.Info("Serving newspaper pictures from " + picturesPath + " directory.")
} else {
logging.Info("Pictures folder not found at " + picturesPath + ". Skipping picture serving.")
}
return nil
}
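// Mapping sketch for the static mounts in Pre (above): requests under the two prefixes
// are answered from the configured directories with best-speed compression and ETag
// revalidation, assuming helpers.StaticHandler resolves the remaining path inside the
// mounted fs.FS. Paths are illustrative:
//
//    GET /img/title/1764_01.jpg           -> <ImgPath>/title/1764_01.jpg
//    GET /static/pictures/1764/12/001.jpg -> <PicturesPath>/1764/12/001.jpg
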
func (k *KGPZ) Init() error {
if gp, err := providers.NewGitProvider(
k.Config.Config.GitURL,
filepath.Join(k.Config.Config.BaseDIR, k.Config.Config.GITPath),
k.Config.Config.GitBranch); err != nil {
logging.Error(err, "Error initializing GitProvider. Continuing without Git.")
} else {
k.Repo = gp
}
if err := k.Serialize(); err != nil {
logging.Error(err, "Error parsing XML.")
return err
}
if err := k.initGND(); err != nil {
logging.Error(err, "Error reading GND-Cache. Continuing.")
}
if err := k.initGeonames(); err != nil {
logging.Error(err, "Error reading Geonames-Cache. Continuing.")
}
if err := k.initPictures(); err != nil {
logging.Error(err, "Error scanning pictures directory. Continuing without pictures.")
}
if sp, err := searchprovider.NewSearchProvider(filepath.Join(k.Config.Config.BaseDIR, k.Config.SearchPath)); err != nil {
logging.Error(err, "Error initializing SearchProvider. Continuing without Search.")
} else {
k.Search = sp
}
k.Enrich()
go k.Pull()
if k.Search != nil {
if err := k.Search.LoadIndeces(); err != nil {
logging.Error(err, "Error loading search indices.")
k.BuildSearchIndex()
} else {
logging.Info("Search indices loaded.")
}
}
return nil
}
func (k *KGPZ) initGND() error {
k.GND = gnd.NewGNDProvider()
return k.GND.ReadCache(filepath.Join(k.Config.BaseDIR, k.Config.GNDPath))
}
func (k *KGPZ) initGeonames() error {
k.Geonames = geonames.NewGeonamesProvider()
return k.Geonames.ReadCache(filepath.Join(k.Config.BaseDIR, k.Config.GeoPath))
}
func (k *KGPZ) initPictures() error {
k.Pictures = pictures.NewPicturesProvider()
return k.Pictures.Scan(k.Config.Config.PicturesPath)
}
func (k *KGPZ) Routes(srv *fiber.App) error {
srv.Get("/", func(c *fiber.Ctx) error {
c.Redirect(INDEX_URL)
return nil
})
srv.Get(SEARCH_URL, controllers.GetSearch(k.Library, k.Search))
srv.Get(FILTER_URL, controllers.GetQuickFilter(k.Library))
srv.Get("/ort/fragment/:place", controllers.GetPlaceFragment(k.Library, k.Geonames))
srv.Get(PLACE_OVERVIEW_URL, controllers.GetPlace(k.Library, k.Geonames))
srv.Get(CATEGORY_OVERVIEW_URL, controllers.GetCategory(k.Library))
srv.Get(AGENTS_OVERVIEW_URL, controllers.GetAgents(k.Library))
srv.Get(PIECE_PAGE_URL, controllers.GetPieceWithPage(k.Library, k.Pictures))
srv.Get(PIECE_URL, controllers.GetPiece(k.Library, k.Pictures))
// Page jump routes for direct navigation
srv.Get(PAGE_JUMP_URL, controllers.GetPageJump(k.Library))
srv.Post(PAGE_JUMP_FORM_URL, controllers.GetPageJumpForm(k.Library))
// TODO: YEAR_OVERVIEW_URL being /:year is a bad idea, since it captures basically everything,
// probably creating problems with static files, and also in case we add a front page later.
// That's why we redirect to /1764 on "/" above and don't use an optional /:year? parameter.
// -> Check SEO requirements on index pages that are 301-forwarded.
// This applies to all paths with two or three segments without a static prefix:
// it is probably better to use /ausgabe/:year/:issue/:page? and /jahrgang/:year? respectively.
srv.Get(YEAR_OVERVIEW_URL, controllers.GetYear(k.Library))
srv.Get(ISSSUE_URL, controllers.GetIssue(k.Library, k.Pictures))
srv.Get(ADDITIONS_URL, controllers.GetIssue(k.Library, k.Pictures))
srv.Get(EDITION_URL, controllers.Get(EDITION_URL))
srv.Get(PRIVACY_URL, controllers.Get(PRIVACY_URL))
srv.Get(CONTACT_URL, controllers.Get(CONTACT_URL))
srv.Get(CITATION_URL, controllers.Get(CITATION_URL))
if k.Config.WebHookSecret != "" && k.Config.WebHookEndpoint != "" {
srv.Post(k.Config.WebHookEndpoint, controllers.PostWebhook(k))
}
return nil
}
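// Request-routing sketch for Routes (above); URLs and query strings are illustrative:
//
//    GET  /                    -> redirect to /jahrgang/1764 (INDEX_URL)
//    GET  /suche/?q=...        -> controllers.GetSearch
//    GET  /jahrgang/1764       -> controllers.GetYear
//    GET  /1764/12/5           -> controllers.GetIssue (via the catch-all ISSSUE_URL)
//    POST <WebHookEndpoint>    -> controllers.PostWebhook (only if secret and endpoint are set)
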
func (k *KGPZ) Funcs() map[string]interface{} {
e := make(map[string]interface{})
// App specific
e["GetAgent"] = k.Library.Agents.Item
e["GetPlace"] = k.Library.Places.Item
e["GetWork"] = k.Library.Works.Item
e["GetCategory"] = k.Library.Categories.Item
e["GetIssue"] = k.Library.Issues.Item
e["GetPiece"] = k.Library.Pieces.Item
e["GetGND"] = k.GND.Person
e["GetGeonames"] = k.Geonames.Place
// Math functions
e["sub"] = func(a, b int) int { return a - b }
e["add"] = func(a, b int) int { return a + b }
// String functions
e["contains"] = func(s, substr string) bool { return strings.Contains(s, substr) }
e["lower"] = func(s string) string { return strings.ToLower(s) }
// Place helper functions
e["GetModernCountryName"] = func(geoID string) string {
if geoID == "" || k.Geonames == nil {
return ""
}
geoPlace := k.Geonames.Place(geoID)
if geoPlace == nil {
return ""
}
// Map country names to German translations
switch geoPlace.CountryName {
case "France":
return "heutiges Frankreich"
case "United Kingdom":
return "heutiges Großbritannien"
case "Russia":
return "heutiges Russland"
case "Czech Republic", "Czechia":
return "heutiges Tschechien"
case "Netherlands", "The Netherlands":
return "heutige Niederlande"
case "Poland":
return "heutiges Polen"
case "Switzerland":
return "heutige Schweiz"
case "Latvia":
return "heutiges Lettland"
case "Sweden":
return "heutiges Schweden"
case "Austria":
return "heutiges Österreich"
case "Belgium":
return "heutiges Belgien"
case "Slovakia":
return "heutige Slowakei"
case "Finland":
return "heutiges Finnland"
case "Denmark":
return "heutiges Dänemark"
default:
// Return original country name for unknown countries (excluding Germany)
if geoPlace.CountryName != "Germany" && geoPlace.CountryName != "" {
return geoPlace.CountryName
}
return ""
}
}
e["GetFullPlaceInfo"] = func(geoID string, originalName string) string {
if geoID == "" || k.Geonames == nil {
return ""
}
geoPlace := k.Geonames.Place(geoID)
if geoPlace == nil {
return ""
}
// Only show info for places outside Germany
if geoPlace.CountryName == "Germany" || geoPlace.CountryName == "" {
return ""
}
// Get the modern country name
countryName := ""
switch geoPlace.CountryName {
case "France":
countryName = "heutiges Frankreich"
case "United Kingdom":
countryName = "heutiges Großbritannien"
case "Russia":
countryName = "heutiges Russland"
case "Czech Republic", "Czechia":
countryName = "heutiges Tschechien"
case "Netherlands", "The Netherlands":
countryName = "heutige Niederlande"
case "Poland":
countryName = "heutiges Polen"
case "Switzerland":
countryName = "heutige Schweiz"
case "Latvia":
countryName = "heutiges Lettland"
case "Sweden":
countryName = "heutiges Schweden"
case "Austria":
countryName = "heutiges Österreich"
case "Belgium":
countryName = "heutiges Belgien"
case "Slovakia":
countryName = "heutige Slowakei"
case "Finland":
countryName = "heutiges Finnland"
case "Denmark":
countryName = "heutiges Dänemark"
default:
countryName = geoPlace.CountryName
}
// Extract German alternate name (same logic as GetModernPlaceName)
modernName := ""
hasGermanName := false
for _, altName := range geoPlace.AlternateNames {
if altName.Lang == "de" {
hasGermanName = true
if altName.IsPreferredName {
modernName = altName.Name
break
} else if modernName == "" {
modernName = altName.Name
}
}
}
if !hasGermanName {
modernName = geoPlace.ToponymName
}
// Combine country and modern place name
result := countryName
if modernName != "" && strings.ToLower(modernName) != strings.ToLower(originalName) {
result += ", " + modernName
}
return result
}
e["GetModernPlaceName"] = func(geoID string, originalName string) string {
if geoID == "" || k.Geonames == nil {
return ""
}
geoPlace := k.Geonames.Place(geoID)
if geoPlace == nil {
return ""
}
// Only show modern names for places outside Germany
if geoPlace.CountryName == "Germany" || geoPlace.CountryName == "" {
return ""
}
// Extract German alternate name
modernName := ""
hasGermanName := false
for _, altName := range geoPlace.AlternateNames {
if altName.Lang == "de" {
hasGermanName = true
if altName.IsPreferredName {
modernName = altName.Name
break
} else if modernName == "" {
modernName = altName.Name
}
}
}
if !hasGermanName {
modernName = geoPlace.ToponymName
}
// Only return if it's different from the original name
if modernName != "" && strings.ToLower(modernName) != strings.ToLower(originalName) {
return modernName
}
return ""
}
e["LookupPieces"] = k.Library.Pieces.ReverseLookup
e["LookupWorks"] = k.Library.Works.ReverseLookup
e["LookupIssues"] = k.Library.Issues.ReverseLookup
e["LookupAnonymWorks"] = func() []xmlmodels.Work {
var anonymWorks []xmlmodels.Work
for _, work := range k.Library.Works.Array {
// Check if work has no agents
if len(work.AgentRefs) == 0 {
anonymWorks = append(anonymWorks, work)
}
}
return anonymWorks
}
return e
}
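// Registration sketch for Funcs (above): the returned map is meant to be handed to a
// template engine. With the standard library this is html/template's FuncMap (the
// gofiber html engine offers a similar AddFuncMap); the field names in the template
// line are illustrative, not part of this file:
//
//    tmpl := template.New("page").Funcs(template.FuncMap(app.Funcs()))
//    // in a template: {{ GetAgent .AgentID }} or {{ GetModernCountryName .GeoID }}
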
func (k *KGPZ) Enrich() error {
if k.Library == nil || k.Library.Agents == nil {
return nil
}
go func() {
k.fsmu.Lock()
defer k.fsmu.Unlock()
// Fetch GND data for agents
data := xmlmodels.AgentsIntoDataset(k.Library.Agents)
k.GND.FetchPersons(data)
k.GND.WriteCache(filepath.Join(k.Config.BaseDIR, k.Config.GNDPath))
// Fetch Geonames data for places
if k.Library.Places != nil {
placeData := xmlmodels.PlacesIntoDataset(k.Library.Places)
k.Geonames.FetchPlaces(placeData)
k.Geonames.WriteCache(filepath.Join(k.Config.BaseDIR, k.Config.GeoPath))
}
}()
return nil
}
// EnrichAndRebuildIndex ensures enrichment completes before rebuilding search index
func (k *KGPZ) EnrichAndRebuildIndex() error {
if k.Library == nil || k.Library.Agents == nil {
return nil
}
go func() {
k.fsmu.Lock()
defer k.fsmu.Unlock()
logging.Info("Starting enrichment process...")
// Fetch GND data for agents
data := xmlmodels.AgentsIntoDataset(k.Library.Agents)
k.GND.FetchPersons(data)
k.GND.WriteCache(filepath.Join(k.Config.BaseDIR, k.Config.GNDPath))
// Fetch Geonames data for places
if k.Library.Places != nil {
placeData := xmlmodels.PlacesIntoDataset(k.Library.Places)
k.Geonames.FetchPlaces(placeData)
k.Geonames.WriteCache(filepath.Join(k.Config.BaseDIR, k.Config.GeoPath))
}
logging.Info("Enrichment complete. Starting search index rebuild...")
// Clear existing indices before rebuilding
k.ClearSearchIndices()
// Rebuild search index after enrichment is complete
k.buildSearchIndexSync()
}()
return nil
}
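// Note on EnrichAndRebuildIndex (above): the GND/Geonames fetches, the cache writes, the
// index clear and the rebuild all run inside a single fsmu critical section, so the
// rebuilt index always sees freshly enriched data, and a concurrent Pull or Serialize is
// blocked until the rebuild has finished.
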
// ClearSearchIndices removes all existing search indices
func (k *KGPZ) ClearSearchIndices() error {
if k.Search == nil {
return nil
}
return k.Search.ClearAllIndices()
}
// buildSearchIndexSync builds the search index synchronously (no goroutine)
func (k *KGPZ) buildSearchIndexSync() error {
if k.Library == nil || k.Library.Agents == nil || k.Search == nil {
return nil
}
wg := new(sync.WaitGroup)
wg.Add(6)
go func() {
for _, agent := range k.Library.Agents.Array {
err := k.Search.Index(agent, k.Library)
if err != nil {
logging.Error(err, "Error indexing agent")
}
}
wg.Done()
}()
go func() {
for _, place := range k.Library.Places.Array {
err := k.Search.Index(place, k.Library)
if err != nil {
logging.Error(err, "Error indexing place")
}
}
wg.Done()
}()
go func() {
for _, cat := range k.Library.Categories.Array {
err := k.Search.Index(cat, k.Library)
if err != nil {
logging.Error(err, "Error indexing category")
}
}
wg.Done()
}()
go func() {
for _, work := range k.Library.Works.Array {
err := k.Search.Index(work, k.Library)
if err != nil {
logging.Error(err, "Error indexing work")
}
}
wg.Done()
}()
go func() {
for _, issue := range k.Library.Issues.Array {
err := k.Search.Index(issue, k.Library)
if err != nil {
logging.Error(err, "Error indexing issue")
}
}
wg.Done()
}()
go func() {
for _, piece := range k.Library.Pieces.Array {
err := k.Search.Index(piece, k.Library)
if err != nil {
logging.Error(err, "Error indexing piece")
}
}
wg.Done()
}()
wg.Wait()
logging.Info("Search index built.")
return nil
}
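// Fan-out sketch for buildSearchIndexSync (above): six goroutines index the six
// collections in parallel and a sync.WaitGroup joins them; indexing errors are logged
// but not collected. An alternative that surfaces the first error, using
// golang.org/x/sync/errgroup (not a dependency of this file, shown only as a sketch):
//
//    var g errgroup.Group
//    g.Go(func() error { /* index agents */ return nil })
//    g.Go(func() error { /* index places */ return nil })
//    if err := g.Wait(); err != nil {
//        logging.Error(err, "Error building search index")
//    }
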
func (k *KGPZ) BuildSearchIndex() error {
if k.Library == nil || k.Library.Agents == nil || k.Search == nil {
return nil
}
go func() {
k.fsmu.Lock()
defer k.fsmu.Unlock()
k.buildSearchIndexSync()
}()
return nil
}
func (k *KGPZ) Serialize() error {
// TODO: this is error handling from hell
// Preventing pulling and serializing at the same time
k.fsmu.Lock()
defer k.fsmu.Unlock()
commit := ""
source := xmlprovider.Path
if k.Repo != nil {
commit = k.Repo.Commit
source = xmlprovider.Commit
}
if k.Library == nil {
k.Library = xmlmodels.NewLibrary()
}
err := k.Library.Parse(source, filepath.Join(k.Config.BaseDIR, k.Config.GITPath), commit)
return err
}
func (k *KGPZ) IsDebug() bool {
return k.Config.Debug
}
func (k *KGPZ) GetWebHookSecret() string {
return k.Config.WebHookSecret
}
// SetGitUpdateCallback sets the callback function to be called when git data is updated
func (k *KGPZ) SetGitUpdateCallback(callback GitUpdateCallback) {
k.gitUpdateCallback = callback
}
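// Usage sketch for SetGitUpdateCallback (above): the callback fires after a pull that
// actually changed the checkout (see Pull below). The callback body here is illustrative:
//
//    app.SetGitUpdateCallback(func(commit, date, url string) {
//        logging.Info("data updated: " + commit + " (" + date + ") from " + url)
//    })
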
func (k *KGPZ) Pull() {
if k.Repo == nil {
return
}
logging.Info("Pulling Repository...")
k.fsmu.Lock()
err, changed := k.Repo.Pull()
logging.Error(err, "Error pulling GitProvider")
k.fsmu.Unlock()
if changed {
logging.ObjDebug(&k.Repo, "Remote changed. Reparsing")
if err := k.Serialize(); err != nil {
logging.Error(err, "Error reparsing XML after pull.")
}
k.EnrichAndRebuildIndex()
// Rescan pictures after pull
if err := k.initPictures(); err != nil {
logging.Error(err, "Error rescanning pictures directory after pull.")
}
// Notify about git data update
if k.gitUpdateCallback != nil {
k.gitUpdateCallback(
k.Repo.Commit,
k.Repo.Date.Format("2006-01-02T15:04:05Z07:00"),
k.Config.Config.GitURL,
)
}
}
}
func (k *KGPZ) Shutdown() {
if k.Repo != nil {
k.Repo.Wait()
}
}