mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-28 16:45:32 +00:00
Very basic data enrichment via LOBID/GND
This commit is contained in:
@@ -7,13 +7,13 @@ tmp_dir = "tmp"
|
||||
bin = "./tmp/main"
|
||||
cmd = "go build -tags=\"dev\" -o ./tmp/main ."
|
||||
delay = 1000
|
||||
exclude_dir = ["assets", "views", "tmp", "vendor", "testdata"]
|
||||
exclude_dir = ["assets", "views", "tmp", "vendor", "testdata", "data_git", "cache_gnd"]
|
||||
exclude_file = []
|
||||
exclude_regex = ["_test.go"]
|
||||
exclude_unchanged = false
|
||||
follow_symlink = false
|
||||
full_bin = ""
|
||||
include_dir = [ "views/assets" ]
|
||||
include_dir = []
|
||||
include_ext = ["go", "tpl", "tmpl", "html"]
|
||||
include_file = []
|
||||
kill_delay = "0s"
|
||||
@@ -24,7 +24,7 @@ tmp_dir = "tmp"
|
||||
pre_cmd = []
|
||||
rerun = false
|
||||
rerun_delay = 500
|
||||
send_interrupt = false
|
||||
send_interrupt = true
|
||||
stop_on_error = false
|
||||
|
||||
[color]
|
||||
@@ -43,7 +43,7 @@ tmp_dir = "tmp"
|
||||
|
||||
[proxy]
|
||||
app_port = 8080
|
||||
enabled = false
|
||||
enabled = true
|
||||
proxy_port = 8081
|
||||
|
||||
[screen]
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -6,3 +6,5 @@ cache_gnd/
|
||||
config.json
|
||||
out.log
|
||||
kgpz_web
|
||||
*.log
|
||||
*.out
|
||||
|
||||
43
app/kgpz.go
43
app/kgpz.go
@@ -8,6 +8,7 @@ import (
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/gnd"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
|
||||
)
|
||||
|
||||
@@ -26,6 +27,7 @@ type KGPZ struct {
|
||||
gmu sync.Mutex
|
||||
Config *providers.ConfigProvider
|
||||
Repo *providers.GitProvider
|
||||
GND *gnd.GNDProvider
|
||||
Library *xmlprovider.Library
|
||||
}
|
||||
|
||||
@@ -38,11 +40,15 @@ func (k *KGPZ) Init() {
|
||||
go k.initRepo()
|
||||
}
|
||||
k.Serialize()
|
||||
k.InitGND()
|
||||
k.Enrich()
|
||||
return
|
||||
}
|
||||
|
||||
k.initRepo()
|
||||
k.Serialize()
|
||||
k.InitGND()
|
||||
k.Enrich()
|
||||
}
|
||||
|
||||
func NewKGPZ(config *providers.ConfigProvider) *KGPZ {
|
||||
@@ -54,6 +60,43 @@ func NewKGPZ(config *providers.ConfigProvider) *KGPZ {
|
||||
return &KGPZ{Config: config}
|
||||
}
|
||||
|
||||
func (k *KGPZ) InitGND() {
|
||||
k.gmu.Lock()
|
||||
defer k.gmu.Unlock()
|
||||
k.lmu.Lock()
|
||||
defer k.lmu.Unlock()
|
||||
if k.GND == nil {
|
||||
k.GND = gnd.NewGNDProvider()
|
||||
}
|
||||
|
||||
if err := k.GND.ReadCache(k.Config.GNDPath); err != nil {
|
||||
logging.Error(err, "Error reading GND cache")
|
||||
}
|
||||
}
|
||||
|
||||
func (k *KGPZ) Enrich() error {
|
||||
if k.GND == nil {
|
||||
k.InitGND()
|
||||
}
|
||||
|
||||
k.lmu.Lock()
|
||||
defer k.lmu.Unlock()
|
||||
k.gmu.Lock()
|
||||
defer k.gmu.Unlock()
|
||||
|
||||
if k.Library == nil || k.Library.Agents == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
agents := k.Library.Agents.Items.Agents
|
||||
go func(agents []xmlprovider.Agent) {
|
||||
k.GND.FetchPersons(agents)
|
||||
k.GND.WriteCache(k.Config.GNDPath)
|
||||
}(agents)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (k *KGPZ) Serialize() {
|
||||
// TODO: this is error handling from hell
|
||||
// There is no need to recreate the whole library if the paths haven't changed
|
||||
|
||||
@@ -57,6 +57,14 @@ func Info(msg ...string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Debug logs every given message at debug level via slog, one record per
// message and in argument order. Calling it without arguments logs nothing.
func Debug(msg ...string) {
	for i := range msg {
		slog.Debug(msg[i])
	}
}
|
||||
|
||||
func SetDebug() {
|
||||
slog.SetLogLoggerLevel(slog.LevelDebug)
|
||||
}
|
||||
|
||||
214
providers/gnd/gnd.go
Normal file
214
providers/gnd/gnd.go
Normal file
@@ -0,0 +1,214 @@
|
||||
package gnd
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
|
||||
)
|
||||
|
||||
// LOBID_URL is the base URL that a GND identifier is appended to when a
// person record is fetched.
const LOBID_URL = "https://lobid.org/gnd/"
|
||||
|
||||
// GNDProvider keeps fetched GND person records in memory and persists them
// as JSON files in a cache folder.
type GNDProvider struct {
	// mu serialises reading and writing of the cache files.
	mu sync.Mutex
	// Persons maps an agent ID (string) to its Person record.
	Persons sync.Map
}
|
||||
|
||||
func NewGNDProvider() *GNDProvider {
|
||||
return &GNDProvider{}
|
||||
}
|
||||
|
||||
func (p *GNDProvider) ReadCache(folder string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if err := p.readPersons(folder); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GNDProvider) readPersons(folder string) error {
|
||||
info, err := os.Stat(folder)
|
||||
if os.IsNotExist(err) {
|
||||
return os.MkdirAll(folder, 0755)
|
||||
}
|
||||
if err != nil || !info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
files, err := filepath.Glob(filepath.Join(folder, "*.json"))
|
||||
// TODO: try to recover by recreating the folder
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(len(files))
|
||||
|
||||
for _, file := range files {
|
||||
go func(file string) {
|
||||
p.readPerson(file)
|
||||
wg.Done()
|
||||
}(file)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GNDProvider) readPerson(file string) {
|
||||
person := Person{}
|
||||
// JSON unmarshalling of the file and sanity check:
|
||||
f, err := os.Open(file)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error opening file for reading: "+file)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bytevalue, err := io.ReadAll(f)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error reading file: "+file)
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(bytevalue, &person); err != nil {
|
||||
logging.Error(err, "Error unmarshalling file:"+file)
|
||||
return
|
||||
}
|
||||
|
||||
if person.KGPZID != "" {
|
||||
p.Persons.Store(person.KGPZID, person)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *GNDProvider) WriteCache(folder string) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
if err := p.writePersons(folder); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GNDProvider) writePersons(folder string) error {
|
||||
info, err := os.Stat(folder)
|
||||
if err == os.ErrNotExist {
|
||||
return os.MkdirAll(folder, 0755)
|
||||
}
|
||||
if err != nil || !info.IsDir() {
|
||||
return err
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
p.Persons.Range(func(key, value interface{}) bool {
|
||||
wg.Add(1)
|
||||
go func(key string, value Person) {
|
||||
p.writePerson(folder, key, value)
|
||||
wg.Done()
|
||||
}(key.(string), value.(Person))
|
||||
return true
|
||||
})
|
||||
|
||||
wg.Wait()
|
||||
return nil
|
||||
}
|
||||
|
||||
func (p *GNDProvider) writePerson(folder, id string, person Person) {
|
||||
// JSON marshalling of the person and sanity check:
|
||||
filepath := filepath.Join(folder, id+".json")
|
||||
f, err := os.Create(filepath)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error creating file for writing: "+id)
|
||||
return
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
bytevalue, err := json.Marshal(person)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error marshalling person: "+id)
|
||||
return
|
||||
}
|
||||
|
||||
if _, err := f.Write(bytevalue); err != nil {
|
||||
logging.Error(err, "Error writing file: "+id)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func (p *GNDProvider) GetPerson(id string) (Person, error) {
|
||||
person, ok := p.Persons.Load(id)
|
||||
if !ok {
|
||||
return Person{}, nil
|
||||
}
|
||||
return person.(Person), nil
|
||||
}
|
||||
|
||||
func (p *GNDProvider) FetchPersons(persons []xmlprovider.Agent) {
|
||||
wg := sync.WaitGroup{}
|
||||
for _, person := range persons {
|
||||
if person.ID == "" {
|
||||
continue
|
||||
}
|
||||
if _, ok := p.Persons.Load(person.ID); ok {
|
||||
continue
|
||||
}
|
||||
wg.Add(1)
|
||||
go func(person xmlprovider.Agent) {
|
||||
defer wg.Done()
|
||||
if person.GND != "" {
|
||||
p.fetchPerson(person)
|
||||
}
|
||||
}(person)
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func (p *GNDProvider) fetchPerson(person xmlprovider.Agent) {
|
||||
SPLITURL := strings.Split(person.GND, "/")
|
||||
if len(SPLITURL) < 2 {
|
||||
logging.Error(nil, "Error parsing GND ID: "+person.GND)
|
||||
return
|
||||
}
|
||||
|
||||
GNDID := SPLITURL[len(SPLITURL)-1]
|
||||
|
||||
logging.Debug("Fetching person: " + person.ID + " with URL: " + LOBID_URL + GNDID)
|
||||
request, _ := http.NewRequest("GET", LOBID_URL+GNDID, nil)
|
||||
response, err := http.DefaultClient.Do(request)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error fetching person: "+person.ID)
|
||||
return
|
||||
}
|
||||
defer response.Body.Close()
|
||||
|
||||
if response.StatusCode != http.StatusOK {
|
||||
logging.Error(nil, "Error fetching person: "+person.ID+" with status code: "+response.Status)
|
||||
return
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(response.Body)
|
||||
if err != nil {
|
||||
logging.Error(err, "Error reading response body: "+person.ID)
|
||||
return
|
||||
}
|
||||
|
||||
gndPerson := Person{}
|
||||
if err := json.Unmarshal(body, &gndPerson); err != nil {
|
||||
logging.Error(err, "Error unmarshalling response body: "+person.ID)
|
||||
return
|
||||
}
|
||||
|
||||
gndPerson.KGPZID = person.ID
|
||||
p.Persons.Store(person.ID, gndPerson)
|
||||
}
|
||||
@@ -1,4 +1,56 @@
|
||||
package gnd
|
||||
|
||||
type Person struct {
|
||||
KGPZID string `json:"kgpzid"`
|
||||
URL string `json:"id"`
|
||||
DateOfDeath []string `json:"dateOfDeath"`
|
||||
PlaceOfDeath []Entity `json:"placeOfDeath"`
|
||||
BibliographicalOrHistoricalInformation []string `json:"bibliographicalOrHistoricalInformation"`
|
||||
PreferredName string `json:"preferredName"`
|
||||
GndIdentifier string `json:"gndIdentifier"`
|
||||
Wikipedia []Entity `json:"wikipedia"`
|
||||
Depiction []Picture `json:"depiction"`
|
||||
ProfessionOrOccupation []Entity `json:"professionOrOccupation"`
|
||||
PreferredEntityForThePerson []PersonNameEntity `json:"preferredEntityForThePerson"`
|
||||
DateOfBirth []string `json:"dateOfBirth"`
|
||||
PlaceOfBirth []Entity `json:"placeOfBirth"`
|
||||
VariantNameEntityForThePerson []PersonNameEntity `json:"variantNameEntityForThePerson"`
|
||||
VariantName []string `json:"variantName"`
|
||||
SameAs []CrossReferences `json:"sameAs"`
|
||||
}
|
||||
|
||||
type CrossReferences struct {
|
||||
Items Collection `json:"collection"`
|
||||
ID string `json:"id"`
|
||||
}
|
||||
|
||||
// Collection describes the "collection" object of a cross reference.
type Collection struct {
	Abbr      string `json:"abbr"`
	Name      string `json:"name"`
	Publisher string `json:"publisher"`
	Icon      string `json:"icon"`
	ID        string `json:"id"`
}
|
||||
|
||||
// Link is an identifier paired with a human-readable label.
type Link struct {
	ID    string `json:"id"`
	Label string `json:"label"`
}
|
||||
|
||||
// Picture is one entry of a Person's "depiction" list.
type Picture struct {
	ID        string `json:"id"`
	URL       string `json:"url"`
	Thumbnail string `json:"thumbnail"`
}
|
||||
|
||||
// Entity is a referenced resource given by its identifier and label. Person
// uses it for places, professions and Wikipedia links.
type Entity struct {
	ID    string `json:"id"`
	Label string `json:"label"`
}
|
||||
|
||||
// PersonNameEntity holds the parts of one name form of a person. Every part
// is a list of strings.
type PersonNameEntity struct {
	Forename     []string `json:"forename"`
	Surname      []string `json:"surname"`
	PersonalName []string `json:"personalName"`
	NameAddition []string `json:"nameAddition"`
}
|
||||
|
||||
@@ -96,6 +96,7 @@ func (l *Library) Serialize() {
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// TODO: make Items into a sync.Map
|
||||
func (p *XMLProvider[T]) Serialize() error {
|
||||
// Introduce goroutine for every path, locking on append:
|
||||
var wg sync.WaitGroup
|
||||
@@ -114,7 +115,6 @@ func (p *XMLProvider[T]) Serialize() error {
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
fmt.Println(p.Items)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -127,13 +127,22 @@ func (a *XMLProvider[T]) String() string {
|
||||
func UnmarshalFile[T any](filename string, data *T) error {
|
||||
xmlFile, err := os.Open(filename)
|
||||
if err != nil {
|
||||
logging.Error(err, "Could not deserialize file: "+filename)
|
||||
logging.Error(err, "Could not open file: "+filename)
|
||||
return err
|
||||
}
|
||||
defer xmlFile.Close()
|
||||
logging.Info("Deserialization: " + filename)
|
||||
byteValue, _ := io.ReadAll(xmlFile)
|
||||
xml.Unmarshal(byteValue, data)
|
||||
|
||||
logging.Info("Deserialization: " + filename)
|
||||
byteValue, err := io.ReadAll(xmlFile)
|
||||
if err != nil {
|
||||
logging.Error(err, "Could not read file: "+filename)
|
||||
return err
|
||||
}
|
||||
err = xml.Unmarshal(byteValue, data)
|
||||
|
||||
if err != nil {
|
||||
logging.Error(err, "Could not unmarshal file: "+filename)
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1exit status 1
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"sort"
|
||||
"strconv"
|
||||
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
|
||||
)
|
||||
|
||||
@@ -18,6 +17,35 @@ type YearViewModel struct {
|
||||
Issues IssuesByMonth
|
||||
}
|
||||
|
||||
func YearView(year string, lib *xmlprovider.Library) (*YearViewModel, error) {
|
||||
res := YearViewModel{Year: year}
|
||||
res.Issues = make(IssuesByMonth, 12)
|
||||
last := ""
|
||||
for _, issue := range lib.Issues.Items.Issues {
|
||||
if len(issue.Datum.When) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
date := issue.Datum.When[0:4]
|
||||
if date != last {
|
||||
res.PushAvailable(date)
|
||||
last = date
|
||||
}
|
||||
|
||||
if date == year {
|
||||
res.PushIssue(issue)
|
||||
}
|
||||
}
|
||||
|
||||
if len(res.Issues) == 0 {
|
||||
return nil, errors.New("No issues found for year " + year)
|
||||
}
|
||||
|
||||
res.SortAvailableYears()
|
||||
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
func (y *YearViewModel) PushIssue(i xmlprovider.Issue) {
|
||||
iv, err := FromIssue(i)
|
||||
if err != nil {
|
||||
@@ -54,34 +82,3 @@ func (y *YearViewModel) SortAvailableYears() {
|
||||
return iint < jint
|
||||
})
|
||||
}
|
||||
|
||||
func YearView(year string, lib *xmlprovider.Library) (*YearViewModel, error) {
|
||||
res := YearViewModel{Year: year}
|
||||
res.Issues = make(IssuesByMonth, 12)
|
||||
last := ""
|
||||
for _, issue := range lib.Issues.Items.Issues {
|
||||
|
||||
logging.ObjDebug(&issue, "Issue")
|
||||
if len(issue.Datum.When) < 4 {
|
||||
continue
|
||||
}
|
||||
|
||||
date := issue.Datum.When[0:4]
|
||||
if date != last {
|
||||
res.PushAvailable(date)
|
||||
last = date
|
||||
}
|
||||
|
||||
if date == year {
|
||||
res.PushIssue(issue)
|
||||
}
|
||||
}
|
||||
|
||||
if len(res.Issues) == 0 {
|
||||
return nil, errors.New("No issues found")
|
||||
}
|
||||
|
||||
res.SortAvailableYears()
|
||||
|
||||
return &res, nil
|
||||
}
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
|
||||
<!-- Issues -->
|
||||
{{ range $issue := $month }}
|
||||
<a href="/{{ $y }}/{{ $issue.Number.Chardata }}">
|
||||
<a href="/{{ $y }}/{{ $issue.Number.No }}">
|
||||
<div>
|
||||
{{ $issue.Number.Chardata }}
|
||||
{{ $issue.Number.No }}
|
||||
</div>
|
||||
<div>
|
||||
{{ index $issue.Weekday 1 }}
|
||||
|
||||
Reference in New Issue
Block a user