Mirror of https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
Synced 2025-10-30 09:35:31 +00:00
Added arrays to XMLProvider types for faster iteration
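The motivation: the providers keep their items in a sync.Map, which is convenient for keyed lookup but slow to iterate, so this commit adds a plain slice (Array) next to the map. Below is a minimal, self-contained sketch — not code from this repository, all names illustrative — of the two iteration styles the commit trades between:

package main

import (
	"fmt"
	"sync"
)

type Agent struct{ ID, GND string }

func main() {
	var m sync.Map
	arr := make([]Agent, 0, 1000)
	for i := 0; i < 1000; i++ {
		a := Agent{ID: fmt.Sprintf("a%d", i)}
		m.Store(a.ID, &a)
		arr = append(arr, a)
	}

	// sync.Map iteration: a callback per element, interface{} keys and
	// values, and a type assertion on every item.
	n := 0
	m.Range(func(_, value any) bool {
		_ = value.(*Agent)
		n++
		return true
	})

	// Slice iteration: contiguous memory, no callback, no assertions.
	for range arr {
		n++
	}
	fmt.Println(n) // 2000
}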
app/kgpz.go (32 lines changed)
@@ -23,9 +23,8 @@ const (
 )
 
 type KGPZ struct {
-	// LMU is here for file system access
-	lmu sync.Mutex
 	// GMU is only here to prevent concurrent pulls
+	// or file system operations while parsing
 	gmu    sync.Mutex
 	Config *providers.ConfigProvider
 	Repo   *providers.GitProvider
@@ -78,28 +77,23 @@ func (k *KGPZ) Enrich() error {
 		k.InitGND()
 	}
 
-	k.lmu.Lock()
-	defer k.lmu.Unlock()
-
 	if k.Library == nil || k.Library.Agents == nil {
 		return nil
 	}
 
-	// INFO: We pass agents by value since we don't want to block the library
-	agents := k.Library.Agents.Everything()
-	go func(agents []*xmlprovider.Agent) {
-		k.GND.FetchPersons(agents)
+	// TODO: Library locking is never needed, since the library items, once set, are never changed
+	// We only need to check if set
+	go func() {
+		data := gnd.ProviderIntoDataset(k.Library.Agents)
+		k.GND.FetchPersons(data)
 		k.GND.WriteCache(k.Config.GNDPath)
-	}(agents)
+	}()
 
 	return nil
 }
 
 func (k *KGPZ) Serialize() {
 	// TODO: this is error handling from hell
-	// There is no need to recreate the whole library if the paths haven't changed
-	// We do it to keep the old data if the new data is missing
-
 	// Preventing pulling and serializing at the same time
 	k.gmu.Lock()
 	defer k.gmu.Unlock()
@@ -115,23 +109,15 @@ func (k *KGPZ) Serialize() {
 	pieces, err := getXMLFiles(filepath.Join(k.Config.FolderPath, PIECES_DIR))
 	helpers.Assert(err, "Error getting pieces")
 
-	k.lmu.Lock()
-	defer k.lmu.Unlock()
 	if k.Library == nil {
-		lib := xmlprovider.NewLibrary(
+		k.Library = xmlprovider.NewLibrary(
 			[]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)},
 			[]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)},
 			[]string{filepath.Join(k.Config.FolderPath, WORKS_PATH)},
 			[]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)},
 			*issues,
 			*pieces)
-
-		lib.Serialize(commit)
-
-		k.Library = lib
 	} else {
-		// TODO: where to clear the old data?
-		// How to differentiate between deleted data points and stale data points bc of parse errors?
 		k.Library.SetPaths(
 			[]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)},
 			[]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)},
@@ -139,8 +125,8 @@ func (k *KGPZ) Serialize() {
 			[]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)},
 			*issues,
 			*pieces)
-		k.Library.Serialize(commit)
 	}
+	k.Library.Serialize(commit)
 }
 
 func (k *KGPZ) IsDebug() bool {
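The kgpz.go half of the change removes the library mutex from the hot path and moves GND fetching behind a snapshot: copy the few fields the background job needs while a lock is held, then release it before any network I/O starts. A minimal sketch of that pattern in isolation (all names here are made up for the sketch):

package main

import (
	"fmt"
	"sync"
)

type record struct{ ID, GND string }

type store struct {
	mu    sync.Mutex
	items []record
}

// snapshot copies the minimal data a background job needs while the
// lock is held; the caller then releases the lock before slow work.
func (s *store) snapshot() []record {
	s.mu.Lock()
	defer s.mu.Unlock()
	out := make([]record, len(s.items))
	copy(out, s.items)
	return out
}

func main() {
	s := &store{items: []record{{ID: "a1", GND: "https://d-nb.info/gnd/123"}}}
	data := s.snapshot()
	done := make(chan struct{})
	go func() { // slow fetch runs without holding s.mu
		for _, r := range data {
			fmt.Println("fetch", r.ID)
		}
		close(done)
	}()
	<-done
}

The hunks that follow are the matching signature changes in the GND provider.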
@@ -13,7 +13,6 @@ import (
 	"time"
 
 	"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
-	"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
 )
 
 const (
@@ -77,7 +76,6 @@ func (p *GNDProvider) readPersons(folder string) error {
 
 func (p *GNDProvider) readPerson(file string) {
 	person := Person{}
-	// JSON unmarshalling of the file and sanity check:
 	f, err := os.Open(file)
 	if err != nil {
 		logging.Error(err, "Error opening file for reading: "+file)
@@ -96,8 +94,8 @@ func (p *GNDProvider) readPerson(file string) {
 		return
 	}
 
-	if person.Agent.GND != "" {
-		p.Persons.Store(person.Agent.GND, person)
+	if person.KGPZURL != "" {
+		p.Persons.Store(person.KGPZURL, person)
 		return
 	}
 }
@@ -168,14 +166,14 @@ func (p *GNDProvider) Person(id string) *Person {
 	return &pers
 }
 
-func (p *GNDProvider) FetchPersons(persons []*xmlprovider.Agent) {
+func (p *GNDProvider) FetchPersons(persons []GNDData) {
 	wg := sync.WaitGroup{}
 	for _, person := range persons {
 		if person.ID == "" || person.GND == "" {
 			continue
 		}
 
-		// INFO: person already fetched; check for updates??
+		// TODO: person already fetched; check for updates??
 		if _, ok := p.Persons.Load(person.GND); ok {
 			continue
 		}
@@ -187,27 +185,27 @@ func (p *GNDProvider) fetchPerson(person xmlprovider.Agent) {
 		p.errmu.Unlock()
 
 		wg.Add(1)
-		go func(person *xmlprovider.Agent) {
+		go func(person *GNDData) {
 			defer wg.Done()
-			p.fetchPerson(*person)
-		}(person)
+			p.fetchPerson(person.ID, person.GND)
+		}(&person)
 	}
 	wg.Wait()
 }
 
-func (p *GNDProvider) fetchPerson(person xmlprovider.Agent) {
-	SPLITURL := strings.Split(person.GND, "/")
+func (p *GNDProvider) fetchPerson(ID, GND string) {
+	SPLITURL := strings.Split(GND, "/")
 	if len(SPLITURL) < 2 {
-		logging.Error(nil, "Error parsing GND ID from: "+person.GND)
+		logging.Error(nil, "Error parsing GND ID from: "+GND)
 		return
 	}
 
 	GNDID := SPLITURL[len(SPLITURL)-1]
 
-	logging.Debug("Fetching person: " + person.ID + " with URL: " + LOBID_URL + GNDID)
+	logging.Debug("Fetching person: " + ID + " with URL: " + LOBID_URL + GNDID)
 	request, err := http.NewRequest("GET", LOBID_URL+GNDID, nil)
 	if err != nil {
-		logging.Error(err, "Error creating request: "+person.ID)
+		logging.Error(err, "Error creating request: "+ID)
 		return
 	}
 
@@ -218,17 +216,17 @@ func (p *GNDProvider) fetchPerson(person xmlprovider.Agent) {
 		response, err = http.DefaultClient.Do(request)
 		if err == nil && response.StatusCode < 400 {
 			if i > 0 {
-				logging.Info("Successfully fetched person: " + person.ID + " after " + strconv.Itoa(i) + " retries")
+				logging.Info("Successfully fetched person: " + ID + " after " + strconv.Itoa(i) + " retries")
 			}
 			break
 		}
 
 		time.Sleep(time.Duration(i+1) * time.Second)
-		logging.Error(err, "Retry fetching person: "+person.ID)
+		logging.Error(err, "Retry fetching person: "+ID)
 	}
 
 	if err != nil {
-		logging.Error(err, "Error fetching person: "+person.ID)
+		logging.Error(err, "Error fetching person: "+ID)
 		return
 	}
 
@@ -237,29 +235,29 @@ func (p *GNDProvider) fetchPerson(person xmlprovider.Agent) {
 	if response.StatusCode != http.StatusOK {
 		if response.StatusCode < 500 {
 			p.errmu.Lock()
-			p.errs[person.GND] = response.StatusCode
+			p.errs[GND] = response.StatusCode
 			p.errmu.Unlock()
 		}
-		logging.Error(errors.New("Error fetching person: " + person.ID + " with status code: " + http.StatusText(response.StatusCode)))
+		logging.Error(errors.New("Error fetching person: " + ID + " with status code: " + http.StatusText(response.StatusCode)))
 		return
 	}
 
 	body, err := io.ReadAll(response.Body)
 	if err != nil {
-		logging.Error(err, "Error reading response body: "+person.ID)
+		logging.Error(err, "Error reading response body: "+ID)
		return
 	}
 
 	// For debug purposes: Write response body to file:
-	// os.WriteFile("gnd_responses/"+person.ID+".json", body, 0644)
+	// os.WriteFile("gnd_responses/"+ID+".json", body, 0644)
 
 	gndPerson := Person{}
 	if err := json.Unmarshal(body, &gndPerson); err != nil {
-		logging.Error(err, "Error unmarshalling response body: "+person.ID)
+		logging.Error(err, "Error unmarshalling response body: "+ID)
 		return
 	}
 
-	gndPerson.KGPZID = person.ID
-	gndPerson.Agent = person
-	p.Persons.Store(person.GND, gndPerson)
+	gndPerson.KGPZID = ID
+	gndPerson.KGPZURL = GND
+	p.Persons.Store(GND, gndPerson)
 }
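fetchPerson keeps its linear-backoff retry loop; only the identifiers changed. For reference, the same retry shape as a standalone sketch (the attempt count, helper name, and example URL are illustrative, not taken from the repository):

package main

import (
	"fmt"
	"net/http"
	"time"
)

// fetchWithRetry mirrors the loop's shape: try, then sleep 1s, 2s, ...
// between attempts and give up after a fixed number of tries.
func fetchWithRetry(url string) (*http.Response, error) {
	var resp *http.Response
	var err error
	for i := 0; i < 3; i++ {
		resp, err = http.Get(url)
		if err == nil && resp.StatusCode < 400 {
			return resp, nil
		}
		if err == nil {
			resp.Body.Close() // release the failed attempt before retrying
		}
		time.Sleep(time.Duration(i+1) * time.Second) // 1s, 2s, 3s
	}
	if err != nil {
		return nil, err
	}
	return nil, fmt.Errorf("giving up: %s", http.StatusText(resp.StatusCode))
}

func main() {
	resp, err := fetchWithRetry("https://lobid.org/gnd/118540238.json")
	if err == nil {
		defer resp.Body.Close()
		fmt.Println(resp.Status)
	}
}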
providers/gnd/helpers.go (new file, 17 lines)
@@ -0,0 +1,17 @@
+package gnd
+
+import "github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
+
+type GNDData struct {
+	ID, GND string
+}
+
+func ProviderIntoDataset(provider *xmlprovider.XMLProvider[xmlprovider.Agent]) []GNDData {
+	provider.Lock()
+	defer provider.Unlock()
+	var data []GNDData
+	for _, agent := range provider.Array {
+		data = append(data, GNDData{ID: agent.ID, GND: agent.GND})
+	}
+	return data
+}
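The new GNDData is a small transfer type: the fetch path now depends on two plain strings per agent instead of full xmlprovider.Agent values, which keeps the snapshot cheap and the packages decoupled. A self-contained sketch of the same idea (types reduced; the sample ID/GND pair is illustrative):

package main

import "fmt"

// Simplified stand-ins for xmlprovider.Agent and gnd.GNDData.
type Agent struct {
	ID, GND string
	// ... many more XML-mapped fields the fetcher never needs
}

type GNDData struct{ ID, GND string }

// intoDataset mirrors ProviderIntoDataset: copy only the two fields
// the GND fetcher actually uses.
func intoDataset(agents []Agent) []GNDData {
	data := make([]GNDData, 0, len(agents))
	for _, a := range agents {
		data = append(data, GNDData{ID: a.ID, GND: a.GND})
	}
	return data
}

func main() {
	agents := []Agent{{ID: "lessing", GND: "https://d-nb.info/gnd/118572121"}}
	fmt.Println(intoDataset(agents))
}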
@@ -2,13 +2,11 @@ package gnd
 
 import (
 	"fmt"
-
-	"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
 )
 
 type Person struct {
 	KGPZID       string   `json:"kgpzid"`
-	Agent        xmlprovider.Agent `json:"agent"`
+	KGPZURL      string   `json:"kgpzurl"`
 	URL          string   `json:"id,omitempty"`
 	DateOfBirth  []string `json:"dateOfBirth,omitempty"`
 	PlaceOfBirth []Entity `json:"placeOfBirth,omitempty"`
providers/xmlprovider/helpers.go (new file, 32 lines)
@@ -0,0 +1,32 @@
+package xmlprovider
+
+import (
+	"encoding/xml"
+	"io"
+	"os"
+
+	"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
+)
+
+func UnmarshalFile[T any](filename string, data T) error {
+	xmlFile, err := os.Open(filename)
+	if err != nil {
+		logging.Error(err, "Could not open file: "+filename)
+		return err
+	}
+	defer xmlFile.Close()
+
+	logging.Info("Deserialization: " + filename)
+	byteValue, err := io.ReadAll(xmlFile)
+	if err != nil {
+		logging.Error(err, "Could not read file: "+filename)
+		return err
+	}
+	err = xml.Unmarshal(byteValue, &data)
+
+	if err != nil {
+		logging.Error(err, "Could not unmarshal file: "+filename)
+		return err
+	}
+	return nil
+}
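UnmarshalFile moved here unchanged from xmlprovider.go (its deletion appears further below). A hedged usage sketch with a hypothetical root element type — the real root types live in this package and implement XMLRootElement[T]:

package main

import (
	"encoding/xml"
	"fmt"
)

// agentsFile is a hypothetical XML root element for illustration only.
type agentsFile struct {
	XMLName xml.Name `xml:"agents"`
	Agents  []struct {
		ID  string `xml:"id,attr"`
		GND string `xml:"gnd"`
	} `xml:"agent"`
}

func main() {
	// UnmarshalFile("agents.xml", &doc) would do the same via os.Open and
	// io.ReadAll; here the xml.Unmarshal step is inlined on a literal document.
	doc := agentsFile{}
	raw := []byte(`<agents><agent id="lessing"><gnd>https://d-nb.info/gnd/118572121</gnd></agent></agents>`)
	if err := xml.Unmarshal(raw, &doc); err != nil {
		panic(err)
	}
	fmt.Println(doc.Agents[0].ID)
}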
@@ -29,7 +29,11 @@ type Additional struct {
 	Bis int `xml:"bis"`
 }
 
-func (i Issue) GetIDs() []string {
+func (i Issue) Keys() []string {
+	if len(i.keys) > 0 {
+		return i.keys
+	}
+
 	res := make([]string, 2)
 	date := i.Datum.When
 	if date != "" {
@@ -40,6 +44,8 @@ func (i Issue) GetIDs() []string {
 		res = append(res, i.Datum.When[0:4]+"-"+strconv.Itoa(i.Number.No))
 	}
 
+	i.keys = res
+
 	return res
 }
 
@@ -4,3 +4,7 @@ type ItemInfo struct {
 	Source string
 	Parse  *ParseMeta
 }
+
+type KeyedItem struct {
+	keys []string
+}
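KeyedItem gives each XML item type an unexported keys slice that Keys() can memoize into, as the Issue, Piece, and Identifier hunks show. A standalone sketch of the caching pattern; note this sketch uses a pointer receiver, which is what lets the cached slice persist between calls (with a value receiver the write lands on a copy):

package main

import "fmt"

// KeyedItem mirrors the embedded cache type from the diff.
type KeyedItem struct {
	keys []string
}

type Identifier struct {
	ID string
	KeyedItem
}

// Keys computes the key list once and serves it from the cache afterwards.
func (i *Identifier) Keys() []string {
	if len(i.keys) > 0 {
		return i.keys
	}
	i.keys = []string{i.ID}
	return i.keys
}

func main() {
	id := Identifier{ID: "1764-42"}
	fmt.Println(id.Keys()) // computes and caches
	fmt.Println(id.Keys()) // served from the cache
}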
@@ -6,6 +6,7 @@ import (
 )
 
 type Library struct {
+	amu    sync.Mutex
 	Agents *XMLProvider[Agent]
 	Places *XMLProvider[Place]
 	Works  *XMLProvider[Work]
@@ -19,6 +20,7 @@ func (l *Library) String() string {
 		l.Agents.String(), l.Places.String(), l.Works.String(), l.Categories.String(), l.Issues.String(), l.Pieces.String())
 }
 
+// INFO: this is the only place where the providers are created. There is no need for locking on access.
 func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library {
 	return &Library{
 		Agents: &XMLProvider[Agent]{Paths: agentpaths},
@@ -31,6 +33,8 @@ func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) *Library {
 }
 
 func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) {
+	l.amu.Lock()
+	defer l.amu.Unlock()
 	l.Agents.Paths = agentpaths
 	l.Places.Paths = placepaths
 	l.Works.Paths = workpaths
@@ -93,14 +97,9 @@ func (l *Library) Serialize(commit string) {
 	}
 
 	wg.Wait()
-
-	go func() {
 	l.Cleanup()
-	}()
 }
 
-// TODO: Prepare resets the list of failed parses for a new parse.
-// We need to set the logs accordingly.
 func (l *Library) Prepare(commit string) {
 	l.Agents.Prepare(commit)
 	l.Places.Prepare(commit)
@@ -111,10 +110,38 @@ func (l *Library) Prepare(commit string) {
 }
 
 func (l *Library) Cleanup() {
+	wg := sync.WaitGroup{}
+	wg.Add(6)
+
+	go func() {
 	l.Agents.Cleanup()
+		wg.Done()
+	}()
+
+	go func() {
 	l.Places.Cleanup()
+		wg.Done()
+	}()
+
+	go func() {
 	l.Works.Cleanup()
+		wg.Done()
+	}()
+
+	go func() {
 	l.Categories.Cleanup()
+		wg.Done()
+	}()
+
+	go func() {
 	l.Issues.Cleanup()
+		wg.Done()
+	}()
+
+	go func() {
 	l.Pieces.Cleanup()
+		wg.Done()
+	}()
+
+	wg.Wait()
 }
@@ -28,7 +28,11 @@ func (p Piece) String() string {
 	return fmt.Sprintf("ID: %s\nIssueRefs: %v\nPlaceRefs: %v\nCategoryRefs: %v\nAgentRefs: %v\nWorkRefs: %v\nPieceRefs: %v\nAdditionalRef: %v\nIncipit: %v\nTitle: %v\nAnnotations: %v\nNotes: %v\n", p.ID, p.IssueRefs, p.PlaceRefs, p.CategoryRefs, p.AgentRefs, p.WorkRefs, p.PieceRefs, p.AdditionalRef, p.Incipit, p.Title, p.Annotations, p.Notes)
 }
 
-func (p Piece) GetIDs() []string {
+func (p Piece) Keys() []string {
+	if len(p.keys) > 0 {
+		return p.keys
+	}
+
 	ret := make([]string, 2)
 	if p.ID != "" {
 		ret = append(ret, p.ID)
@@ -44,6 +48,9 @@ func (p Piece) GetIDs() []string {
 	for _, i := range p.AdditionalRef {
 		ret = append(ret, i.Datum+"-"+strconv.Itoa(i.Nr)+"-b-"+uid.String())
 	}
+
+	p.keys = ret
+
 	return ret
 }
 
@@ -37,10 +37,15 @@ type Note struct {
 
 type Identifier struct {
 	ID string `xml:"id,attr"`
+	KeyedItem
 }
 
-func (i Identifier) GetIDs() []string {
-	return []string{i.ID}
+func (i Identifier) Keys() []string {
+	if len(i.keys) > 0 {
+		return i.keys
+	}
+	i.keys = []string{i.ID}
+	return i.keys
 }
 
 type Reference struct {
@@ -1,10 +1,7 @@
 package xmlprovider
 
 import (
-	"encoding/xml"
 	"fmt"
-	"io"
-	"os"
 	"slices"
 	"sync"
 	"time"
@@ -19,45 +16,40 @@ type ParseMeta struct {
 
 type XMLItem interface {
 	fmt.Stringer
-	GetIDs() []string
+	Keys() []string
 }
 
 // An XMLProvider is a struct that holds holds serialized XML data of a specific type. It combines multiple parses IF a succeeded parse can not serialize the data from a path.
 type XMLProvider[T XMLItem] struct {
 	Paths []string
-	// INFO: map is type [string]*T
+	// INFO: map is type map[string]*T
 	Items sync.Map
 	// INFO: map is type [string]ItemInfo
 	// It keeps information about parsing status of the items.
 	Infos sync.Map
 
 	mu sync.Mutex
-	// TODO: This is not populated yet
+	// TODO: This array is meant to be for iteration purposes, since iteration over the sync.Map is slow.
+	// It is best for this array to be sorted by key of the corresponding item.
 	Array []T
+	Previous []T
 	failed []string
 	parses []ParseMeta
 }
 
 // INFO: To parse sth, we call Prepare, then Serialize, then Cleanup.
-// Serialize can be called concurretly.
+// Prepare & Cleanup are called once per parse. Serialize is called for every path.
+// and can be called concurretly.
 func (p *XMLProvider[T]) Prepare(commit string) {
 	p.mu.Lock()
 	defer p.mu.Unlock()
+	p.Previous = p.Array
+	p.Array = make([]T, len(p.Previous))
 	p.failed = make([]string, 0)
 	p.parses = append(p.parses, ParseMeta{Commit: commit, Date: time.Now()})
 }
 
 func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) error {
-	if len(p.parses) == 0 {
-		logging.Error(fmt.Errorf("No commit set"), "No commit set")
-		return fmt.Errorf("No commit set")
-	}
-
-	p.mu.Lock()
-	commit := &p.parses[len(p.parses)-1]
-	p.mu.Unlock()
-
-	// Introduce goroutine for every path, locking on append:
 	if err := UnmarshalFile(path, dataholder); err != nil {
 		logging.Error(err, "Could not unmarshal file: "+path)
 		logging.ParseMessages.LogError(logging.Unknown, path, "", "Could not unmarshal file.")
@@ -67,9 +59,18 @@ func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string) error {
 		return err
 	}
 
+	p.mu.Lock()
+	if len(p.parses) == 0 {
+		logging.Error(fmt.Errorf("No commit set"), "No commit set")
+		return fmt.Errorf("No commit set")
+	}
+	commit := &p.parses[len(p.parses)-1]
+	p.Array = append(p.Array, dataholder.Children()...)
+	p.mu.Unlock()
+
 	for _, item := range dataholder.Children() {
 		// INFO: Mostly it's just one ID, so the double loop is not that bad.
-		for _, id := range item.GetIDs() {
+		for _, id := range item.Keys() {
 			p.Infos.Store(id, ItemInfo{Source: path, Parse: commit})
 			p.Items.Store(id, &item)
 		}
@@ -92,11 +93,20 @@ func (p *XMLProvider[T]) Cleanup() {
 
 	lastcommit := &p.parses[len(p.parses)-1]
 	todelete := make([]string, 0)
+	toappend := make([]*T, 0)
 	p.Infos.Range(func(key, value interface{}) bool {
 		info := value.(ItemInfo)
 		if info.Parse != lastcommit {
 			if !slices.Contains(p.failed, info.Source) {
 				todelete = append(todelete, key.(string))
+			} else {
+				item, ok := p.Items.Load(key)
+				if ok {
+					i := item.(*T)
+					if !slices.Contains(toappend, i) {
+						toappend = append(toappend, i)
+					}
+				}
 			}
 		}
 		return true
@@ -106,6 +116,10 @@ func (p *XMLProvider[T]) Cleanup() {
 		p.Infos.Delete(key)
 		p.Items.Delete(key)
 	}
+
+	for _, item := range toappend {
+		p.Array = append(p.Array, *item)
+	}
 }
 
 func (a *XMLProvider[T]) String() string {
@@ -118,29 +132,6 @@ func (a *XMLProvider[T]) String() string {
 	return s
 }
 
-func UnmarshalFile[T any](filename string, data T) error {
-	xmlFile, err := os.Open(filename)
-	if err != nil {
-		logging.Error(err, "Could not open file: "+filename)
-		return err
-	}
-	defer xmlFile.Close()
-
-	logging.Info("Deserialization: " + filename)
-	byteValue, err := io.ReadAll(xmlFile)
-	if err != nil {
-		logging.Error(err, "Could not read file: "+filename)
-		return err
-	}
-	err = xml.Unmarshal(byteValue, &data)
-
-	if err != nil {
-		logging.Error(err, "Could not unmarshal file: "+filename)
-		return err
-	}
-	return nil
-}
-
 func (p *XMLProvider[T]) Item(id string) *T {
 	item, ok := p.Items.Load(id)
 	if !ok {
@@ -173,15 +164,10 @@ func (p *XMLProvider[T]) FindKey(fn func(string) bool) []*T {
 	return items
 }
 
-// INFO: Do not use this, except when iterating over a collection multiple times (three times or more).
-// Maps are slow to iterate, but many of the Iterations can only be done once, so it doesn´t matter for a
-// few thousand objects. We prefer to lookup objects by key and have multiple meaningful keys; along with
-// sensible caching rules to keep the application responsive.
-func (p *XMLProvider[T]) Everything() []*T {
-	var items []*T
-	p.Items.Range(func(key, value interface{}) bool {
-		items = append(items, value.(*T))
-		return true
-	})
-	return items
-}
+func (p *XMLProvider[T]) Lock() {
+	p.mu.Lock()
+}
+
+func (p *XMLProvider[T]) Unlock() {
+	p.mu.Unlock()
+}
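With Everything() removed, iterating consumers are expected to take the provider lock and walk Array directly — exactly what ProviderIntoDataset does above. A reduced sketch of that consumer idiom (the provider type here is a stand-in, not the real XMLProvider):

package main

import (
	"fmt"
	"sync"
)

// Reduced stand-in for XMLProvider[T]: just the lock and the new Array.
type provider[T any] struct {
	mu    sync.Mutex
	Array []T
}

func (p *provider[T]) Lock()   { p.mu.Lock() }
func (p *provider[T]) Unlock() { p.mu.Unlock() }

func main() {
	p := &provider[string]{Array: []string{"a", "b"}}

	// The post-commit iteration idiom: hold the lock only while walking
	// the slice, copying out whatever the caller needs.
	p.Lock()
	out := make([]string, len(p.Array))
	copy(out, p.Array)
	p.Unlock()

	fmt.Println(out)
}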