Better reverse link resolving

This commit is contained in:
Simon Martens
2025-01-01 23:49:09 +01:00
parent 7539a2dca7
commit 3f294680c0
13 changed files with 259 additions and 99 deletions

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
)
type Agent struct {
@@ -16,6 +16,11 @@ type Agent struct {
AnnotationNote
}
func (a Agent) String() string {
return fmt.Sprintf("ID: %s\nNames: %v\nSortName: %s\nLife: %s\nGND: %s\nAnnotations: %v\nNotes: %v\n", a.ID, a.Names, a.SortName, a.Life, a.GND, a.Annotations, a.Notes)
// Name returns the fixed label "agent" for every Agent value.
func (a Agent) Name() string {
	const label = "agent"
	return label
}
// String renders the agent as JSON, indented by one space per nesting level.
//
// If the value cannot be marshalled, the returned string describes the
// failure instead of silently being empty, so the problem stays visible in
// logs and debug output.
func (a Agent) String() string {
	data, err := json.MarshalIndent(a, "", " ")
	if err != nil {
		return "agent: cannot marshal to JSON: " + err.Error()
	}
	return string(data)
}

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
)
type Category struct {
@@ -13,6 +13,11 @@ type Category struct {
AnnotationNote
}
func (c Category) String() string {
return fmt.Sprintf("ID: %s\nNames: %v\nSortName: %s\nAnnotations: %v\nNotes: %v\n", c.ID, c.Names, c.SortName, c.Annotations, c.Notes)
// Name returns the fixed label "category" for every Category value.
func (c Category) Name() string {
	const label = "category"
	return label
}
// String renders the category as JSON, indented by one space per nesting
// level.
//
// If the value cannot be marshalled, the returned string describes the
// failure instead of silently being empty, so the problem stays visible in
// logs and debug output.
func (c Category) String() string {
	data, err := json.MarshalIndent(c, "", " ")
	if err != nil {
		return "category: cannot marshal to JSON: " + err.Error()
	}
	return string(data)
}

View File

@@ -55,10 +55,9 @@ type Identifier struct {
}
// Keys returns the identifier's keys: i.keys when it is non-empty, otherwise
// a slice whose only element is i.ID.
// This span of the diff view interleaves two variants of the body (an early
// return followed by an unconditional assignment, and an append guarded by an
// emptiness check); both produce the result described above.
func (i Identifier) Keys() []string {
if len(i.keys) > 0 {
return i.keys
if len(i.keys) == 0 {
// NOTE(review): the receiver is a value, so this assignment only changes the
// method's local copy. The caller's Identifier keeps its empty keys field and
// the fallback slice is rebuilt on every call — confirm whether a pointer
// receiver (or no caching at all) was intended.
i.keys = append(i.keys, i.ID)
}
i.keys = []string{i.ID}
return i.keys
}

View File

@@ -5,16 +5,6 @@ import (
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
)
// Locations of the XML source files and directories. The values are relative
// paths; the library joins them onto a base directory with filepath.Join
// before reading.
const (
// AGENTS_PATH is the relative path of the agents file.
AGENTS_PATH = "XML/akteure.xml"
// PLACES_PATH is the relative path of the places file.
PLACES_PATH = "XML/orte.xml"
// WORKS_PATH is the relative path of the works file.
WORKS_PATH = "XML/werke.xml"
// CATEGORIES_PATH is the relative path of the categories file.
CATEGORIES_PATH = "XML/kategorien.xml"
// ISSUES_DIR is the relative directory searched for issue XML files.
ISSUES_DIR = "XML/stuecke/"
// PIECES_DIR is the relative directory searched for piece XML files.
PIECES_DIR = "XML/beitraege/"
)
func AgentsIntoDataset(provider *xmlprovider.XMLProvider[Agent]) []gnd.GNDData {
provider.Lock()
defer provider.Unlock()

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
"strconv"
)
@@ -29,6 +29,10 @@ type Additional struct {
Bis int `xml:"bis"`
}
// Name returns the fixed label "issue" for every Issue value.
func (i Issue) Name() string {
	const label = "issue"
	return label
}
func (i Issue) Keys() []string {
if len(i.keys) > 0 {
return i.keys
@@ -55,5 +59,6 @@ func (i Issue) Reference() string {
}
// String returns a textual representation of the issue.
// This span of the diff view shows two bodies back to back: a single-line
// fmt.Sprintf rendering of the individual fields, and a JSON rendering
// indented by one space per level (presumably the replaced and the replacing
// implementation of this commit).
func (i Issue) String() string {
return fmt.Sprintf("Number: %v, Datum: %v, Von: %d, Bis: %d, Additionals: %v, Identifier: %v, AnnotationNote: %v\n", i.Number, i.Datum, i.Von, i.Bis, i.Additionals, i.Identifier, i.AnnotationNote)
// The marshal error is discarded. NOTE(review): on failure data should be nil,
// which makes the result an empty string — consider surfacing the error.
data, _ := json.MarshalIndent(i, "", " ")
return string(data)
}

View File

@@ -3,13 +3,26 @@ package xmlmodels
import (
"fmt"
"path/filepath"
"strings"
"sync"
"time"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
)
// Locations of the XML source files and directories. The values are relative
// paths; the library joins them onto a base directory with filepath.Join
// before reading.
const (
// AGENTS_PATH is the relative path of the agents file.
AGENTS_PATH = "XML/akteure.xml"
// PLACES_PATH is the relative path of the places file.
PLACES_PATH = "XML/orte.xml"
// WORKS_PATH is the relative path of the works file.
WORKS_PATH = "XML/werke.xml"
// CATEGORIES_PATH is the relative path of the categories file.
CATEGORIES_PATH = "XML/kategorien.xml"
// ISSUES_DIR is the relative directory searched for issue XML files.
ISSUES_DIR = "XML/stuecke/"
// PIECES_DIR is the relative directory searched for piece XML files.
PIECES_DIR = "XML/beitraege/"
)
type Library struct {
baseDir string
mu sync.Mutex
Parses []xmlprovider.ParseMeta
Agents *xmlprovider.XMLProvider[Agent]
Places *xmlprovider.XMLProvider[Place]
@@ -25,9 +38,8 @@ func (l *Library) String() string {
}
// INFO: this is the only place where the providers are created. There is no need for locking on access.
func NewLibrary(basedir string) *Library {
func NewLibrary() *Library {
return &Library{
baseDir: basedir,
Agents: &xmlprovider.XMLProvider[Agent]{},
Places: &xmlprovider.XMLProvider[Place]{},
Works: &xmlprovider.XMLProvider[Work]{},
@@ -37,97 +49,149 @@ func NewLibrary(basedir string) *Library {
}
}
func (l *Library) Serialize(commit string) {
// Parse reads every XML source below baseDir into the library's providers.
//
// It holds l.mu for the whole run, calls prepare, and then starts one
// goroutine per fixed file (places, agents, categories, works) plus one per
// file returned by XMLFilesForPath for the issues and pieces directories.
// Every path whose Serialize call returns an error is recorded in
// meta.FailedPaths. Once all goroutines have finished, cleanup is called and
// meta is appended to l.Parses.
//
// It returns nil when no path failed, otherwise an error listing the failed
// paths.
//
// This span of the diff view interleaves lines of the earlier Serialize
// method (one-argument Serialize calls using l.baseDir, Prepare(commit),
// Cleanup()) with the lines of Parse.
func (l *Library) Parse(source xmlprovider.ParseSource, baseDir, commit string) error {
// INFO: this lock prevents multiple parses from happening at the same time.
l.mu.Lock()
defer l.mu.Unlock()
wg := sync.WaitGroup{}
meta := xmlprovider.ParseMeta{
Source: source,
BaseDir: baseDir,
Commit: commit,
Date: time.Now(),
}
// metamu serialises the appends to meta.FailedPaths made by the goroutines below.
metamu := sync.Mutex{}
l.Prepare(commit)
l.prepare()
wg.Add(1)
go func() {
l.Places.Serialize(&PlaceRoot{}, filepath.Join(l.baseDir, PLACES_PATH))
// NOTE(review): meta is passed — and therefore read — here and in the other
// goroutines without holding metamu, while a sibling goroutine may be
// appending to meta.FailedPaths under metamu. This looks like a data race;
// confirm with the race detector.
err := l.Places.Serialize(&PlaceRoot{}, filepath.Join(meta.BaseDir, PLACES_PATH), meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, filepath.Join(meta.BaseDir, PLACES_PATH))
metamu.Unlock()
}
wg.Done()
}()
wg.Add(1)
go func() {
l.Agents.Serialize(&AgentRoot{}, filepath.Join(l.baseDir, AGENTS_PATH))
err := l.Agents.Serialize(&AgentRoot{}, filepath.Join(meta.BaseDir, AGENTS_PATH), meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, filepath.Join(meta.BaseDir, AGENTS_PATH))
metamu.Unlock()
}
wg.Done()
}()
wg.Add(1)
go func() {
l.Categories.Serialize(&CategoryRoot{}, filepath.Join(l.baseDir, CATEGORIES_PATH))
err := l.Categories.Serialize(&CategoryRoot{}, filepath.Join(meta.BaseDir, CATEGORIES_PATH), meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, filepath.Join(meta.BaseDir, CATEGORIES_PATH))
metamu.Unlock()
}
wg.Done()
}()
wg.Add(1)
go func() {
l.Works.Serialize(&WorkRoot{}, filepath.Join(l.baseDir, WORKS_PATH))
err := l.Works.Serialize(&WorkRoot{}, filepath.Join(meta.BaseDir, WORKS_PATH), meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, filepath.Join(meta.BaseDir, WORKS_PATH))
metamu.Unlock()
}
wg.Done()
}()
issuepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(l.baseDir, ISSUES_DIR))
// The error from XMLFilesForPath is discarded. NOTE(review): a failure to list
// the directory would presumably yield no paths and never show up in
// meta.FailedPaths — confirm and consider reporting it.
issuepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(meta.BaseDir, ISSUES_DIR))
for _, path := range issuepaths {
wg.Add(1)
// NOTE(review): the closure captures the loop variable path directly; this is
// only correct with per-iteration loop variables (Go 1.22+) — confirm the
// module's Go version.
go func() {
l.Issues.Serialize(&IssueRoot{}, path)
err := l.Issues.Serialize(&IssueRoot{}, path, meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, path)
metamu.Unlock()
}
wg.Done()
}()
}
piecepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(l.baseDir, PIECES_DIR))
piecepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(meta.BaseDir, PIECES_DIR))
for _, path := range piecepaths {
wg.Add(1)
go func() {
l.Pieces.Serialize(&PieceRoot{}, path)
err := l.Pieces.Serialize(&PieceRoot{}, path, meta)
if err != nil {
metamu.Lock()
meta.FailedPaths = append(meta.FailedPaths, path)
metamu.Unlock()
}
wg.Done()
}()
}
// All Serialize goroutines have returned once Wait unblocks, so meta can be
// read without metamu from here on.
wg.Wait()
l.Cleanup()
l.cleanup(meta)
l.Parses = append(l.Parses, meta)
// Failed paths are currently the only kind of problem collected here.
var errors []string
if len(meta.FailedPaths) > 0 {
errors = append(errors, fmt.Sprintf("Failed paths: %v", meta.FailedPaths))
}
if len(errors) > 0 {
return fmt.Errorf("Parsing encountered errors: %v", strings.Join(errors, "; "))
}
return nil
}
func (l *Library) Prepare(commit string) {
l.Agents.Prepare(commit)
l.Places.Prepare(commit)
l.Works.Prepare(commit)
l.Categories.Prepare(commit)
l.Issues.Prepare(commit)
l.Pieces.Prepare(commit)
// prepare calls Prepare on each of the library's six providers, in the order
// agents, places, works, categories, issues, pieces.
// NOTE(review): what Prepare does is not visible here; presumably it readies
// a provider for a fresh parse — confirm in the xmlprovider package.
func (l *Library) prepare() {
l.Agents.Prepare()
l.Places.Prepare()
l.Works.Prepare()
l.Categories.Prepare()
l.Issues.Prepare()
l.Pieces.Prepare()
}
func (l *Library) Cleanup() {
func (l *Library) cleanup(meta xmlprovider.ParseMeta) {
wg := sync.WaitGroup{}
wg.Add(6)
go func() {
l.Agents.Cleanup()
l.Agents.Cleanup(meta)
wg.Done()
}()
go func() {
l.Places.Cleanup()
l.Places.Cleanup(meta)
wg.Done()
}()
go func() {
l.Works.Cleanup()
l.Works.Cleanup(meta)
wg.Done()
}()
go func() {
l.Categories.Cleanup()
l.Categories.Cleanup(meta)
wg.Done()
}()
go func() {
l.Issues.Cleanup()
l.Issues.Cleanup(meta)
wg.Done()
}()
go func() {
l.Pieces.Cleanup()
l.Pieces.Cleanup(meta)
wg.Done()
}()

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
"strconv"
"strings"
@@ -24,8 +24,13 @@ type Piece struct {
AnnotationNote
}
// Name returns the fixed label "piece" for every Piece value.
func (p Piece) Name() string {
	const label = "piece"
	return label
}
// String returns a textual representation of the piece.
// This span of the diff view shows two bodies back to back: a multi-line
// fmt.Sprintf rendering of the individual fields, and a JSON rendering
// indented by one space per level (presumably the replaced and the replacing
// implementation of this commit).
func (p Piece) String() string {
return fmt.Sprintf("ID: %s\nIssueRefs: %v\nPlaceRefs: %v\nCategoryRefs: %v\nAgentRefs: %v\nWorkRefs: %v\nPieceRefs: %v\nIncipit: %v\nTitle: %v\nAnnotations: %v\nNotes: %v\n", p.ID, p.IssueRefs, p.PlaceRefs, p.CategoryRefs, p.AgentRefs, p.WorkRefs, p.PieceRefs, p.Incipit, p.Title, p.Annotations, p.Notes)
// The marshal error is discarded. NOTE(review): on failure data should be nil,
// which makes the result an empty string — consider surfacing the error.
data, _ := json.MarshalIndent(p, "", " ")
return string(data)
}
func (p Piece) Keys() []string {

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
)
type Place struct {
@@ -14,6 +14,11 @@ type Place struct {
AnnotationNote
}
func (p Place) String() string {
return fmt.Sprintf("ID: %s\nNames: %v\nSortName: %s\nGeo: %s\nAnnotations: %v\nNotes: %v\n", p.ID, p.Names, p.SortName, p.Geo, p.Annotations, p.Notes)
// Name returns the fixed label "place" for every Place value.
func (p Place) Name() string {
	const label = "place"
	return label
}
// String renders the place as JSON, indented by one space per nesting level.
//
// If the value cannot be marshalled, the returned string describes the
// failure instead of silently being empty, so the problem stays visible in
// logs and debug output.
func (p Place) String() string {
	data, err := json.MarshalIndent(p, "", " ")
	if err != nil {
		return "place: cannot marshal to JSON: " + err.Error()
	}
	return string(data)
}

View File

@@ -1,8 +1,8 @@
package xmlmodels
import (
"encoding/json"
"encoding/xml"
"fmt"
"strings"
)
@@ -16,6 +16,10 @@ type Work struct {
AnnotationNote
}
// Name returns the fixed label "work" for every Work value.
func (w Work) Name() string {
	const label = "work"
	return label
}
func (p Work) ReferencesAgent(a string) (*AgentRef, bool) {
for _, i := range p.AgentRefs {
if strings.HasPrefix(i.Ref, a) {
@@ -34,5 +38,6 @@ type Citation struct {
}
// String returns a textual representation of the work.
// This span of the diff view shows two bodies back to back: a single-line
// fmt.Sprintf rendering of the individual fields, and a JSON rendering
// indented by one space per level (presumably the replaced and the replacing
// implementation of this commit).
func (w Work) String() string {
return fmt.Sprintf("URLs: %v, Citation: %v, PreferredTitle: %s, Akteur: %v, Identifier: %v, AnnotationNote: %v\n", w.URLs, w.Citation, w.PreferredTitle, w.AgentRefs, w.Identifier, w.AnnotationNote)
// The marshal error is discarded. NOTE(review): on failure data should be nil,
// which makes the result an empty string — consider surfacing the error.
data, _ := json.MarshalIndent(w, "", " ")
return string(data)
}