Better structure of files; introduced XML models

This commit is contained in:
Simon Martens
2025-01-01 17:00:26 +01:00
parent e46d540c01
commit 7539a2dca7
25 changed files with 297 additions and 348 deletions

21
xmlmodels/agents.go Normal file
View File

@@ -0,0 +1,21 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
)
// Agent is the XML model of a single <akteur> element.
type Agent struct {
XMLName xml.Name `xml:"akteur"`
// Names collects the text of every <name> child element.
Names []string `xml:"name"`
// SortName holds the text of the <sortiername> child element.
SortName string `xml:"sortiername"`
// Life holds the text of the <lebensdaten> child element.
Life string `xml:"lebensdaten"`
// GND holds the text of the <gnd> child element.
GND string `xml:"gnd"`
// Org is read from the "org" attribute.
Org bool `xml:"org,attr"`
// Identifier contributes the "id" attribute and the Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// String renders the agent as a multi-line summary listing its ID, names,
// sort name, life dates, GND, annotations and notes (one field per line).
func (a Agent) String() string {
	const layout = "ID: %s\nNames: %v\nSortName: %s\nLife: %s\nGND: %s\nAnnotations: %v\nNotes: %v\n"
	return fmt.Sprintf(
		layout,
		a.ID,
		a.Names,
		a.SortName,
		a.Life,
		a.GND,
		a.Annotations,
		a.Notes,
	)
}

18
xmlmodels/categories.go Normal file
View File

@@ -0,0 +1,18 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
)
// Category is the XML model of a single <kategorie> element.
type Category struct {
XMLName xml.Name `xml:"kategorie"`
// Names collects the text of every <name> child element.
Names []string `xml:"name"`
// SortName holds the text of the <sortiername> child element.
SortName string `xml:"sortiername"`
// Identifier contributes the "id" attribute and the Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// String renders the category as a multi-line summary listing its ID, names,
// sort name, annotations and notes (one field per line).
func (c Category) String() string {
	const layout = "ID: %s\nNames: %v\nSortName: %s\nAnnotations: %v\nNotes: %v\n"
	return fmt.Sprintf(
		layout,
		c.ID,
		c.Names,
		c.SortName,
		c.Annotations,
		c.Notes,
	)
}

71
xmlmodels/common.go Normal file
View File

@@ -0,0 +1,71 @@
package xmlmodels
import (
"encoding/xml"
"errors"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/xsdtime"
)
// InvalidDateError is a package-level sentinel error value for invalid dates.
// NOTE(review): Go convention would name this ErrInvalidDate and use a
// lowercase message; both are left untouched here so that the exported name
// and the emitted message stay stable for existing callers.
var InvalidDateError = errors.New("Invalid date")
// DateLayout is a year-month-day layout written in Go's reference-time
// notation; presumably intended for time.Parse/Format — confirm against callers.
const DateLayout = "2006-01-02"
// KGPZDate is the XML model of a <datum> element: the date attributes declared
// in DateAttributes plus the element's character data (Value).
type KGPZDate struct {
XMLName xml.Name `xml:"datum"`
DateAttributes
Value
}
// DateAttributes groups the date-related XML attributes that can be embedded
// into element models. All date values are decoded as xsdtime.XSDDate.
type DateAttributes struct {
// When is read from the "when" attribute.
When xsdtime.XSDDate `xml:"when,attr"`
// NotBefore is read from the "notBefore" attribute.
NotBefore xsdtime.XSDDate `xml:"notBefore,attr"`
// NotAfter is read from the "notAfter" attribute.
NotAfter xsdtime.XSDDate `xml:"notAfter,attr"`
// From is read from the "from" attribute.
From xsdtime.XSDDate `xml:"from,attr"`
// To is read from the "to" attribute.
To xsdtime.XSDDate `xml:"to,attr"`
// Cert is read verbatim from the "cert" attribute.
Cert string `xml:"cert,attr"`
}
// URL is the XML model of a <url> element: its "address" attribute plus the
// element's character data (Value).
type URL struct {
XMLName xml.Name `xml:"url"`
Address string `xml:"address,attr"`
Value
}
// AnnotationNote is an embeddable pair of lists shared by the element models:
// all <anmerkung> children (Annotations) and all <vermerk> children (Notes).
type AnnotationNote struct {
Annotations []Annotation `xml:"anmerkung"`
Notes []Note `xml:"vermerk"`
}
// Annotation is the XML model of an <anmerkung> element. It keeps both the
// element's character data (Value) and its raw inner XML (Inner).
type Annotation struct {
XMLName xml.Name `xml:"anmerkung"`
Value
Inner
}
// Note is the XML model of a <vermerk> element. It keeps both the element's
// character data (Value) and its raw inner XML (Inner).
type Note struct {
XMLName xml.Name `xml:"vermerk"`
Value
Inner
}
// Identifier is an embeddable helper that carries the "id" XML attribute and
// an optional, package-internal list of pre-computed lookup keys.
type Identifier struct {
	// ID is read from the "id" attribute.
	ID string `xml:"id,attr"`
	// keys, when non-empty, overrides the default key list returned by Keys.
	keys []string
}

// Keys returns the lookup keys of the element: the pre-computed keys when any
// are set, otherwise a one-element slice holding the ID (even if ID is empty).
//
// Keys has a value receiver, so it operates on a copy of the Identifier and
// cannot cache anything on the caller's value; it therefore does not write to
// i.keys.
func (i Identifier) Keys() []string {
	if len(i.keys) > 0 {
		return i.keys
	}
	return []string{i.ID}
}
// Value is an embeddable helper that captures an element's character data.
type Value struct {
Chardata string `xml:",chardata"`
}
// Inner is an embeddable helper that captures an element's raw inner XML.
type Inner struct {
InnerXML string `xml:",innerxml"`
}

26
xmlmodels/helpers.go Normal file
View File

@@ -0,0 +1,26 @@
package xmlmodels
import (
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/gnd"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
)
// Locations of the XML sources. Library.Serialize joins each of them onto the
// library's base directory. The *_PATH constants name single files, the *_DIR
// constants name directories that are scanned for XML files.
// NOTE(review): Go convention is MixedCaps for constants; the names are kept
// because they are exported.
const (
AGENTS_PATH = "XML/akteure.xml"
PLACES_PATH = "XML/orte.xml"
WORKS_PATH = "XML/werke.xml"
CATEGORIES_PATH = "XML/kategorien.xml"
ISSUES_DIR = "XML/stuecke/"
PIECES_DIR = "XML/beitraege/"
)
// AgentsIntoDataset copies the ID and GND of every agent in provider.Array
// into a slice of gnd.GNDData. The provider is locked while iterating and
// unlocked on return. The result is nil when provider.Array is empty.
func AgentsIntoDataset(provider *xmlprovider.XMLProvider[Agent]) []gnd.GNDData {
	provider.Lock()
	defer provider.Unlock()

	var dataset []gnd.GNDData
	for _, agent := range provider.Array {
		entry := gnd.GNDData{
			ID:  agent.ID,
			GND: agent.GND,
		}
		dataset = append(dataset, entry)
	}
	return dataset
}

59
xmlmodels/issues.go Normal file
View File

@@ -0,0 +1,59 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
"strconv"
)
// Issue is the XML model of a single <stueck> element.
type Issue struct {
XMLName xml.Name `xml:"stueck"`
// Number is decoded from the <nummer> child element.
Number Nummer `xml:"nummer"`
// Datum is decoded from the <datum> child element.
Datum KGPZDate `xml:"datum"`
// Von and Bis are decoded as integers from the <von> and <bis> child elements.
Von int `xml:"von"`
Bis int `xml:"bis"`
// Additionals collects every <beilage> child element.
Additionals []Additional `xml:"beilage"`
// Identifier contributes the "id" attribute; Issue overrides its Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// Nummer models the <nummer> child of an issue: the element's character data
// decoded as an integer, plus the "korrigiert" attribute.
type Nummer struct {
No int `xml:",chardata"`
Corrected string `xml:"korrigiert,attr"`
}
// Additional is the XML model of a <beilage> element: its "nummer" attribute
// and its <von>/<bis> child elements, all decoded as integers.
type Additional struct {
XMLName xml.Name `xml:"beilage"`
Nummer int `xml:"nummer,attr"`
Von int `xml:"von"`
Bis int `xml:"bis"`
}
// Keys returns the lookup keys of the issue. Pre-computed keys are returned
// as-is when present. Otherwise the result holds the string form of the
// "when" date (only if non-empty) followed by the issue's Reference().
//
// Keys has a value receiver, so it operates on a copy and cannot cache the
// result on the caller's value; it therefore does not write to i.keys.
func (i Issue) Keys() []string {
	if len(i.keys) > 0 {
		return i.keys
	}

	// Length 0, capacity 2: make([]string, 2) would leave two empty-string
	// keys in front of the appended values.
	res := make([]string, 0, 2)
	if date := i.Datum.When.String(); date != "" {
		res = append(res, date)
	}
	return append(res, i.Reference())
}
// Year returns the Year field of the issue's "when" date.
func (i Issue) Year() int {
	year := i.Datum.When.Year
	return year
}
// Reference returns the issue reference in the form "<number>-<year>", built
// from the issue number and the year of the "when" date.
func (i Issue) Reference() string {
	number := strconv.Itoa(i.Number.No)
	year := strconv.Itoa(i.Datum.When.Year)
	return number + "-" + year
}
// String renders the issue as a single-line summary of its number, date, page
// range, additionals, identifier and annotations/notes.
func (i Issue) String() string {
	const layout = "Number: %v, Datum: %v, Von: %d, Bis: %d, Additionals: %v, Identifier: %v, AnnotationNote: %v\n"
	return fmt.Sprintf(
		layout,
		i.Number,
		i.Datum,
		i.Von,
		i.Bis,
		i.Additionals,
		i.Identifier,
		i.AnnotationNote,
	)
}

135
xmlmodels/library.go Normal file
View File

@@ -0,0 +1,135 @@
package xmlmodels
import (
"fmt"
"path/filepath"
"sync"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
)
// Library bundles one XML provider per model type together with the base
// directory that the XML source paths are resolved against.
type Library struct {
// baseDir is the directory the *_PATH / *_DIR constants are joined onto.
baseDir string
Agents *xmlprovider.XMLProvider[Agent]
Places *xmlprovider.XMLProvider[Place]
Works *xmlprovider.XMLProvider[Work]
Categories *xmlprovider.XMLProvider[Category]
Issues *xmlprovider.XMLProvider[Issue]
Pieces *xmlprovider.XMLProvider[Piece]
}
// String renders the library as a multi-line summary with one line per
// provider, each produced by that provider's own String method.
func (l *Library) String() string {
	const layout = "Agents: %s\nPlaces: %s\nWorks: %s\nCategories: %s\nIssues: %s\nPieces: %s\n"
	return fmt.Sprintf(
		layout,
		l.Agents.String(),
		l.Places.String(),
		l.Works.String(),
		l.Categories.String(),
		l.Issues.String(),
		l.Pieces.String(),
	)
}
// NewLibrary returns a Library rooted at basedir with a fresh, zero-valued
// provider for every model type.
//
// INFO: this is the only place where the providers are created. There is no
// need for locking on access.
func NewLibrary(basedir string) *Library {
	lib := &Library{baseDir: basedir}

	lib.Agents = &xmlprovider.XMLProvider[Agent]{}
	lib.Places = &xmlprovider.XMLProvider[Place]{}
	lib.Works = &xmlprovider.XMLProvider[Work]{}
	lib.Categories = &xmlprovider.XMLProvider[Category]{}
	lib.Issues = &xmlprovider.XMLProvider[Issue]{}
	lib.Pieces = &xmlprovider.XMLProvider[Piece]{}

	return lib
}
// Serialize runs one full pass over all XML sources for the given commit.
//
// It first calls Prepare(commit), then invokes the providers' Serialize
// methods concurrently: once for each of the four single-file collections
// (places, agents, categories, works) and once per file returned by
// xmlprovider.XMLFilesForPath for the issues and pieces directories. After
// all of these calls have returned it calls Cleanup.
func (l *Library) Serialize(commit string) {
	var wg sync.WaitGroup

	// spawn runs task in its own goroutine and registers it with wg. Done is
	// deferred so the WaitGroup is released on every exit path of task.
	spawn := func(task func()) {
		wg.Add(1)
		go func() {
			defer wg.Done()
			task()
		}()
	}

	l.Prepare(commit)

	spawn(func() {
		l.Places.Serialize(&PlaceRoot{}, filepath.Join(l.baseDir, PLACES_PATH))
	})
	spawn(func() {
		l.Agents.Serialize(&AgentRoot{}, filepath.Join(l.baseDir, AGENTS_PATH))
	})
	spawn(func() {
		l.Categories.Serialize(&CategoryRoot{}, filepath.Join(l.baseDir, CATEGORIES_PATH))
	})
	spawn(func() {
		l.Works.Serialize(&WorkRoot{}, filepath.Join(l.baseDir, WORKS_PATH))
	})

	// The listing error is intentionally dropped: Serialize has no error
	// return, and whatever paths were returned are still processed.
	// NOTE(review): consider surfacing this error to the caller or a logger.
	issuepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(l.baseDir, ISSUES_DIR))
	for _, path := range issuepaths {
		path := path // per-goroutine copy; the loop variable is shared before Go 1.22
		spawn(func() {
			l.Issues.Serialize(&IssueRoot{}, path)
		})
	}

	// Same best-effort handling as for the issues directory above.
	piecepaths, _ := xmlprovider.XMLFilesForPath(filepath.Join(l.baseDir, PIECES_DIR))
	for _, path := range piecepaths {
		path := path // per-goroutine copy; the loop variable is shared before Go 1.22
		spawn(func() {
			l.Pieces.Serialize(&PieceRoot{}, path)
		})
	}

	wg.Wait()
	l.Cleanup()
}
// Prepare forwards commit to the Prepare method of every provider, one after
// another, in the order agents, places, works, categories, issues, pieces.
func (l *Library) Prepare(commit string) {
	// Single-file collections.
	l.Agents.Prepare(commit)
	l.Places.Prepare(commit)
	l.Works.Prepare(commit)
	l.Categories.Prepare(commit)

	// Directory-based collections.
	l.Issues.Prepare(commit)
	l.Pieces.Prepare(commit)
}
// Cleanup calls the Cleanup method of every provider, each in its own
// goroutine, and returns once all six calls have finished.
func (l *Library) Cleanup() {
	tasks := []func(){
		func() { l.Agents.Cleanup() },
		func() { l.Places.Cleanup() },
		func() { l.Works.Cleanup() },
		func() { l.Categories.Cleanup() },
		func() { l.Issues.Cleanup() },
		func() { l.Pieces.Cleanup() },
	}

	var wg sync.WaitGroup
	wg.Add(len(tasks))
	for _, task := range tasks {
		// The task is passed as an argument so each goroutine owns its value.
		go func(run func()) {
			run()
			wg.Done()
		}(task)
	}
	wg.Wait()
}

110
xmlmodels/pieces.go Normal file
View File

@@ -0,0 +1,110 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
"strconv"
"strings"
"github.com/google/uuid"
)
// Piece is the XML model of a single <beitrag> element.
type Piece struct {
XMLName xml.Name `xml:"beitrag"`
// IssueRefs collects every <stueck> child element.
IssueRefs []IssueRef `xml:"stueck"`
// PlaceRefs collects every <ort> child element.
PlaceRefs []PlaceRef `xml:"ort"`
// CategoryRefs collects every <kategorie> child element.
CategoryRefs []CategoryRef `xml:"kategorie"`
// AgentRefs collects every <akteur> child element.
AgentRefs []AgentRef `xml:"akteur"`
// WorkRefs collects every <werk> child element.
WorkRefs []WorkRef `xml:"werk"`
// PieceRefs collects every nested <beitrag> child element.
PieceRefs []PieceRef `xml:"beitrag"`
// Datum collects every <datum> child element.
Datum []KGPZDate `xml:"datum"`
// Incipit collects the text of every <incipit> child element.
Incipit []string `xml:"incipit"`
// Title collects the text of every <titel> child element.
Title []string `xml:"titel"`
// Identifier contributes the "id" attribute; Piece overrides its Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// String renders the piece as a multi-line summary listing its ID, all
// reference lists, incipit, title, annotations and notes (one field per line).
func (p Piece) String() string {
	const layout = "ID: %s\nIssueRefs: %v\nPlaceRefs: %v\nCategoryRefs: %v\nAgentRefs: %v\nWorkRefs: %v\nPieceRefs: %v\nIncipit: %v\nTitle: %v\nAnnotations: %v\nNotes: %v\n"
	return fmt.Sprintf(
		layout,
		p.ID,
		p.IssueRefs,
		p.PlaceRefs,
		p.CategoryRefs,
		p.AgentRefs,
		p.WorkRefs,
		p.PieceRefs,
		p.Incipit,
		p.Title,
		p.Annotations,
		p.Notes,
	)
}
// Keys returns the lookup keys of the piece. Pre-computed keys are returned
// as-is when present. Otherwise the result holds the ID (only if non-empty)
// followed by one "<year>-<nr>-<uuid>" key per issue reference.
//
// A new UUID is generated on every call that computes keys, and Keys has a
// value receiver so the result cannot be cached on the caller's value; the
// issue-derived keys therefore differ between calls.
func (p Piece) Keys() []string {
	if len(p.keys) > 0 {
		return p.keys
	}

	// Length 0 with room for the ID and one key per issue reference:
	// make([]string, 2) would leave two empty-string keys in front of the
	// appended values.
	ret := make([]string, 0, len(p.IssueRefs)+1)
	if p.ID != "" {
		ret = append(ret, p.ID)
	}

	// TODO: sensible IDs
	suffix := "-" + uuid.New().String()
	for _, i := range p.IssueRefs {
		ret = append(ret, strconv.Itoa(i.When.Year)+"-"+strconv.Itoa(i.Nr)+suffix)
	}
	return ret
}
// ReferencesIssue reports whether the piece references issue number no of
// year y. On a match it returns a pointer to a copy of the first matching
// issue reference (not to the element stored in p.IssueRefs) and true;
// otherwise it returns nil and false.
func (p Piece) ReferencesIssue(y, no int) (*IssueRef, bool) {
	for _, ref := range p.IssueRefs {
		if ref.Nr != no || ref.When.Year != y {
			continue
		}
		match := ref
		return &match, true
	}
	return nil, false
}
// ReferencesAgent looks for the first agent reference whose Ref starts with a.
// On a match it returns a pointer to a copy of that reference (not to the
// element stored in p.AgentRefs) and true; otherwise nil and false.
// NOTE(review): this is a prefix match, so an empty a matches any reference —
// confirm that exact matching is not intended.
func (p Piece) ReferencesAgent(a string) (*AgentRef, bool) {
	for _, ref := range p.AgentRefs {
		if !strings.HasPrefix(ref.Ref, a) {
			continue
		}
		match := ref
		return &match, true
	}
	return nil, false
}
// ReferencesWork looks for the first work reference whose Ref equals id.
// On a match it returns a pointer to a copy of that reference (not to the
// element stored in p.WorkRefs) and true; otherwise nil and false.
func (p Piece) ReferencesWork(id string) (*WorkRef, bool) {
	for _, ref := range p.WorkRefs {
		if ref.Ref != id {
			continue
		}
		match := ref
		return &match, true
	}
	return nil, false
}
// IsCat reports whether any category, work, agent or piece reference of the
// piece carries the category k. The lists are checked in that order.
//
// TODO: We can make this fast depending on which category to look for
// but we'll have to define rules for every single category (~35 of them)
func (p Piece) IsCat(k string) bool {
	for idx := range p.CategoryRefs {
		if p.CategoryRefs[idx].Category == k {
			return true
		}
	}

	for idx := range p.WorkRefs {
		if p.WorkRefs[idx].Category == k {
			return true
		}
	}

	for idx := range p.AgentRefs {
		if p.AgentRefs[idx].Category == k {
			return true
		}
	}

	for idx := range p.PieceRefs {
		if p.PieceRefs[idx].Category == k {
			return true
		}
	}

	return false
}

19
xmlmodels/places.go Normal file
View File

@@ -0,0 +1,19 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
)
// Place is the XML model of a single <ort> element.
type Place struct {
XMLName xml.Name `xml:"ort"`
// Names collects the text of every <name> child element.
Names []string `xml:"name"`
// SortName holds the text of the <sortiername> child element.
SortName string `xml:"sortiername"`
// Geo holds the text of the <geonames> child element.
Geo string `xml:"geonames"`
// Identifier contributes the "id" attribute and the Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// String renders the place as a multi-line summary listing its ID, names,
// sort name, geo value, annotations and notes (one field per line).
func (p Place) String() string {
	const layout = "ID: %s\nNames: %v\nSortName: %s\nGeo: %s\nAnnotations: %v\nNotes: %v\n"
	return fmt.Sprintf(
		layout,
		p.ID,
		p.Names,
		p.SortName,
		p.Geo,
		p.Annotations,
		p.Notes,
	)
}

47
xmlmodels/references.go Normal file
View File

@@ -0,0 +1,47 @@
package xmlmodels
import "encoding/xml"
// Reference is the embeddable core shared by all reference elements.
type Reference struct {
// Ref is read from the "ref" attribute.
Ref string `xml:"ref,attr"`
// Category is read from the "kat" attribute.
Category string `xml:"kat,attr"`
// Unsicher is read from the "unsicher" attribute.
Unsicher bool `xml:"unsicher,attr"`
// Value captures the element's character data.
Value
}
// AgentRef is the XML model of an <akteur> reference element.
type AgentRef struct {
XMLName xml.Name `xml:"akteur"`
Reference
}
// IssueRef is the XML model of a <stueck> reference element. Besides the
// integer attributes declared here it carries the embedded date attributes.
type IssueRef struct {
XMLName xml.Name `xml:"stueck"`
// Nr is read from the "nr" attribute.
Nr int `xml:"nr,attr"`
// Von and Bis are read from the "von" and "bis" attributes.
Von int `xml:"von,attr"`
Bis int `xml:"bis,attr"`
// Beilage is read from the "beilage" attribute.
Beilage int `xml:"beilage,attr"`
DateAttributes
Reference // Not in the schema
}
// PlaceRef is the XML model of an <ort> reference element.
type PlaceRef struct {
XMLName xml.Name `xml:"ort"`
Reference
}
// CategoryRef is the XML model of a <kategorie> reference element.
type CategoryRef struct {
XMLName xml.Name `xml:"kategorie"`
Reference
}
// WorkRef is the XML model of a <werk> reference element.
type WorkRef struct {
XMLName xml.Name `xml:"werk"`
// Page is read from the "s" attribute.
Page string `xml:"s,attr"`
Reference
}
// PieceRef is the XML model of a <beitrag> reference element.
type PieceRef struct {
XMLName xml.Name `xml:"beitrag"`
// Page is read from the "s" attribute.
Page string `xml:"s,attr"`
Reference
}

61
xmlmodels/roots.go Normal file
View File

@@ -0,0 +1,61 @@
package xmlmodels
import (
"encoding/xml"
)
// INFO: These are just root elements to hold the data of a file
// They get discarded after a parse.
// AgentRoot models the <akteure> root element and holds its <akteur> children.
type AgentRoot struct {
	XMLName xml.Name `xml:"akteure"`
	Agents  []Agent  `xml:"akteur"`
}

// Children returns the agents held by the root element.
func (root AgentRoot) Children() []Agent {
	return root.Agents
}
// PlaceRoot models the <orte> root element and holds its <ort> children.
type PlaceRoot struct {
	XMLName xml.Name `xml:"orte"`
	Place   []Place  `xml:"ort"`
}

// Children returns the places held by the root element.
func (root PlaceRoot) Children() []Place {
	return root.Place
}
// CategoryRoot models the <kategorien> root element and holds its
// <kategorie> children.
type CategoryRoot struct {
	XMLName  xml.Name   `xml:"kategorien"`
	Category []Category `xml:"kategorie"`
}

// Children returns the categories held by the root element.
func (root CategoryRoot) Children() []Category {
	return root.Category
}
// PieceRoot models the <beitraege> root element and holds its <beitrag>
// children.
type PieceRoot struct {
	XMLName xml.Name `xml:"beitraege"`
	Piece   []Piece  `xml:"beitrag"`
}

// Children returns the pieces held by the root element.
func (root PieceRoot) Children() []Piece {
	return root.Piece
}
// IssueRoot models the <stuecke> root element and holds its <stueck> children.
type IssueRoot struct {
	XMLName xml.Name `xml:"stuecke"`
	Issues  []Issue  `xml:"stueck"`
}

// Children returns the issues held by the root element.
func (root IssueRoot) Children() []Issue {
	return root.Issues
}
// WorkRoot models the <werke> root element and holds its <werk> children.
type WorkRoot struct {
	XMLName xml.Name `xml:"werke"`
	Work    []Work   `xml:"werk"`
}

// Children returns the works held by the root element.
func (root WorkRoot) Children() []Work {
	return root.Work
}

38
xmlmodels/works.go Normal file
View File

@@ -0,0 +1,38 @@
package xmlmodels
import (
"encoding/xml"
"fmt"
"strings"
)
// Work is the XML model of a single <werk> element.
type Work struct {
XMLName xml.Name `xml:"werk"`
// URLs collects every <url> child element.
URLs []URL `xml:"url"`
// Citation is decoded from the <zitation> child element.
Citation Citation `xml:"zitation"`
// PreferredTitle holds the text of the <preferred> child element.
PreferredTitle string `xml:"preferred"`
// AgentRefs collects every <akteur> child element.
AgentRefs []AgentRef `xml:"akteur"`
// Identifier contributes the "id" attribute and the Keys method.
Identifier
// AnnotationNote contributes the <anmerkung> and <vermerk> child elements.
AnnotationNote
}
// ReferencesAgent looks for the first agent reference whose Ref starts with a.
// On a match it returns a pointer to a copy of that reference (not to the
// element stored in w.AgentRefs) and true; otherwise nil and false.
// NOTE(review): this is a prefix match, so an empty a matches any reference —
// confirm that exact matching is not intended.
func (w Work) ReferencesAgent(a string) (*AgentRef, bool) {
	for _, ref := range w.AgentRefs {
		if !strings.HasPrefix(ref.Ref, a) {
			continue
		}
		match := ref
		return &match, true
	}
	return nil, false
}
// Citation is the XML model of a <zitation> element. It keeps the element's
// character data (Value) and raw inner XML (Inner) alongside the decoded
// children.
type Citation struct {
XMLName xml.Name `xml:"zitation"`
// Title holds the text of the <title> child element.
Title string `xml:"title"`
// Year collects the text of every <year> child element.
Year []string `xml:"year"`
Value
Inner
}
// String renders the work as a single-line summary of its URLs, citation,
// preferred title, agent references, identifier and annotations/notes.
func (w Work) String() string {
	const layout = "URLs: %v, Citation: %v, PreferredTitle: %s, Akteur: %v, Identifier: %v, AnnotationNote: %v\n"
	return fmt.Sprintf(
		layout,
		w.URLs,
		w.Citation,
		w.PreferredTitle,
		w.AgentRefs,
		w.Identifier,
		w.AnnotationNote,
	)
}