Parselog & Issue start

This commit is contained in:
Simon Martens
2024-11-22 15:45:16 +01:00
parent bc244fbad4
commit 3e4cafb5ee
14 changed files with 417 additions and 130 deletions

View File

@@ -85,7 +85,7 @@ func (k *KGPZ) Enrich() error {
}
// INFO: We pass agents by value since we don't want to block the library
agents := k.Library.Agents.All()
agents := k.Library.Agents.Everything()
go func(agents []xmlprovider.Agent) {
k.GND.FetchPersons(agents)
k.GND.WriteCache(k.Config.GNDPath)
@@ -103,56 +103,43 @@ func (k *KGPZ) Serialize() {
k.gmu.Lock()
defer k.gmu.Unlock()
commit := "staticfile"
if k.Repo != nil {
commit = k.Repo.Commit
}
issues, err := getXMLFiles(filepath.Join(k.Config.FolderPath, ISSUES_DIR))
helpers.Assert(err, "Error getting issues")
pieces, err := getXMLFiles(filepath.Join(k.Config.FolderPath, PIECES_DIR))
helpers.Assert(err, "Error getting pieces")
lib := xmlprovider.NewLibrary(
[]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)},
[]string{filepath.Join(k.Config.FolderPath, WORKS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)},
*issues,
*pieces)
lib.Serialize()
// TODO: is it necessary to lock here, since the gmu lock prevents concurrent locking of the library?
k.lmu.Lock()
defer k.lmu.Unlock()
if k.Library == nil {
lib := xmlprovider.NewLibrary(
[]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)},
[]string{filepath.Join(k.Config.FolderPath, WORKS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)},
*issues,
*pieces)
lib.Serialize(commit)
k.Library = lib
return
} else {
// TODO: where to clear the old data?
// How to differentiate between deleted data points and stale data points bc of parse errors?
k.Library.SetPaths(
[]string{filepath.Join(k.Config.FolderPath, AGENTS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, PLACES_PATH)},
[]string{filepath.Join(k.Config.FolderPath, WORKS_PATH)},
[]string{filepath.Join(k.Config.FolderPath, CATEGORIES_PATH)},
*issues,
*pieces)
k.Library.Serialize(commit)
}
if lib.Agents == nil {
lib.Agents = k.Library.Agents
}
if lib.Places == nil {
lib.Places = k.Library.Places
}
if lib.Works == nil {
lib.Works = k.Library.Works
}
if lib.Categories == nil {
lib.Categories = k.Library.Categories
}
if lib.Issues == nil {
lib.Issues = k.Library.Issues
}
if lib.Pieces == nil {
lib.Pieces = k.Library.Pieces
}
k.Library = lib
}
func (k *KGPZ) IsDebug() bool {

200
helpers/logging/parselog.go Normal file
View File

@@ -0,0 +1,200 @@
package logging
import (
"fmt"
"log/slog"
"sync"
)
// WARNING: do not attempt to set this anywhere besides the init function of this module
var ParseMessages ParseLogger
// XMLEntityType identifies which kind of XML entity a ParseMessage refers to.
type XMLEntityType int64

const (
	Agent XMLEntityType = iota
	Place
	// NOTE(review): "Worke" looks like a typo for "Work"; renaming it is an
	// API change for other packages, so it is only flagged here — confirm
	// and rename in a coordinated change.
	Worke
	Category
	Issue
	Piece
)
// ParseErrorLevel orders message severities from Clean (no messages) upward.
// The numeric ordering matters: ParseLogger.setState keeps the maximum level
// seen, so more severe levels must have larger values.
type ParseErrorLevel int64

const (
	Clean ParseErrorLevel = iota
	ObjectMessage
	InfoMessage
	WarningMessage
	ErrorMessage
	FatalMessage
)
// ParseMessage describes a single event produced while parsing an XML file.
type ParseMessage struct {
	XMLType     XMLEntityType   // entity kind the message concerns
	XMLPath     string          // path of the XML file being parsed
	Object      fmt.Stringer    // optional object the message is about; may be nil
	Message     string          // human-readable description
	MessageType ParseErrorLevel // severity of this message
}
// String renders the message for logging. The object line is only included
// when an Object was attached to the message.
func (pm ParseMessage) String() string {
	// Use %d for XMLType: XMLEntityType is an int64 without a String method,
	// so the original %s verb rendered as "%!s(logging.XMLEntityType=0)"
	// (this is exactly what `go vet`'s printf check flags).
	if pm.Object != nil {
		return fmt.Sprintf("%d: %s\n%s\n%s", pm.XMLType, pm.XMLPath, pm.Object.String(), pm.Message)
	}
	return fmt.Sprintf("%d: %s\n%s", pm.XMLType, pm.XMLPath, pm.Message)
}
// ParseLogger collects parse messages and objects sent over its channels and
// tracks the highest ParseErrorLevel seen since the last ClearMessages. The
// exported channel fields are the producer API; a consumer goroutine started
// by Start drains them into the internal buffers.
type ParseLogger struct {
	mu sync.Mutex

	ParseInfo    chan ParseMessage
	ParseErrors  chan ParseMessage
	ParseObjects chan fmt.Stringer

	messages []ParseMessage // guarded by mu
	objects  []fmt.Stringer // guarded by mu

	State ParseErrorLevel // guarded by mu; highest severity seen so far

	// subs is populated by Subscribe but never invoked in this file —
	// presumably a hook for future notifications; verify before relying on it.
	subs []func(ParseMessage)
}
// init wires up the package-level ParseMessages logger and starts its
// consumer goroutine. Per the author's warning above, ParseMessages must not
// be reassigned anywhere else.
func init() {
	ParseMessages = ParseLogger{
		ParseInfo:   make(chan ParseMessage, 100),
		ParseErrors: make(chan ParseMessage, 100),
		// The original left ParseObjects nil; a send on a nil channel blocks
		// forever, so any producer of objects would deadlock. Create it with
		// the same buffering as the other channels.
		ParseObjects: make(chan fmt.Stringer, 100),
	}
	ParseMessages.Start()
}
// Start launches the consumer goroutine that drains the logger's channels
// into the internal buffers. A closed channel is marked by setting its field
// to nil (a nil channel's select case blocks forever); the goroutine exits
// once all three channels have been closed.
func (pl *ParseLogger) Start() {
	go func() {
		for {
			select {
			case msg, ok := <-pl.ParseObjects:
				// Check ok first: the original appended before checking, so a
				// closed channel's zero value was appended to the buffer.
				if !ok {
					pl.ParseObjects = nil
					break
				}
				pl.mu.Lock()
				pl.objects = append(pl.objects, msg)
				pl.mu.Unlock()
			case msg, ok := <-pl.ParseInfo:
				if !ok {
					pl.ParseInfo = nil
					break
				}
				pl.mu.Lock()
				pl.messages = append(pl.messages, msg)
				pl.setState(InfoMessage)
				pl.mu.Unlock()
			case msg, ok := <-pl.ParseErrors:
				if !ok {
					pl.ParseErrors = nil
					break
				}
				pl.mu.Lock()
				pl.messages = append(pl.messages, msg)
				pl.setState(msg.MessageType)
				pl.mu.Unlock()
			}
			// The original tested pl.ParseInfo twice and never pl.ParseErrors,
			// so the loop could not terminate once ParseErrors was closed.
			if pl.ParseInfo == nil && pl.ParseErrors == nil && pl.ParseObjects == nil {
				break
			}
		}
	}()
}
// GetMessages returns a snapshot copy of the buffered messages.
func (pl *ParseLogger) GetMessages() []ParseMessage {
	pl.mu.Lock()
	defer pl.mu.Unlock()
	// Allocate under the lock: the original sized the slice from
	// len(pl.messages) before locking, racing with the consumer goroutine
	// (the copy could also be short if a message arrived in between).
	res := make([]ParseMessage, len(pl.messages))
	copy(res, pl.messages)
	return res
}
// ClearMessages discards all buffered messages and resets the severity state
// back to Clean.
func (pl *ParseLogger) ClearMessages() {
	pl.mu.Lock()
	defer pl.mu.Unlock()
	pl.messages = nil
	pl.State = Clean
}
// LogInfo queues an informational parse message on the ParseInfo channel.
func (pl *ParseLogger) LogInfo(xmlType XMLEntityType, xmlPath string, object fmt.Stringer, message string) {
	msg := ParseMessage{
		XMLType:     xmlType,
		XMLPath:     xmlPath,
		Object:      object,
		Message:     message,
		MessageType: InfoMessage,
	}
	pl.ParseInfo <- msg
}
// LogError queues an error-severity parse message on the ParseErrors channel.
func (pl *ParseLogger) LogError(xmlType XMLEntityType, xmlPath string, object fmt.Stringer, message string) {
	msg := ParseMessage{
		XMLType:     xmlType,
		XMLPath:     xmlPath,
		Object:      object,
		Message:     message,
		MessageType: ErrorMessage,
	}
	pl.ParseErrors <- msg
}
// LogWarning queues a warning-severity parse message on the ParseErrors channel.
func (pl *ParseLogger) LogWarning(xmlType XMLEntityType, xmlPath string, object fmt.Stringer, message string) {
	msg := ParseMessage{
		XMLType:     xmlType,
		XMLPath:     xmlPath,
		Object:      object,
		Message:     message,
		MessageType: WarningMessage,
	}
	pl.ParseErrors <- msg
}
// LogFatal queues a fatal-severity parse message on the ParseErrors channel.
func (pl *ParseLogger) LogFatal(xmlType XMLEntityType, xmlPath string, object fmt.Stringer, message string) {
	pl.ParseErrors <- ParseMessage{
		XMLType: xmlType,
		XMLPath: xmlPath,
		Object:  object,
		Message: message,
		// The original omitted MessageType, so fatal messages carried the
		// zero level (Clean) and never raised the logger state via setState.
		// Set it explicitly, consistent with the other Log* helpers.
		MessageType: FatalMessage,
	}
}
// setState raises the recorded severity to state if it is higher than the
// current one. Callers must hold pl.mu.
func (pl *ParseLogger) setState(state ParseErrorLevel) {
	if pl.State < state {
		pl.State = state
	}
}
// GetState returns the highest severity recorded since the last clear.
func (pl *ParseLogger) GetState() ParseErrorLevel {
	// Take the lock like Len and GetMessages do: State is written by the
	// consumer goroutine under mu, so an unguarded read here is a data race
	// (caught by `go test -race`).
	pl.mu.Lock()
	defer pl.mu.Unlock()
	return pl.State
}
// Len reports how many messages are currently buffered.
func (pl *ParseLogger) Len() int {
	pl.mu.Lock()
	n := len(pl.messages)
	pl.mu.Unlock()
	return n
}
// Subscribe registers a callback for parse messages.
// NOTE: subscribers are stored but not yet invoked anywhere in this file.
func (pl *ParseLogger) Subscribe(fn func(ParseMessage)) {
	pl.mu.Lock()
	pl.subs = append(pl.subs, fn)
	pl.mu.Unlock()
}
// ResetSubscriptions removes all registered callbacks.
func (pl *ParseLogger) ResetSubscriptions() {
	pl.mu.Lock()
	pl.subs = nil
	pl.mu.Unlock()
}
// PrintObjects logs every collected object via the project's ObjDebug helper.
func (pl *ParseLogger) PrintObjects() {
	pl.mu.Lock()
	defer pl.mu.Unlock()
	for _, o := range pl.objects {
		// NOTE(review): &o is a pointer to the loop variable, i.e. a
		// *fmt.Stringer (pointer-to-interface) — confirm ObjDebug really
		// expects that rather than the Stringer value itself.
		ObjDebug(&o, "Object")
	}
}
// PrintMessages writes every buffered message to the debug log.
func (pl *ParseLogger) PrintMessages() {
	pl.mu.Lock()
	defer pl.mu.Unlock()
	for i := range pl.messages {
		slog.Debug(pl.messages[i].String())
	}
}

View File

@@ -14,6 +14,7 @@ type Agent struct {
Org bool `xml:"org,attr"`
Identifier
AnnotationNote
SerializedItem
}
func (a Agent) String() string {

View File

@@ -11,6 +11,7 @@ type Category struct {
SortName string `xml:"sortiername"`
Identifier
AnnotationNote
SerializedItem
}
func (c Category) String() string {

View File

@@ -15,6 +15,7 @@ type Issue struct {
Additionals []Additional `xml:"beilage"`
Identifier
AnnotationNote
SerializedItem
}
type Nummer struct {

View File

@@ -0,0 +1,19 @@
package xmlprovider
// SerializedItem records provenance for a parsed XML item: the source file it
// came from, the date it was serialized, and the commit of the data
// repository. It is embedded in the XML entity structs (Agent, Place, …).
type SerializedItem struct {
	Source string
	Date   string
	Commit string
}

// SetSource records the source file path.
//
// The receivers must be pointers: the original value receivers mutated a
// copy, so Source/Date/Commit silently stayed empty on the real item.
// NOTE(review): types embedding SerializedItem now need to be addressable
// (or used via pointers) to satisfy setter interfaces such as XMLItem.
func (si *SerializedItem) SetSource(s string) {
	si.Source = s
}

// SetDate records the serialization date.
func (si *SerializedItem) SetDate(d string) {
	si.Date = d
}

// SetCommit records the data repository commit.
func (si *SerializedItem) SetCommit(c string) {
	si.Commit = c
}

View File

@@ -21,6 +21,7 @@ type Piece struct {
Title []string `xml:"titel"`
Identifier
AnnotationNote
SerializedItem
}
func (p Piece) String() string {
@@ -35,6 +36,43 @@ func (p Piece) GetIDs() []string {
// TODO: sensible IDs
uid := uuid.New()
ret = append(ret, uid.String())
for _, i := range p.IssueRefs {
ret = append(ret, i.Datum+"-"+i.Nr+"-"+uid.String())
}
for _, i := range p.AdditionalRef {
ret = append(ret, i.Datum+"-"+i.Nr+"-"+uid.String())
}
return ret
}
// TODO: We can make this fast depending on which category to look for
// but we'll have to define rules for every single category (~35 of them)
// IsCat reports whether the piece carries category k in any of its category,
// work, agent, or piece references.
// TODO (from original): with per-category rules (~35 categories) this lookup
// could be made faster.
func (p Piece) IsCat(k string) bool {
	for _, ref := range p.CategoryRefs {
		if ref.Category == k {
			return true
		}
	}
	for _, ref := range p.WorkRefs {
		if ref.Category == k {
			return true
		}
	}
	for _, ref := range p.AgentRefs {
		if ref.Category == k {
			return true
		}
	}
	for _, ref := range p.PieceRefs {
		if ref.Category == k {
			return true
		}
	}
	return false
}

View File

@@ -12,6 +12,7 @@ type Place struct {
Geo string `xml:"geonames"`
Identifier
AnnotationNote
SerializedItem
}
func (p Place) String() string {

View File

@@ -13,6 +13,7 @@ type Work struct {
Akteur []AgentRef `xml:"akteur"`
Identifier
AnnotationNote
SerializedItem
}
type Citation struct {

View File

@@ -6,6 +6,7 @@ import (
"io"
"os"
"sync"
"time"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
)
@@ -13,13 +14,20 @@ import (
type XMLItem interface {
fmt.Stringer
GetIDs() []string
SetSource(string)
SetDate(string)
SetCommit(string)
}
type Collection[T XMLItem] struct {
Collection []T
lock sync.Mutex
}
type XMLProvider[T XMLItem] struct {
Paths []string
// INFO: map is type [string]T
Items sync.Map
mu sync.Mutex
}
type Library struct {
@@ -47,90 +55,87 @@ func NewLibrary(agentpaths, placepaths, workpaths, categorypaths, issuepaths, pi
}
}
func (l *Library) Serialize() {
func (l *Library) SetPaths(agentpaths, placepaths, workpaths, categorypaths, issuepaths, piecepaths []string) {
l.Agents.Paths = agentpaths
l.Places.Paths = placepaths
l.Works.Paths = workpaths
l.Categories.Paths = categorypaths
l.Issues.Paths = issuepaths
l.Pieces.Paths = piecepaths
}
func (l *Library) Serialize(commit string) {
wg := sync.WaitGroup{}
wg.Add(6)
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Places.Paths {
lwg.Add(1)
go l.Places.Serialize(NewPlaceRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Places.Paths {
wg.Add(1)
go func() {
l.Places.Serialize(NewPlaceRoot(), path, commit)
wg.Done()
}()
}
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Agents.Paths {
lwg.Add(1)
go l.Agents.Serialize(NewAgentRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Agents.Paths {
wg.Add(1)
go func() {
l.Agents.Serialize(NewAgentRoot(), path, commit)
wg.Done()
}()
}
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Categories.Paths {
lwg.Add(1)
go l.Categories.Serialize(NewCategoryRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Categories.Paths {
wg.Add(1)
go func() {
l.Categories.Serialize(NewCategoryRoot(), path, commit)
wg.Done()
}()
}
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Works.Paths {
lwg.Add(1)
go l.Works.Serialize(NewWorkRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Works.Paths {
wg.Add(1)
go func() {
l.Works.Serialize(NewWorkRoot(), path, commit)
wg.Done()
}()
}
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Issues.Paths {
lwg.Add(1)
go l.Issues.Serialize(NewIssueRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Issues.Paths {
wg.Add(1)
go func() {
l.Issues.Serialize(NewIssueRoot(), path, commit)
wg.Done()
}()
}
go func() {
defer wg.Done()
lwg := sync.WaitGroup{}
for _, path := range l.Pieces.Paths {
lwg.Add(1)
go l.Pieces.Serialize(NewPieceRoot(), path, &lwg)
}
lwg.Wait()
}()
for _, path := range l.Pieces.Paths {
wg.Add(1)
go func() {
l.Pieces.Serialize(NewPieceRoot(), path, commit)
wg.Done()
}()
}
wg.Wait()
}
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path string, wg *sync.WaitGroup) error {
func (p *XMLProvider[T]) Serialize(dataholder XMLRootElement[T], path, commit string) error {
date := time.Now().Format("2006-01-02")
// Introduce goroutine for every path, locking on append:
if err := UnmarshalFile(path, dataholder); err != nil {
logging.Error(err, "Could not unmarshal file: "+path)
logging.ParseMessages.ParseErrors <- logging.ParseMessage{MessageType: logging.ErrorMessage, Message: "Could not unmarshal file: " + path}
return err
}
for _, item := range dataholder.Children() {
item.SetSource(path)
item.SetDate(date)
item.SetCommit(commit)
// INFO: Mostly it's just one ID, so the double loop is not that bad.
for _, id := range item.GetIDs() {
p.Items.Store(id, item)
}
}
if wg != nil {
wg.Done()
}
return nil
}
@@ -199,7 +204,9 @@ func (p *XMLProvider[T]) FindKey(fn func(string) bool) []T {
return items
}
func (p *XMLProvider[T]) All() []T {
// INFO: Do not use this, except when iterating over a collection multiple times (three times or more).
// Maps are slow to iterate, but most of the iterations can only be done once.
func (p *XMLProvider[T]) Everything() []T {
var items []T
p.Items.Range(func(key, value interface{}) bool {
items = append(items, value.(T))

View File

@@ -40,6 +40,14 @@ func (e *Engine) MapFuncs(app *app.KGPZ) error {
e.AddFunc("GetDate", functions.GetDate)
e.AddFunc("MonthName", functions.MonthName)
e.AddFunc("MonthNameShort", functions.MonthNameShort)
e.AddFunc("GetAgent", app.Library.Agents.Item)
e.AddFunc("GetPlace", app.Library.Places.Item)
e.AddFunc("GetWork", app.Library.Works.Item)
e.AddFunc("GetCategory", app.Library.Categories.Item)
e.AddFunc("GetIssue", app.Library.Issues.Item)
e.AddFunc("GetPiece", app.Library.Pieces.Item)
return nil
}

View File

@@ -2,6 +2,7 @@ package viewmodels
import (
"strconv"
"strings"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/helpers/logging"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
@@ -24,30 +25,19 @@ func NewSingleIssueView(y string, No string, lib *xmlprovider.Library) (*SingleI
}
sivm := SingleIssueViewModel{IssueViewModel: *ivm}
logging.Info(strconv.Itoa(len(lib.Pieces.All())) + "pieces in library")
logging.Info(strconv.Itoa(len(lib.Pieces.Everything())) + "pieces in library")
lookfor := y + "-" + No
lib.Pieces.Items.Range(func(key, value interface{}) bool {
a := value.(xmlprovider.Piece)
for _, r := range a.IssueRefs {
if r.Datum == y && r.Nr == No {
p, err := NewPieceView(a)
if err != nil {
logging.ObjErr(&a, err)
continue
}
sivm.Pieces = append(sivm.Pieces, p)
}
}
for _, r := range a.AdditionalRef {
if r.Datum == y && r.Nr == No {
p, err := NewPieceView(a)
if err != nil {
logging.ObjErr(&a, err)
continue
}
sivm.Additionals = append(sivm.Additionals, p)
k := key.(string)
if strings.HasPrefix(k, lookfor) {
a := value.(xmlprovider.Piece)
p, err := NewPieceView(a)
if err != nil {
logging.ObjErr(&a, err)
return true
}
sivm.Pieces = append(sivm.Pieces, p)
}
return true
})

View File

@@ -23,18 +23,19 @@ func YearView(year string, lib *xmlprovider.Library) (*YearViewModel, error) {
last := ""
lib.Issues.Items.Range(func(key, value interface{}) bool {
issue := value.(xmlprovider.Issue)
if len(issue.Datum.When) < 4 {
k := key.(string)
if len(k) < 4 {
return true
}
date := issue.Datum.When[0:4]
date := k[0:4]
if date != last {
res.PushAvailable(date)
last = date
}
if date == year {
issue := value.(xmlprovider.Issue)
res.PushIssue(issue)
}
return true

View File

@@ -1 +1,33 @@
Hello from an issue!
Issue found!
{{ $model := .model }}
{{ range $piece := .model.Pieces }}
<div>
Piece!
{{ if gt (len $piece.IssueRefs) 1 }}
{{ len $piece.IssueRefs }} Teile
<ol>
{{ range $issue := $piece.IssueRefs }}
<li>
<a href="/{{- $issue.Datum -}}/{{- $issue.Nr -}}" >
{{- $issue.Datum }} Nr. {{ $issue.Nr -}}
</a>
</li>
{{ end }}
</ol>
{{ end }}
{{ range $agentref := $piece.AgentRefs }}
{{ $agent := GetAgent $agentref.Ref }}
{{ if gt (len $agent.Names) 0 }}
{{ index $agent.Names 0 }}
{{ end }}
{{ if (or (eq $agentref.Category "") (eq $agentref.Category "autor")) }}
<span>Author</span>
{{ end }}
{{ end }}
</div>
{{ end }}