mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-29 00:55:32 +00:00
Huge problem: no IDs for the Beiträge
This commit is contained in:
@@ -1,13 +1,16 @@
|
||||
package xmlmodels
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/providers/xmlprovider"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -34,6 +37,69 @@ func (p Piece) String() string {
|
||||
return string(data)
|
||||
}
|
||||
|
||||
// generateContentBasedID creates a deterministic ID based on piece content
|
||||
func (p Piece) generateContentBasedID() string {
|
||||
var parts []string
|
||||
|
||||
// Add title if available
|
||||
if len(p.Title) > 0 && p.Title[0] != "" {
|
||||
parts = append(parts, "title:"+strings.ToLower(strings.TrimSpace(p.Title[0])))
|
||||
}
|
||||
|
||||
// Add incipit if available
|
||||
if len(p.Incipit) > 0 && p.Incipit[0] != "" {
|
||||
incipit := strings.ToLower(strings.TrimSpace(p.Incipit[0]))
|
||||
// Limit incipit to first 50 characters to avoid overly long IDs
|
||||
if len(incipit) > 50 {
|
||||
incipit = incipit[:50]
|
||||
}
|
||||
parts = append(parts, "incipit:"+incipit)
|
||||
}
|
||||
|
||||
// Add author references
|
||||
var authors []string
|
||||
for _, agent := range p.AgentRefs {
|
||||
if agent.Category == "" || agent.Category == "autor" {
|
||||
authors = append(authors, agent.Ref)
|
||||
}
|
||||
}
|
||||
sort.Strings(authors) // Ensure consistent ordering
|
||||
if len(authors) > 0 {
|
||||
parts = append(parts, "authors:"+strings.Join(authors, ","))
|
||||
}
|
||||
|
||||
// Add categories
|
||||
var categories []string
|
||||
for _, cat := range p.CategoryRefs {
|
||||
if cat.Category != "" {
|
||||
categories = append(categories, cat.Category)
|
||||
}
|
||||
}
|
||||
sort.Strings(categories) // Ensure consistent ordering
|
||||
if len(categories) > 0 {
|
||||
parts = append(parts, "categories:"+strings.Join(categories, ","))
|
||||
}
|
||||
|
||||
// If we have no meaningful content, create a minimal hash from issue refs
|
||||
if len(parts) == 0 {
|
||||
// Use issue references as fallback content
|
||||
for _, issue := range p.IssueRefs {
|
||||
parts = append(parts, fmt.Sprintf("issue:%d-%d-%d-%d", issue.When.Year, issue.Nr, issue.Von, issue.Bis))
|
||||
}
|
||||
// If still no content, use a generic identifier
|
||||
if len(parts) == 0 {
|
||||
parts = append(parts, "unknown-piece")
|
||||
}
|
||||
}
|
||||
|
||||
// Create hash of combined content
|
||||
content := strings.Join(parts, "|")
|
||||
hash := sha256.Sum256([]byte(content))
|
||||
|
||||
// Return first 12 characters of hex hash for reasonable ID length
|
||||
return hex.EncodeToString(hash[:])[:12]
|
||||
}
|
||||
|
||||
func (p Piece) Categories() map[string]bool {
|
||||
cats := make(map[string]bool)
|
||||
for _, c := range p.CategoryRefs {
|
||||
@@ -70,24 +136,23 @@ func (p Piece) Categories() map[string]bool {
|
||||
}
|
||||
|
||||
// Keys returns the lookup keys collected for this piece: ID-based keys plus
// "<year>-<nr>-<id>" keys built from each issue reference.
//
// NOTE(review): this listing comes from a diff view without +/- markers, so
// removed and added lines appear to be interleaved (for example, the cached
// early return sits directly above "Always regenerate keys"). Confirm which
// statements are live before relying on the details below.
func (p Piece) Keys() []string {
|
||||
// Return previously stored keys, if any.
if len(p.keys) > 0 {
|
||||
return p.keys
|
||||
}
|
||||
|
||||
// Always regenerate keys to ensure we use the new content-based logic
|
||||
ret := make([]string, 0, 3)
|
||||
|
||||
// Primary ID: Use existing ID if available, otherwise content-based ID
|
||||
var primaryID string
|
||||
if p.ID != "" {
|
||||
// NOTE(review): read literally, p.ID is appended here and again below via primaryID.
ret = append(ret, p.ID)
|
||||
primaryID = p.ID
|
||||
} else {
|
||||
primaryID = p.generateContentBasedID()
|
||||
}
|
||||
ret = append(ret, primaryID)
|
||||
|
||||
// TODO: sensible IDs
|
||||
// NOTE(review): presumably a freshly generated random UUID, in which case the
// uid-based keys differ on every call — confirm against the uuid package.
uid := uuid.New()
|
||||
ret = append(ret, uid.String())
|
||||
|
||||
// Create issue-specific keys using the primary ID for lookup
|
||||
for _, i := range p.IssueRefs {
|
||||
ret = append(ret, strconv.Itoa(i.When.Year)+"-"+strconv.Itoa(i.Nr)+"-"+uid.String())
|
||||
ret = append(ret, strconv.Itoa(i.When.Year)+"-"+strconv.Itoa(i.Nr)+"-"+primaryID)
|
||||
}
|
||||
|
||||
// NOTE(review): Keys has a value receiver, so this assignment only changes
// the method's local copy of p; the caller's Piece is not updated.
p.keys = ret
|
||||
return ret
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user