mirror of
https://github.com/Theodor-Springmann-Stiftung/musenalm.git
synced 2025-10-29 09:15:33 +00:00
127 lines
3.2 KiB
Go
127 lines
3.2 KiB
Go
package dbmodels
|
||
|
||
import (
|
||
"encoding/json"
|
||
"regexp"
|
||
"sort"
|
||
"strconv"
|
||
"strings"
|
||
)
|
||
|
||
// CollectionInfo holds only the ID, a list of single references, and the Recorded flag.
|
||
type CollectionInfo struct {
|
||
Collection *Content
|
||
Singles []int
|
||
Recorded bool
|
||
}
|
||
|
||
func (ci CollectionInfo) String() string {
|
||
marshalled, _ := json.Marshal(ci)
|
||
return string(marshalled)
|
||
}
|
||
|
||
// parseAnnotation detects "nicht erfasst" references (Recorded=false),
|
||
// then finds all "INr" references (both single values and ranges).
|
||
// Ranges like "100-105" are fully expanded to singles. Duplicates are removed.
|
||
// Any references not in `inos` are ignored.
|
||
func ParseAnnotation(c *Content, annotation string, inos []int) CollectionInfo {
|
||
ci := CollectionInfo{
|
||
Collection: c,
|
||
Singles: []int{},
|
||
Recorded: true, // Default
|
||
}
|
||
|
||
// 1) Detect phrases like "nicht erfasst", "nicht aufgenommen", etc.
|
||
notRecordedPatterns := []string{"erfasst", "aufgenommen", "verzeichnet", "registriert"}
|
||
lowerAnn := strings.ToLower(annotation)
|
||
if strings.Contains(lowerAnn, "nicht") {
|
||
for _, kw := range notRecordedPatterns {
|
||
if strings.Contains(lowerAnn, kw) {
|
||
ci.Recorded = false
|
||
break
|
||
}
|
||
}
|
||
}
|
||
|
||
// We'll keep singles in a map for deduplication
|
||
singlesMap := make(map[int]struct{})
|
||
|
||
// 2) Regex that matches "INr" plus the numeric portion (including dash / punctuation).
|
||
re := regexp.MustCompile(`(?i)\bINr[.:]?\s+([\d,\-\s–—;/.]+)`)
|
||
matches := re.FindAllStringSubmatch(annotation, -1)
|
||
|
||
// Regex to unify different dash characters into a simple '-'
|
||
dashRegex := regexp.MustCompile(`[–—−‒]`)
|
||
|
||
// Helper to expand a range, e.g. 10615–10621 => 10615..10621
|
||
expandRange := func(fromVal, toVal int) {
|
||
// If reversed, its a typo
|
||
if fromVal > toVal {
|
||
return
|
||
}
|
||
for v := fromVal; v <= toVal; v++ {
|
||
if inList(v, inos) {
|
||
singlesMap[v] = struct{}{}
|
||
}
|
||
}
|
||
}
|
||
|
||
for _, m := range matches {
|
||
numericChunk := m[1]
|
||
|
||
// Replace typographic dashes with ASCII hyphen
|
||
numericChunk = dashRegex.ReplaceAllString(numericChunk, "-")
|
||
|
||
// Also unify semicolons or slashes to commas
|
||
extraDelims := regexp.MustCompile(`[;/]+`)
|
||
numericChunk = extraDelims.ReplaceAllString(numericChunk, ",")
|
||
|
||
// Now split on commas
|
||
parts := strings.Split(numericChunk, ",")
|
||
for _, p := range parts {
|
||
p = strings.TrimSpace(p)
|
||
if p == "" {
|
||
continue
|
||
}
|
||
// If we see a hyphen, treat it as a range
|
||
if strings.Contains(p, "-") {
|
||
rangeParts := strings.SplitN(p, "-", 2)
|
||
if len(rangeParts) == 2 {
|
||
fromStr := strings.TrimSpace(rangeParts[0])
|
||
toStr := strings.TrimSpace(rangeParts[1])
|
||
if fromVal, errFrom := strconv.Atoi(fromStr); errFrom == nil {
|
||
if toVal, errTo := strconv.Atoi(toStr); errTo == nil {
|
||
expandRange(fromVal, toVal)
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
// Single integer reference
|
||
if val, err := strconv.Atoi(p); err == nil {
|
||
if inList(val, inos) {
|
||
singlesMap[val] = struct{}{}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Flatten the map into a sorted slice
|
||
for s := range singlesMap {
|
||
ci.Singles = append(ci.Singles, s)
|
||
}
|
||
sort.Ints(ci.Singles)
|
||
|
||
return ci
|
||
}
|
||
|
||
// inList reports whether x occurs anywhere in list.
// A nil or empty list never contains x.
func inList(x int, list []int) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == x {
			return true
		}
	}
	return false
}
|