mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-29 17:15:31 +00:00
Search Index Build
This commit is contained in:
@@ -41,6 +41,7 @@ type Config struct {
|
||||
GITPath string `json:"git_path" envconfig:"GIT_PATH"`
|
||||
GNDPath string `json:"gnd_path" envconfig:"GND_PATH"`
|
||||
GeoPath string `json:"geo_path" envconfig:"GEO_PATH"`
|
||||
SearchPath string `json:"search_path" envconfig:"SEARCH_PATH"`
|
||||
ImgPath string `json:"img_path" envconfig:"IMG_PATH"`
|
||||
WebHookEndpoint string `json:"webhook_endpoint" envconfig:"WEBHOOK_ENDPOINT"`
|
||||
WebHookSecret string `json:"webhook_secret" envconfig:"WEBHOOK_SECRET"`
|
||||
@@ -123,6 +124,10 @@ func readDefaults(cfg *Config) *Config {
|
||||
cfg.ImgPath = DEFAULT_IMG_DIR
|
||||
}
|
||||
|
||||
if strings.TrimSpace(cfg.SearchPath) == "" {
|
||||
cfg.SearchPath = DEFAULT_SEARCH_CACHE_DIR
|
||||
}
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
|
||||
129
providers/search/searchprovider.go
Normal file
129
providers/search/searchprovider.go
Normal file
@@ -0,0 +1,129 @@
|
||||
package searchprovider
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
|
||||
"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
|
||||
"github.com/blevesearch/bleve/v2"
|
||||
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
||||
"github.com/blevesearch/bleve/v2/analysis/char/html"
|
||||
"github.com/blevesearch/bleve/v2/analysis/char/regexp"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/ngram"
|
||||
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
|
||||
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
||||
"github.com/blevesearch/bleve/v2/mapping"
|
||||
)
|
||||
|
||||
// Sentinel errors reported by SearchProvider.Index before any index is touched.
var (
	// NoKeyError signals that the item to be indexed returned no keys, so
	// there is nothing that could serve as its document ID.
	NoKeyError = errors.New("Missing ID key.")

	// NoLibError signals that a nil library was passed along with the item.
	NoLibError = errors.New("Missing library.")
)
|
||||
|
||||
type ISearchable interface {
|
||||
Keys() []string
|
||||
Readable(lib *xmlmodels.Library) map[string]interface{}
|
||||
Type() string
|
||||
}
|
||||
|
||||
// SearchProvider manages a set of bleve indexes, one per item type, that are
// stored side by side below a common base directory.
type SearchProvider struct {
	// basepath is the directory in which the "<type>.bleve" indexes live.
	basepath string

	// indeces caches the indexes that were already opened or created.
	// It maps the item type (string) to its bleve.Index.
	indeces sync.Map
}
|
||||
|
||||
func NewSearchProvider(basepath string) (*SearchProvider, error) {
|
||||
sp := &SearchProvider{basepath: basepath}
|
||||
return sp, nil
|
||||
}
|
||||
|
||||
func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
|
||||
keys := item.Keys()
|
||||
if len(keys) == 0 {
|
||||
return NoKeyError
|
||||
}
|
||||
if lib == nil {
|
||||
return NoLibError
|
||||
}
|
||||
|
||||
i, err := sp.FindCreateIndex(item.Type())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return i.Index(keys[0], item.Readable(lib))
|
||||
}
|
||||
|
||||
func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
|
||||
index, ok := sp.indeces.Load(typ)
|
||||
if ok {
|
||||
i := index.(bleve.Index)
|
||||
return i, nil
|
||||
}
|
||||
|
||||
fp := filepath.Join(sp.basepath, typ+".bleve")
|
||||
ind, err := bleve.Open(fp)
|
||||
if err == bleve.ErrorIndexPathDoesNotExist {
|
||||
mapping, err := default_mapping()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ind, err = bleve.New(filepath.Join(fp), mapping)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
sp.indeces.Store(typ, ind)
|
||||
|
||||
return ind, nil
|
||||
}
|
||||
|
||||
func default_mapping() (*mapping.IndexMappingImpl, error) {
|
||||
indexMapping := bleve.NewIndexMapping()
|
||||
|
||||
customunicodeFilter := map[string]interface{}{
|
||||
"type": unicodenorm.Name,
|
||||
"form": unicodenorm.NFKD,
|
||||
}
|
||||
|
||||
customCharFilterConfig := map[string]interface{}{
|
||||
"type": regexp.Name,
|
||||
"regexp": `[[:punct:]]+`, // Removes all punctuation characters
|
||||
"replace": "",
|
||||
}
|
||||
|
||||
customNgramFilterConfig := map[string]interface{}{
|
||||
"type": ngram.Name,
|
||||
"min": 1, // minimum n-gram size
|
||||
"max": 20, // maximum n-gram size
|
||||
}
|
||||
|
||||
customNgramAnalyzer := map[string]interface{}{
|
||||
"type": custom.Name,
|
||||
"tokenizer": unicode.Name,
|
||||
"char_filters": []string{"removePunctuation", html.Name},
|
||||
"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
|
||||
}
|
||||
|
||||
err := indexMapping.AddCustomTokenFilter("customNgramFilter", customNgramFilterConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = indexMapping.AddCustomCharFilter("removePunctuation", customCharFilterConfig)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", customunicodeFilter)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = indexMapping.AddCustomAnalyzer("customNgramAnalyzer", customNgramAnalyzer)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
|
||||
return indexMapping, nil
|
||||
}
|
||||
@@ -2,8 +2,11 @@ package xmlprovider
|
||||
|
||||
import "fmt"
|
||||
|
||||
type XMLItem interface {
|
||||
type IXMLItem interface {
|
||||
fmt.Stringer
|
||||
// INFO:
|
||||
// - Keys should be unique
|
||||
// - Keys[0] has the special meaning of the primary key (for FTS etc.)
|
||||
Keys() []string
|
||||
Name() string
|
||||
}
|
||||
@@ -12,13 +15,13 @@ type ILibrary interface {
|
||||
Parse(meta ParseMeta) error
|
||||
}
|
||||
|
||||
type ResolvingMap[T XMLItem] map[string][]Resolved[T]
|
||||
type ResolvingMap[T IXMLItem] map[string][]Resolved[T]
|
||||
|
||||
type ReferenceResolver[T XMLItem] interface {
|
||||
type ReferenceResolver[T IXMLItem] interface {
|
||||
References() ResolvingMap[T]
|
||||
}
|
||||
|
||||
type Resolved[T XMLItem] struct {
|
||||
type Resolved[T IXMLItem] struct {
|
||||
Item *T
|
||||
Reference string
|
||||
Category string
|
||||
|
||||
@@ -7,13 +7,13 @@ import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
type Resolver[T XMLItem] struct {
|
||||
type Resolver[T IXMLItem] struct {
|
||||
// INFO: map[type][ID]
|
||||
index map[string]map[string][]Resolved[T]
|
||||
mu sync.RWMutex
|
||||
}
|
||||
|
||||
func NewResolver[T XMLItem]() *Resolver[T] {
|
||||
func NewResolver[T IXMLItem]() *Resolver[T] {
|
||||
return &Resolver[T]{index: make(map[string]map[string][]Resolved[T])}
|
||||
}
|
||||
|
||||
|
||||
@@ -34,7 +34,7 @@ func (p ParseMeta) Failed(path string) bool {
|
||||
}
|
||||
|
||||
// An XMLProvider is a struct that holds serialized XML data of a specific type. It combines multiple parses if a successful parse cannot serialize the data from a path.
|
||||
type XMLProvider[T XMLItem] struct {
|
||||
type XMLProvider[T IXMLItem] struct {
|
||||
// INFO: map is type map[string]*T
|
||||
Items sync.Map
|
||||
// INFO: map is type [string]ItemInfo
|
||||
@@ -50,7 +50,7 @@ type XMLProvider[T XMLItem] struct {
|
||||
Array []T
|
||||
}
|
||||
|
||||
func NewXMLProvider[T XMLItem]() *XMLProvider[T] {
|
||||
func NewXMLProvider[T IXMLItem]() *XMLProvider[T] {
|
||||
return &XMLProvider[T]{Resolver: *NewResolver[T]()}
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ func (p *XMLProvider[T]) addResolvable(item T) {
|
||||
}
|
||||
}
|
||||
|
||||
func (p *XMLProvider[T]) ReverseLookup(item XMLItem) []Resolved[T] {
|
||||
func (p *XMLProvider[T]) ReverseLookup(item IXMLItem) []Resolved[T] {
|
||||
// INFO: this runs just once for the first key
|
||||
ret := make([]Resolved[T], 0)
|
||||
keys := item.Keys()
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"strings"
|
||||
)
|
||||
|
||||
func Sort[T XMLItem](i, j T) int {
|
||||
func Sort[T IXMLItem](i, j T) int {
|
||||
|
||||
keys_a := i.Keys()
|
||||
keys_b := j.Keys()
|
||||
|
||||
Reference in New Issue
Block a user