Files
kgpz_web/providers/search/searchprovider.go
2025-02-17 21:42:20 +01:00

130 lines
3.2 KiB
Go

package searchprovider
import (
"errors"
"path/filepath"
"sync"
"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/char/html"
"github.com/blevesearch/bleve/v2/analysis/char/regexp"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/ngram"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/mapping"
)
var NoKeyError = errors.New("Missing ID key.")
var NoLibError = errors.New("Missing library.")
type ISearchable interface {
Keys() []string
Readable(lib *xmlmodels.Library) map[string]interface{}
Type() string
}
type SearchProvider struct {
indeces sync.Map
basepath string
}
func NewSearchProvider(basepath string) (*SearchProvider, error) {
sp := &SearchProvider{basepath: basepath}
return sp, nil
}
func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
keys := item.Keys()
if len(keys) == 0 {
return NoKeyError
}
if lib == nil {
return NoLibError
}
i, err := sp.FindCreateIndex(item.Type())
if err != nil {
return err
}
return i.Index(keys[0], item.Readable(lib))
}
func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
index, ok := sp.indeces.Load(typ)
if ok {
i := index.(bleve.Index)
return i, nil
}
fp := filepath.Join(sp.basepath, typ+".bleve")
ind, err := bleve.Open(fp)
if err == bleve.ErrorIndexPathDoesNotExist {
mapping, err := default_mapping()
if err != nil {
return nil, err
}
ind, err = bleve.New(filepath.Join(fp), mapping)
if err != nil {
return nil, err
}
}
sp.indeces.Store(typ, ind)
return ind, nil
}
func default_mapping() (*mapping.IndexMappingImpl, error) {
indexMapping := bleve.NewIndexMapping()
customunicodeFilter := map[string]interface{}{
"type": unicodenorm.Name,
"form": unicodenorm.NFKD,
}
customCharFilterConfig := map[string]interface{}{
"type": regexp.Name,
"regexp": `[[:punct:]]+`, // Removes all punctuation characters
"replace": "",
}
customNgramFilterConfig := map[string]interface{}{
"type": ngram.Name,
"min": 1, // minimum n-gram size
"max": 20, // maximum n-gram size
}
customNgramAnalyzer := map[string]interface{}{
"type": custom.Name,
"tokenizer": unicode.Name,
"char_filters": []string{"removePunctuation", html.Name},
"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
}
err := indexMapping.AddCustomTokenFilter("customNgramFilter", customNgramFilterConfig)
if err != nil {
return nil, err
}
err = indexMapping.AddCustomCharFilter("removePunctuation", customCharFilterConfig)
if err != nil {
return nil, err
}
err = indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", customunicodeFilter)
if err != nil {
return nil, err
}
err = indexMapping.AddCustomAnalyzer("customNgramAnalyzer", customNgramAnalyzer)
if err != nil {
return nil, err
}
indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
return indexMapping, nil
}