mirror of
https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
synced 2025-10-28 16:45:32 +00:00
130 lines
3.2 KiB
Go
130 lines
3.2 KiB
Go
package searchprovider
|
|
|
|
import (
|
|
"errors"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
|
|
"github.com/blevesearch/bleve/v2"
|
|
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
|
|
"github.com/blevesearch/bleve/v2/analysis/char/html"
|
|
"github.com/blevesearch/bleve/v2/analysis/char/regexp"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/ngram"
|
|
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
|
|
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
|
|
"github.com/blevesearch/bleve/v2/mapping"
|
|
)
|
|
|
|
// Sentinel errors returned by SearchProvider.Index.
var (
	// NoKeyError is returned when the item to index reports no keys.
	NoKeyError = errors.New("Missing ID key.")
	// NoLibError is returned when a nil library is passed to Index.
	NoLibError = errors.New("Missing library.")
)
|
|
|
|
type ISearchable interface {
|
|
Keys() []string
|
|
Readable(lib *xmlmodels.Library) map[string]interface{}
|
|
Type() string
|
|
}
|
|
|
|
// SearchProvider hands out one bleve index per item type.
type SearchProvider struct {
	// indeces caches the opened indices; keys are item type strings and
	// values are bleve.Index.
	indeces sync.Map
	// basepath is the directory in which the "<type>.bleve" index
	// directories are looked up and created.
	basepath string
}
|
|
|
|
func NewSearchProvider(basepath string) (*SearchProvider, error) {
|
|
sp := &SearchProvider{basepath: basepath}
|
|
return sp, nil
|
|
}
|
|
|
|
func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
|
|
keys := item.Keys()
|
|
if len(keys) == 0 {
|
|
return NoKeyError
|
|
}
|
|
if lib == nil {
|
|
return NoLibError
|
|
}
|
|
|
|
i, err := sp.FindCreateIndex(item.Type())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return i.Index(keys[0], item.Readable(lib))
|
|
}
|
|
|
|
func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
|
|
index, ok := sp.indeces.Load(typ)
|
|
if ok {
|
|
i := index.(bleve.Index)
|
|
return i, nil
|
|
}
|
|
|
|
fp := filepath.Join(sp.basepath, typ+".bleve")
|
|
ind, err := bleve.Open(fp)
|
|
if err == bleve.ErrorIndexPathDoesNotExist {
|
|
mapping, err := default_mapping()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ind, err = bleve.New(filepath.Join(fp), mapping)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
sp.indeces.Store(typ, ind)
|
|
|
|
return ind, nil
|
|
}
|
|
|
|
func default_mapping() (*mapping.IndexMappingImpl, error) {
|
|
indexMapping := bleve.NewIndexMapping()
|
|
|
|
customunicodeFilter := map[string]interface{}{
|
|
"type": unicodenorm.Name,
|
|
"form": unicodenorm.NFKD,
|
|
}
|
|
|
|
customCharFilterConfig := map[string]interface{}{
|
|
"type": regexp.Name,
|
|
"regexp": `[[:punct:]]+`, // Removes all punctuation characters
|
|
"replace": "",
|
|
}
|
|
|
|
customNgramFilterConfig := map[string]interface{}{
|
|
"type": ngram.Name,
|
|
"min": 1, // minimum n-gram size
|
|
"max": 20, // maximum n-gram size
|
|
}
|
|
|
|
customNgramAnalyzer := map[string]interface{}{
|
|
"type": custom.Name,
|
|
"tokenizer": unicode.Name,
|
|
"char_filters": []string{"removePunctuation", html.Name},
|
|
"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
|
|
}
|
|
|
|
err := indexMapping.AddCustomTokenFilter("customNgramFilter", customNgramFilterConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = indexMapping.AddCustomCharFilter("removePunctuation", customCharFilterConfig)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", customunicodeFilter)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
err = indexMapping.AddCustomAnalyzer("customNgramAnalyzer", customNgramAnalyzer)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
|
|
return indexMapping, nil
|
|
}
|