// Package searchprovider maintains one bleve full-text index per item type
// and offers a small API to add searchable items to those indexes.
package searchprovider

import (
	"errors"
	"fmt"
	"path/filepath"
	"sync"

	"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
	"github.com/blevesearch/bleve/v2/analysis/char/html"
	"github.com/blevesearch/bleve/v2/analysis/char/regexp"
	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
	"github.com/blevesearch/bleve/v2/analysis/token/ngram"
	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
	"github.com/blevesearch/bleve/v2/mapping"
)

var (
	// NoKeyError is returned by Index when the item reports no keys.
	NoKeyError = errors.New("Missing ID key.")
	// NoLibError is returned by Index when no library is supplied.
	NoLibError = errors.New("Missing library.")
)

// ISearchable is implemented by everything that can be put into an index.
type ISearchable interface {
	// Keys returns the item's identifiers; Index uses the first one as the
	// document ID.
	Keys() []string
	// Readable returns the field map that is handed to the index as the
	// document body.
	Readable(lib *xmlmodels.Library) map[string]interface{}
	// Type selects the index the item is stored in (one index per type).
	Type() string
}

// SearchProvider hands out bleve indexes located below basepath, keeping
// every index it has opened or created in an in-memory cache.
type SearchProvider struct {
	// indeces maps an item type (string) to its bleve.Index.
	indeces sync.Map
	// basepath is the directory that holds the "<type>.bleve" indexes.
	basepath string
}

// NewSearchProvider returns a SearchProvider that keeps its indexes below
// basepath. The returned error is currently always nil.
func NewSearchProvider(basepath string) (*SearchProvider, error) {
	return &SearchProvider{basepath: basepath}, nil
}

// Index stores item in the index that belongs to item.Type(), using the
// first entry of item.Keys() as document ID and item.Readable(lib) as the
// document body.
//
// It returns NoKeyError if the item has no keys, NoLibError if lib is nil,
// and otherwise any error from obtaining the index or from indexing.
func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
	keys := item.Keys()
	switch {
	case len(keys) == 0:
		return NoKeyError
	case lib == nil:
		return NoLibError
	}

	idx, err := sp.FindCreateIndex(item.Type())
	if err != nil {
		return err
	}
	return idx.Index(keys[0], item.Readable(lib))
}

// FindCreateIndex returns the index for typ. A cached index is returned
// directly; otherwise the index at "<basepath>/<typ>.bleve" is opened, or
// created with the default mapping if that path does not exist yet.
//
// An index is cached only after it was opened or created successfully. Any
// failure is returned to the caller and nothing is cached, so a later call
// can retry.
//
// NOTE(review): two concurrent first calls for the same typ can both reach
// bleve.Open/bleve.New; confirm whether callers serialize index creation.
func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
	if cached, ok := sp.indeces.Load(typ); ok {
		return cached.(bleve.Index), nil
	}

	fp := filepath.Join(sp.basepath, typ+".bleve")
	ind, err := bleve.Open(fp)
	switch {
	case err == nil:
		// Existing index opened successfully.
	case errors.Is(err, bleve.ErrorIndexPathDoesNotExist):
		idxMapping, mapErr := default_mapping()
		if mapErr != nil {
			return nil, mapErr
		}
		ind, err = bleve.New(fp, idxMapping)
		if err != nil {
			return nil, err
		}
	default:
		// Every other open failure used to be dropped here, which cached a
		// nil index and made the next lookup panic on the type assertion.
		return nil, fmt.Errorf("opening index %q: %w", fp, err)
	}

	sp.indeces.Store(typ, ind)
	return ind, nil
}

// default_mapping builds the index mapping used for newly created indexes.
// It registers a custom analyzer and makes it the default one:
//
//   - char filters: the HTML filter, then removal of punctuation
//   - tokenizer: unicode
//   - token filters: lowercase, n-grams of length 1 to 20, NFKD
//     normalization
func default_mapping() (*mapping.IndexMappingImpl, error) {
	indexMapping := bleve.NewIndexMapping()

	unicodeNormFilter := map[string]interface{}{
		"type": unicodenorm.Name,
		"form": unicodenorm.NFKD,
	}
	// Removes all punctuation characters.
	punctuationFilter := map[string]interface{}{
		"type":    regexp.Name,
		"regexp":  `[[:punct:]]+`,
		"replace": "",
	}
	ngramFilter := map[string]interface{}{
		"type": ngram.Name,
		"min":  1,  // minimum n-gram size
		"max":  20, // maximum n-gram size
	}
	analyzer := map[string]interface{}{
		"type":      custom.Name,
		"tokenizer": unicode.Name,
		// Char filters run in the order listed. The HTML filter has to come
		// first: once the punctuation filter has removed '<', '>' and '/',
		// no tag is left for it to recognize and tag names would be indexed
		// as ordinary text.
		"char_filters":  []string{html.Name, "removePunctuation"},
		"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
	}

	if err := indexMapping.AddCustomTokenFilter("customNgramFilter", ngramFilter); err != nil {
		return nil, err
	}
	if err := indexMapping.AddCustomCharFilter("removePunctuation", punctuationFilter); err != nil {
		return nil, err
	}
	if err := indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", unicodeNormFilter); err != nil {
		return nil, err
	}
	if err := indexMapping.AddCustomAnalyzer("customNgramAnalyzer", analyzer); err != nil {
		return nil, err
	}

	indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
	return indexMapping, nil
}