mirror of
				https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
				synced 2025-11-04 11:45:31 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			164 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			164 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package searchprovider
 | 
						|
 | 
						|
import (
 | 
						|
	"errors"
 | 
						|
	"path/filepath"
 | 
						|
	"sync"
 | 
						|
 | 
						|
	"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
 | 
						|
	"github.com/blevesearch/bleve/v2"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/char/html"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/char/regexp"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/token/ngram"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
 | 
						|
	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
 | 
						|
	"github.com/blevesearch/bleve/v2/mapping"
 | 
						|
)
 | 
						|
 | 
						|
// Sentinel errors returned by SearchProvider.Index.
var (
	// NoKeyError reports that the item to be indexed returned no keys,
	// so there is nothing to use as its document ID.
	NoKeyError = errors.New("Missing ID key.")

	// NoLibError reports that a nil library was passed to Index.
	NoLibError = errors.New("Missing library.")
)
 | 
						|
 | 
						|
type ISearchable interface {
 | 
						|
	Keys() []string
 | 
						|
	Readable(lib *xmlmodels.Library) map[string]interface{}
 | 
						|
	Type() string
 | 
						|
}
 | 
						|
 | 
						|
// SearchProvider keeps one bleve index per item type and knows the
// directory in which those indexes live on disk.
type SearchProvider struct {
	// indeces maps an item type name (string) to its opened bleve.Index.
	indeces sync.Map

	// basepath is the directory containing the "<type>.bleve" indexes.
	basepath string
}
 | 
						|
 | 
						|
func NewSearchProvider(basepath string) (*SearchProvider, error) {
 | 
						|
	sp := &SearchProvider{basepath: basepath}
 | 
						|
	return sp, nil
 | 
						|
}
 | 
						|
 | 
						|
func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
 | 
						|
	keys := item.Keys()
 | 
						|
	if len(keys) == 0 {
 | 
						|
		return NoKeyError
 | 
						|
	}
 | 
						|
	if lib == nil {
 | 
						|
		return NoLibError
 | 
						|
	}
 | 
						|
 | 
						|
	i, err := sp.FindCreateIndex(item.Type())
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	read := item.Readable(lib)
 | 
						|
	return i.Index(keys[0], read)
 | 
						|
}
 | 
						|
 | 
						|
// TODO: this is sloppy
 | 
						|
func (sp *SearchProvider) LoadIndeces() error {
 | 
						|
	files, err := filepath.Glob(filepath.Join(sp.basepath, "*.bleve"))
 | 
						|
	if err != nil {
 | 
						|
		return err
 | 
						|
	}
 | 
						|
 | 
						|
	if len(files) == 0 {
 | 
						|
		return errors.New("No indeces found.")
 | 
						|
	}
 | 
						|
 | 
						|
	for _, file := range files {
 | 
						|
		index, err := bleve.Open(file)
 | 
						|
		if err != nil {
 | 
						|
			return err
 | 
						|
		}
 | 
						|
		typ := filepath.Base(file)
 | 
						|
		typ = typ[:len(typ)-6]
 | 
						|
		sp.indeces.Store(typ, index)
 | 
						|
	}
 | 
						|
	return nil
 | 
						|
}
 | 
						|
 | 
						|
func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
 | 
						|
	index, ok := sp.indeces.Load(typ)
 | 
						|
	if ok {
 | 
						|
		i := index.(bleve.Index)
 | 
						|
		return i, nil
 | 
						|
	}
 | 
						|
 | 
						|
	fp := filepath.Join(sp.basepath, typ+".bleve")
 | 
						|
	ind, err := bleve.Open(fp)
 | 
						|
	if err == bleve.ErrorIndexPathDoesNotExist {
 | 
						|
		mapping, err := default_mapping()
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		ind, err = bleve.New(filepath.Join(fp), mapping)
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	sp.indeces.Store(typ, ind)
 | 
						|
 | 
						|
	return ind, nil
 | 
						|
}
 | 
						|
 | 
						|
func (sp *SearchProvider) GetIndex(typ string) (bleve.Index, error) {
 | 
						|
	index, ok := sp.indeces.Load(typ)
 | 
						|
	if !ok {
 | 
						|
		return nil, errors.New("Index not found.")
 | 
						|
	}
 | 
						|
 | 
						|
	i := index.(bleve.Index)
 | 
						|
	return i, nil
 | 
						|
}
 | 
						|
 | 
						|
func default_mapping() (*mapping.IndexMappingImpl, error) {
 | 
						|
	indexMapping := bleve.NewIndexMapping()
 | 
						|
 | 
						|
	customunicodeFilter := map[string]interface{}{
 | 
						|
		"type": unicodenorm.Name,
 | 
						|
		"form": unicodenorm.NFKD,
 | 
						|
	}
 | 
						|
 | 
						|
	customCharFilterConfig := map[string]interface{}{
 | 
						|
		"type":    regexp.Name,
 | 
						|
		"regexp":  `[[:punct:]]+`, // Removes all punctuation characters
 | 
						|
		"replace": "",
 | 
						|
	}
 | 
						|
 | 
						|
	customNgramFilterConfig := map[string]interface{}{
 | 
						|
		"type": ngram.Name,
 | 
						|
		"min":  1,  // minimum n-gram size
 | 
						|
		"max":  20, // maximum n-gram size
 | 
						|
	}
 | 
						|
 | 
						|
	customNgramAnalyzer := map[string]interface{}{
 | 
						|
		"type":          custom.Name,
 | 
						|
		"tokenizer":     unicode.Name,
 | 
						|
		"char_filters":  []string{"removePunctuation", html.Name},
 | 
						|
		"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
 | 
						|
	}
 | 
						|
 | 
						|
	err := indexMapping.AddCustomTokenFilter("customNgramFilter", customNgramFilterConfig)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	err = indexMapping.AddCustomCharFilter("removePunctuation", customCharFilterConfig)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	err = indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", customunicodeFilter)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	err = indexMapping.AddCustomAnalyzer("customNgramAnalyzer", customNgramAnalyzer)
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
 | 
						|
	return indexMapping, nil
 | 
						|
}
 |