mirror of
				https://github.com/Theodor-Springmann-Stiftung/kgpz_web.git
				synced 2025-10-29 09:05:30 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			191 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			191 lines
		
	
	
		
			4.3 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
| package searchprovider
 | |
| 
 | |
| import (
 | |
| 	"errors"
 | |
| 	"os"
 | |
| 	"path/filepath"
 | |
| 	"sync"
 | |
| 
 | |
| 	"github.com/Theodor-Springmann-Stiftung/kgpz_web/xmlmodels"
 | |
| 	"github.com/blevesearch/bleve/v2"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/char/html"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/char/regexp"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/token/ngram"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
 | |
| 	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
 | |
| 	"github.com/blevesearch/bleve/v2/mapping"
 | |
| )
 | |
| 
 | |
// Sentinel errors returned by SearchProvider.Index when one of its
// preconditions is not met.
var (
	// NoKeyError reports that the item to index exposes no keys, so there is
	// nothing to use as the document ID.
	NoKeyError = errors.New("Missing ID key.")

	// NoLibError reports that a nil library was passed for indexing.
	NoLibError = errors.New("Missing library.")
)
 | |
| 
 | |
| type ISearchable interface {
 | |
| 	Keys() []string
 | |
| 	Readable(lib *xmlmodels.Library) map[string]interface{}
 | |
| 	Type() string
 | |
| }
 | |
| 
 | |
// SearchProvider manages one bleve index per item type, all stored below a
// common directory.
type SearchProvider struct {
	// basepath is the directory containing the "<type>.bleve" indices.
	basepath string

	// indeces maps a type name (string) to its open bleve.Index.
	indeces sync.Map
}
 | |
| 
 | |
| func NewSearchProvider(basepath string) (*SearchProvider, error) {
 | |
| 	sp := &SearchProvider{basepath: basepath}
 | |
| 	return sp, nil
 | |
| }
 | |
| 
 | |
| func (sp *SearchProvider) Index(item ISearchable, lib *xmlmodels.Library) error {
 | |
| 	keys := item.Keys()
 | |
| 	if len(keys) == 0 {
 | |
| 		return NoKeyError
 | |
| 	}
 | |
| 	if lib == nil {
 | |
| 		return NoLibError
 | |
| 	}
 | |
| 
 | |
| 	i, err := sp.FindCreateIndex(item.Type())
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	read := item.Readable(lib)
 | |
| 	return i.Index(keys[0], read)
 | |
| }
 | |
| 
 | |
| // TODO: this is sloppy
 | |
| func (sp *SearchProvider) LoadIndeces() error {
 | |
| 	files, err := filepath.Glob(filepath.Join(sp.basepath, "*.bleve"))
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	if len(files) == 0 {
 | |
| 		return errors.New("No indeces found.")
 | |
| 	}
 | |
| 
 | |
| 	for _, file := range files {
 | |
| 		index, err := bleve.Open(file)
 | |
| 		if err != nil {
 | |
| 			return err
 | |
| 		}
 | |
| 		typ := filepath.Base(file)
 | |
| 		typ = typ[:len(typ)-6]
 | |
| 		sp.indeces.Store(typ, index)
 | |
| 	}
 | |
| 	return nil
 | |
| }
 | |
| 
 | |
| func (sp *SearchProvider) FindCreateIndex(typ string) (bleve.Index, error) {
 | |
| 	index, ok := sp.indeces.Load(typ)
 | |
| 	if ok {
 | |
| 		i := index.(bleve.Index)
 | |
| 		return i, nil
 | |
| 	}
 | |
| 
 | |
| 	fp := filepath.Join(sp.basepath, typ+".bleve")
 | |
| 	ind, err := bleve.Open(fp)
 | |
| 	if err == bleve.ErrorIndexPathDoesNotExist {
 | |
| 		mapping, err := default_mapping()
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 		ind, err = bleve.New(filepath.Join(fp), mapping)
 | |
| 		if err != nil {
 | |
| 			return nil, err
 | |
| 		}
 | |
| 	}
 | |
| 	sp.indeces.Store(typ, ind)
 | |
| 
 | |
| 	return ind, nil
 | |
| }
 | |
| 
 | |
| func (sp *SearchProvider) GetIndex(typ string) (bleve.Index, error) {
 | |
| 	index, ok := sp.indeces.Load(typ)
 | |
| 	if !ok {
 | |
| 		return nil, errors.New("Index not found.")
 | |
| 	}
 | |
| 
 | |
| 	i := index.(bleve.Index)
 | |
| 	return i, nil
 | |
| }
 | |
| 
 | |
| func default_mapping() (*mapping.IndexMappingImpl, error) {
 | |
| 	indexMapping := bleve.NewIndexMapping()
 | |
| 
 | |
| 	customunicodeFilter := map[string]interface{}{
 | |
| 		"type": unicodenorm.Name,
 | |
| 		"form": unicodenorm.NFKD,
 | |
| 	}
 | |
| 
 | |
| 	customCharFilterConfig := map[string]interface{}{
 | |
| 		"type":    regexp.Name,
 | |
| 		"regexp":  `[[:punct:]]+`, // Removes all punctuation characters
 | |
| 		"replace": "",
 | |
| 	}
 | |
| 
 | |
| 	customNgramFilterConfig := map[string]interface{}{
 | |
| 		"type": ngram.Name,
 | |
| 		"min":  1,  // minimum n-gram size
 | |
| 		"max":  20, // maximum n-gram size
 | |
| 	}
 | |
| 
 | |
| 	customNgramAnalyzer := map[string]interface{}{
 | |
| 		"type":          custom.Name,
 | |
| 		"tokenizer":     unicode.Name,
 | |
| 		"char_filters":  []string{"removePunctuation", html.Name},
 | |
| 		"token_filters": []string{lowercase.Name, "customNgramFilter", "customUnicodeCharFilter"},
 | |
| 	}
 | |
| 
 | |
| 	err := indexMapping.AddCustomTokenFilter("customNgramFilter", customNgramFilterConfig)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	err = indexMapping.AddCustomCharFilter("removePunctuation", customCharFilterConfig)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	err = indexMapping.AddCustomTokenFilter("customUnicodeCharFilter", customunicodeFilter)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	err = indexMapping.AddCustomAnalyzer("customNgramAnalyzer", customNgramAnalyzer)
 | |
| 	if err != nil {
 | |
| 		return nil, err
 | |
| 	}
 | |
| 
 | |
| 	indexMapping.DefaultAnalyzer = "customNgramAnalyzer"
 | |
| 	return indexMapping, nil
 | |
| }
 | |
| 
 | |
| // ClearAllIndices closes and removes all search indices
 | |
| func (sp *SearchProvider) ClearAllIndices() error {
 | |
| 	// Close all open indices
 | |
| 	sp.indeces.Range(func(key, value interface{}) bool {
 | |
| 		if index, ok := value.(bleve.Index); ok {
 | |
| 			index.Close()
 | |
| 		}
 | |
| 		return true
 | |
| 	})
 | |
| 
 | |
| 	// Clear the sync.Map
 | |
| 	sp.indeces = sync.Map{}
 | |
| 
 | |
| 	// Remove all .bleve directories from disk
 | |
| 	files, err := filepath.Glob(filepath.Join(sp.basepath, "*.bleve"))
 | |
| 	if err != nil {
 | |
| 		return err
 | |
| 	}
 | |
| 
 | |
| 	for _, file := range files {
 | |
| 		os.RemoveAll(file)
 | |
| 	}
 | |
| 
 | |
| 	return nil
 | |
| }
 | 
