package dbmodels

import (
	"errors"
	"strconv"
	"strings"
	"unicode"

	"github.com/Theodor-Springmann-Stiftung/musenalm/helpers/datatypes"
	"github.com/pocketbase/dbx"
	"github.com/pocketbase/pocketbase/core"
	"golang.org/x/text/cases"
	"golang.org/x/text/language"
)

const (
	// FTS5_PREFIX is prepended to a table name to form the name of its FTS5
	// companion table (see FTS5TableName).
	FTS5_PREFIX = "fts5_"
	// DIVIDER_STR separates multiple values that are folded into a single
	// FTS5 column.
	DIVIDER_STR = "; "
)

// SERIES_FTS5_FIELDS lists the columns of the series FTS5 table, in the order
// in which FTS5ValuesSeries produces their values.
var SERIES_FTS5_FIELDS = []string{
	SERIES_TITLE_FIELD,
	SERIES_PSEUDONYMS_FIELD,
	REFERENCES_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
}

// AGENTS_FTS5_FIELDS lists the columns of the agents FTS5 table, in the order
// in which FTS5ValuesAgent produces their values.
var AGENTS_FTS5_FIELDS = []string{
	AGENTS_NAME_FIELD,
	AGENTS_BIOGRAPHICAL_DATA_FIELD,
	AGENTS_PSEUDONYMS_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
	REFERENCES_FIELD,
}

// PLACES_FTS5_FIELDS lists the columns of the places FTS5 table, in the order
// in which FTS5ValuesPlace produces their values.
var PLACES_FTS5_FIELDS = []string{
	PLACES_NAME_FIELD,
	PLACES_PSEUDONYMS_FIELD,
	URI_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
}

// ITEMS_FTS5_FIELDS lists the columns of the items FTS5 table, in the order
// in which FTS5ValuesItem produces their values.
var ITEMS_FTS5_FIELDS = []string{
	ITEMS_LOCATION_FIELD,
	ITEMS_OWNER_FIELD,
	ITEMS_MEDIA_FIELD,
	ITEMS_CONDITION_FIELD,
	ITEMS_IDENTIFIER_FIELD,
	URI_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
}

// ENTRIES_FTS5_FIELDS lists the columns of the entries FTS5 table, in the
// order in which FTS5ValuesEntry produces their values. The PLACES_TABLE,
// AGENTS_TABLE and SERIES_TABLE columns hold the joined names/titles of the
// related records.
var ENTRIES_FTS5_FIELDS = []string{
	PREFERRED_TITLE_FIELD,
	VARIANT_TITLE_FIELD,
	PARALLEL_TITLE_FIELD,
	TITLE_STMT_FIELD,
	SUBTITLE_STMT_FIELD,
	INCIPIT_STMT_FIELD,
	RESPONSIBILITY_STMT_FIELD,
	PUBLICATION_STMT_FIELD,
	PLACE_STMT_FIELD,
	EDITION_FIELD,
	YEAR_FIELD,
	EXTENT_FIELD,
	DIMENSIONS_FIELD,
	REFERENCES_FIELD,
	PLACES_TABLE,
	AGENTS_TABLE,
	SERIES_TABLE,
	MUSENALMID_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
}

// CONTENTS_FTS5_FIELDS lists the columns of the contents FTS5 table, in the
// order in which FTS5ValuesContent produces their values. The ENTRIES_TABLE
// and AGENTS_TABLE columns hold text derived from the related records.
var CONTENTS_FTS5_FIELDS = []string{
	PREFERRED_TITLE_FIELD,
	VARIANT_TITLE_FIELD,
	PARALLEL_TITLE_FIELD,
	TITLE_STMT_FIELD,
	SUBTITLE_STMT_FIELD,
	INCIPIT_STMT_FIELD,
	RESPONSIBILITY_STMT_FIELD,
	PUBLICATION_STMT_FIELD,
	PLACE_STMT_FIELD,
	YEAR_FIELD,
	EXTENT_FIELD,
	DIMENSIONS_FIELD,
	ENTRIES_TABLE,
	AGENTS_TABLE,
	MUSENALMID_FIELD,
	MUSENALM_INHALTE_TYPE_FIELD,
	ANNOTATION_FIELD,
	COMMENT_FIELD,
}

// ErrInvalidQuery is returned by FTS5Search when it is called without a table
// name or query requests, or when no query string could be built.
var ErrInvalidQuery = errors.New("invalid input into the search function")

// Query is the result of splitting a user search string into phrases.
type Query struct {
	Include []string // Phrases that should be matched
	Exclude []string // Phrases that should not be matched
	UnsafeI []string // Phrases < 3 characters
	UnsafeE []string // Phrases < 3 characters excluded
}

// NormalizeQuery parses query strings like
//
//	word another "this is a phrase" -notthis aword -alsonotthis -"also not this"
//
// into separate phrases. The input is normalized and lower-cased first.
// Whitespace and punctuation outside of double quotes end the current phrase;
// a leading '-' marks the phrase as excluded. Phrases shorter than three
// characters are collected in UnsafeI/UnsafeE instead of Include/Exclude.
func NormalizeQuery(query string) Query {
	query = datatypes.NormalizeString(query)
	// TODO: how to normalize, which unicode normalization to use?
	// query = datatypes.RemovePunctuation(query)
	query = cases.Lower(language.German).String(query)

	var (
		result     Query
		token      strings.Builder
		isInQuotes bool
		isExcluded bool
	)

	// flush files the pending phrase (if any) into the matching bucket and
	// resets the parser state for the next phrase.
	flush := func() {
		if token.Len() > 0 {
			t := token.String()
			// Count characters, not bytes: the trigram tokenizer works on
			// Unicode characters, so "öl" must be treated as a short phrase
			// even though it is three bytes long.
			short := len([]rune(t)) < 3
			switch {
			case short && isExcluded:
				result.UnsafeE = append(result.UnsafeE, t)
			case short:
				result.UnsafeI = append(result.UnsafeI, t)
			case isExcluded:
				result.Exclude = append(result.Exclude, t)
			default:
				result.Include = append(result.Include, t)
			}
		}
		isInQuotes = false
		isExcluded = false
		token.Reset()
	}

	for _, r := range query {
		switch {
		case r == '"':
			if isInQuotes {
				flush()
			} else if token.Len() == 0 {
				isInQuotes = true
			}
		// INFO: - is punctuation, so the order of cases is important
		case r == '-' && token.Len() == 0:
			isExcluded = true
		case !isInQuotes && (unicode.IsSpace(r) || unicode.IsPunct(r)):
			flush()
		default:
			token.WriteRune(r)
		}
	}
	// Pick up a trailing phrase, including one with an unterminated quote.
	flush()

	return result
}

// IntoQueryRequests turns a Query into the requests FTS5Search expects: one
// OP_AND request for the included phrases and one OP_NOT request for the
// excluded phrases, both over the fields f. The short ("unsafe") phrases of q
// are not part of the result.
func IntoQueryRequests(f []string, q Query) []FTS5QueryRequest {
	ret := []FTS5QueryRequest{}
	if len(q.Include) > 0 {
		ret = append(ret, FTS5QueryRequest{Fields: f, Query: q.Include, OP: OP_AND})
	}
	if len(q.Exclude) > 0 {
		ret = append(ret, FTS5QueryRequest{Fields: f, Query: q.Exclude, OP: OP_NOT})
	}
	return ret
}

// FTS5Search runs the given query requests against table and returns the
// matching IDs. It returns ErrInvalidQuery if table is empty, no requests are
// given, or the requests produce an empty query string. Phrases of a request
// whose OP is none of OP_AND, OP_OR, OP_NOT or NONE are skipped.
func FTS5Search(app core.App, table string, mapfq ...FTS5QueryRequest) ([]*FTS5IDQueryResult, error) {
	if len(mapfq) == 0 || table == "" {
		return nil, ErrInvalidQuery
	}

	q := NewFTS5Query().From(table).SelectID()
	for _, req := range mapfq {
		for _, phrase := range req.Query {
			switch req.OP {
			case OP_AND, NONE:
				q.AndMatch(req.Fields, phrase)
			case OP_OR:
				q.OrMatch(req.Fields, phrase)
			case OP_NOT:
				q.NotMatch(req.Fields, phrase)
			}
		}
	}

	querystring := q.Query()
	if querystring == "" {
		return nil, ErrInvalidQuery
	}

	res := []*FTS5IDQueryResult{}
	if err := app.DB().NewQuery(querystring).All(&res); err != nil {
		return nil, err
	}
	return res, nil
}

// CreateFTS5TableQuery returns the CREATE VIRTUAL TABLE statement for the FTS5
// companion of tablename, with an ID column followed by fields and the
// trigram tokenizer. It returns "" if no fields are given.
func CreateFTS5TableQuery(tablename string, fields ...string) string {
	if len(fields) == 0 {
		return ""
	}
	return "CREATE VIRTUAL TABLE IF NOT EXISTS " + FTS5TableName(tablename) +
		" USING fts5(" + ID_FIELD + ", " + strings.Join(fields, ", ") +
		", tokenize = 'trigram')"
}

// CreateFTS5Tables creates all FTS5 tables that do not exist yet. Every table
// is attempted; the individual errors are joined.
func CreateFTS5Tables(app core.App) error {
	tables := []struct {
		name   string
		fields []string
	}{
		{AGENTS_TABLE, AGENTS_FTS5_FIELDS},
		{PLACES_TABLE, PLACES_FTS5_FIELDS},
		{SERIES_TABLE, SERIES_FTS5_FIELDS},
		{ITEMS_TABLE, ITEMS_FTS5_FIELDS},
		{ENTRIES_TABLE, ENTRIES_FTS5_FIELDS},
		{CONTENTS_TABLE, CONTENTS_FTS5_FIELDS},
	}

	errs := make([]error, 0, len(tables))
	for _, t := range tables {
		errs = append(errs, createFTS5Table(app, t.name, t.fields))
	}
	return errors.Join(errs...)
}

// DropFTS5Tables drops all FTS5 tables. Every table is attempted; the
// individual errors are joined.
func DropFTS5Tables(app core.App) error {
	tables := []string{
		AGENTS_TABLE,
		PLACES_TABLE,
		SERIES_TABLE,
		ITEMS_TABLE,
		ENTRIES_TABLE,
		CONTENTS_TABLE,
	}

	errs := make([]error, 0, len(tables))
	for _, t := range tables {
		errs = append(errs, dropFTS5Table(app, FTS5TableName(t)))
	}
	return errors.Join(errs...)
}

// FTS5TableName returns the name of the FTS5 table that belongs to table.
func FTS5TableName(table string) string {
	return FTS5_PREFIX + table
}

// createFTS5Table executes the statement built by CreateFTS5TableQuery.
func createFTS5Table(app core.App, table string, fields []string) error {
	_, err := app.DB().NewQuery(CreateFTS5TableQuery(table, fields...)).Execute()
	return err
}

// dropFTS5Table drops table (an already prefixed FTS5 table name) if it exists.
func dropFTS5Table(app core.App, table string) error {
	_, err := app.DB().NewQuery("DROP TABLE IF EXISTS " + table).Execute()
	return err
}

// InsertFTS5Agent inserts a single agent into the agents FTS5 table.
func InsertFTS5Agent(app core.App, agent *Agent) error {
	query := FTS5InsertQuery(app, AGENTS_TABLE, AGENTS_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Agent(query, agent)
}

// BulkInsertFTS5Agent inserts agent using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Agent(query *dbx.Query, agent *Agent) error {
	return InsertFTS5Record(query, agent.Id, AGENTS_FTS5_FIELDS, FTS5ValuesAgent(agent)...)
}

// InsertFTS5Place inserts a single place into the places FTS5 table.
func InsertFTS5Place(app core.App, place *Place) error {
	query := FTS5InsertQuery(app, PLACES_TABLE, PLACES_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Place(query, place)
}

// BulkInsertFTS5Place inserts place using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Place(query *dbx.Query, place *Place) error {
	return InsertFTS5Record(query, place.Id, PLACES_FTS5_FIELDS, FTS5ValuesPlace(place)...)
}

// InsertFTS5Series inserts a single series into the series FTS5 table.
func InsertFTS5Series(app core.App, series *Series) error {
	query := FTS5InsertQuery(app, SERIES_TABLE, SERIES_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Series(query, series)
}

// BulkInsertFTS5Series inserts series using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Series(query *dbx.Query, series *Series) error {
	return InsertFTS5Record(query, series.Id, SERIES_FTS5_FIELDS, FTS5ValuesSeries(series)...)
}

// InsertFTS5Item inserts a single item into the items FTS5 table.
func InsertFTS5Item(app core.App, item *Item) error {
	query := FTS5InsertQuery(app, ITEMS_TABLE, ITEMS_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Item(query, item)
}

// BulkInsertFTS5Item inserts item using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Item(query *dbx.Query, item *Item) error {
	return InsertFTS5Record(query, item.Id, ITEMS_FTS5_FIELDS, FTS5ValuesItem(item)...)
}

// InsertFTS5Entry inserts a single entry, together with the names of its
// related places, agents and series, into the entries FTS5 table.
func InsertFTS5Entry(app core.App, entry *Entry, places []*Place, agents []*Agent, series []*Series) error {
	query := FTS5InsertQuery(app, ENTRIES_TABLE, ENTRIES_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Entry(query, entry, places, agents, series)
}

// BulkInsertFTS5Entry inserts entry using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Entry(query *dbx.Query, entry *Entry, places []*Place, agents []*Agent, series []*Series) error {
	return InsertFTS5Record(
		query,
		entry.Id,
		ENTRIES_FTS5_FIELDS,
		FTS5ValuesEntry(entry, places, agents, series)...,
	)
}

// InsertFTS5Content inserts a single content, together with text derived from
// its parent entry and its agents, into the contents FTS5 table.
func InsertFTS5Content(app core.App, content *Content, entry *Entry, agents []*Agent) error {
	query := FTS5InsertQuery(app, CONTENTS_TABLE, CONTENTS_FTS5_FIELDS)
	// The query is a prepared statement used only once here; release it.
	defer query.Close()
	return BulkInsertFTS5Content(query, content, entry, agents)
}

// BulkInsertFTS5Content inserts content using a query obtained from
// FTS5InsertQuery, so that one prepared statement can serve many inserts.
func BulkInsertFTS5Content(query *dbx.Query, content *Content, entry *Entry, agents []*Agent) error {
	return InsertFTS5Record(
		query,
		content.Id,
		CONTENTS_FTS5_FIELDS,
		FTS5ValuesContent(content, entry, agents)...,
	)
}

// fts5AgentNames joins the names of agents with DIVIDER_STR. Nil elements
// contribute an empty name; a nil slice yields "".
func fts5AgentNames(agents []*Agent) string {
	if agents == nil {
		return ""
	}
	return datatypes.SliceJoin(agents, DIVIDER_STR, func(agent *Agent) string {
		if agent == nil {
			return ""
		}
		return agent.Name()
	})
}

// FTS5ValuesContent returns the values of content for the contents FTS5
// table, in the order of CONTENTS_FTS5_FIELDS. The entry column holds the
// entry's preferred title followed by its year ("[o.J.]" when the year is 0);
// it is left empty when entry is nil.
func FTS5ValuesContent(content *Content, entry *Entry, agents []*Agent) []string {
	entrystring := ""
	if entry != nil {
		entrystring = entry.PreferredTitle()
		if year := entry.Year(); year != 0 {
			entrystring += DIVIDER_STR + strconv.Itoa(year)
		} else {
			entrystring += DIVIDER_STR + "[o.J.]"
		}
	}

	// Every type is followed by a single space, including the last one.
	var types strings.Builder
	for _, typ := range content.MusenalmType() {
		types.WriteString(typ)
		types.WriteString(" ")
	}

	return []string{
		content.PreferredTitle(),
		content.VariantTitle(),
		content.ParallelTitle(),
		content.TitleStmt(),
		content.SubtitleStmt(),
		content.IncipitStmt(),
		content.ResponsibilityStmt(),
		content.PublicationStmt(),
		content.PlaceStmt(),
		strconv.Itoa(content.Year()),
		content.Extent(),
		content.Dimensions(),
		entrystring,
		fts5AgentNames(agents),
		strconv.Itoa(content.MusenalmID()),
		types.String(),
		datatypes.DeleteTags(content.Annotation()),
		datatypes.DeleteTags(content.Comment()),
	}
}

// FTS5ValuesEntry returns the values of entry for the entries FTS5 table, in
// the order of ENTRIES_FTS5_FIELDS. The names of places and agents and the
// titles of series are each joined with DIVIDER_STR.
func FTS5ValuesEntry(entry *Entry, places []*Place, agents []*Agent, series []*Series) []string {
	placestring := ""
	if places != nil {
		placestring = datatypes.SliceJoin(places, DIVIDER_STR, func(place *Place) string {
			if place == nil {
				return ""
			}
			return place.Name()
		})
	}

	seriesstring := ""
	if series != nil {
		seriesstring = datatypes.SliceJoin(series, DIVIDER_STR, func(s *Series) string {
			if s == nil {
				return ""
			}
			return s.Title()
		})
	}

	return []string{
		entry.PreferredTitle(),
		entry.VariantTitle(),
		entry.ParallelTitle(),
		entry.TitleStmt(),
		entry.SubtitleStmt(),
		entry.IncipitStmt(),
		entry.ResponsibilityStmt(),
		entry.PublicationStmt(),
		entry.PlaceStmt(),
		entry.Edition(),
		strconv.Itoa(entry.Year()),
		entry.Extent(),
		entry.Dimensions(),
		entry.References(),
		placestring,
		fts5AgentNames(agents),
		seriesstring,
		strconv.Itoa(entry.MusenalmID()),
		datatypes.DeleteTags(entry.Annotation()),
		datatypes.DeleteTags(entry.Comment()),
	}
}

// FTS5ValuesItem returns the values of item for the items FTS5 table, in the
// order of ITEMS_FTS5_FIELDS.
func FTS5ValuesItem(item *Item) []string {
	return []string{
		item.Location(),
		item.Owner(),
		strings.Join(item.Media(), DIVIDER_STR),
		item.Condition(),
		item.Identifier(),
		item.Uri(),
		datatypes.DeleteTags(item.Annotation()),
		datatypes.DeleteTags(item.Comment()),
	}
}

// FTS5ValuesSeries returns the values of series for the series FTS5 table, in
// the order of SERIES_FTS5_FIELDS.
func FTS5ValuesSeries(series *Series) []string {
	return []string{
		series.Title(),
		series.Pseudonyms(),
		series.References(),
		datatypes.DeleteTags(series.Annotation()),
		datatypes.DeleteTags(series.Comment()),
	}
}

// FTS5ValuesPlace returns the values of place for the places FTS5 table, in
// the order of PLACES_FTS5_FIELDS.
func FTS5ValuesPlace(place *Place) []string {
	return []string{
		place.Name(),
		place.Pseudonyms(),
		place.URI(),
		datatypes.DeleteTags(place.Annotation()),
		datatypes.DeleteTags(place.Comment()),
	}
}

// FTS5ValuesAgent returns the values of agent for the agents FTS5 table, in
// the order of AGENTS_FTS5_FIELDS.
func FTS5ValuesAgent(agent *Agent) []string {
	return []string{
		agent.Name(),
		agent.BiographicalData(),
		agent.Pseudonyms(),
		datatypes.DeleteTags(agent.Annotation()),
		datatypes.DeleteTags(agent.Comment()),
		agent.References(),
	}
}

// FTS5ValuesItems returns the same values as FTS5ValuesItem.
//
// Deprecated: use FTS5ValuesItem instead.
func FTS5ValuesItems(item *Item) []string {
	return FTS5ValuesItem(item)
}

// FTS5InsertQuery returns a prepared INSERT statement for the FTS5 table of
// table, with one named parameter for the ID and one per field. The caller
// should Close the returned query once it is no longer needed.
func FTS5InsertQuery(app core.App, table string, fields []string) *dbx.Query {
	tn := FTS5TableName(table)
	query := "INSERT INTO " + tn + " (" + ID_FIELD + ", " + strings.Join(fields, ", ") +
		") VALUES ({:" + ID_FIELD + "}, {:" + strings.Join(fields, "}, {:") + "})"
	return app.DB().NewQuery(query).Prepare()
}

// InsertFTS5Record binds id and the field values to query and executes it.
// fields and values are matched by position and must have the same length.
func InsertFTS5Record(query *dbx.Query, id string, fields []string, values ...string) error {
	if len(fields) != len(values) {
		return errors.New("fields and values must have the same length")
	}

	params := make(dbx.Params, len(fields)+1)
	params[ID_FIELD] = id
	for i, field := range fields {
		params[field] = values[i]
	}

	_, err := query.Bind(params).Execute()
	return err
}

// DeleteFTS5Data removes all rows from every FTS5 table. Every table is
// attempted; the individual errors are joined.
func DeleteFTS5Data(app core.App) error {
	tables := []string{
		AGENTS_TABLE,
		SERIES_TABLE,
		ENTRIES_TABLE,
		PLACES_TABLE,
		ITEMS_TABLE,
		CONTENTS_TABLE,
	}

	errs := make([]error, 0, len(tables))
	for _, t := range tables {
		errs = append(errs, deleteTableContents(app, FTS5TableName(t)))
	}
	return errors.Join(errs...)
}

// deleteTableContents removes all rows from table.
func deleteTableContents(app core.App, table string) error {
	_, err := app.DB().NewQuery("DELETE FROM " + table).Execute()
	return err
}

// deleteFTS5Row removes the row with the given id from the FTS5 table that
// belongs to table.
func deleteFTS5Row(app core.App, table string, id string) error {
	query := "DELETE FROM " + FTS5TableName(table) + " WHERE " + ID_FIELD + " = {:id}"
	_, err := app.DB().NewQuery(query).Bind(dbx.Params{"id": id}).Execute()
	return err
}

// fts5ContentAgents loads the agents related to the content with the given
// ID. Relations and agents that cannot be loaded are skipped (best effort).
func fts5ContentAgents(app core.App, contentID string) []*Agent {
	agents := []*Agent{}
	rels, err := RContentsAgents_Content(app, contentID)
	if err != nil {
		app.Logger().Error("Failed to load agent relations for content FTS5 update", "content_id", contentID, "error", err)
		return agents
	}
	for _, rel := range rels {
		if agent, err := Agents_ID(app, rel.Agent()); err == nil && agent != nil {
			agents = append(agents, agent)
		}
	}
	return agents
}

// DeleteFTS5Entry removes the entry with the given ID from the entries FTS5
// table.
func DeleteFTS5Entry(app core.App, entryID string) error {
	return deleteFTS5Row(app, ENTRIES_TABLE, entryID)
}

// UpdateFTS5Entry replaces the FTS5 row of entry by deleting and re-inserting
// it.
func UpdateFTS5Entry(app core.App, entry *Entry, places []*Place, agents []*Agent, series []*Series) error {
	if err := DeleteFTS5Entry(app, entry.Id); err != nil {
		return err
	}
	return InsertFTS5Entry(app, entry, places, agents, series)
}

// UpdateFTS5EntryAndRelatedContents updates the FTS5 row of entry and then
// the rows of all contents that belong to it. Only a failure to update the
// entry itself is returned; failures for contents are logged.
func UpdateFTS5EntryAndRelatedContents(app core.App, entry *Entry, places []*Place, agents []*Agent, series []*Series) error {
	// Update the entry itself
	if err := UpdateFTS5Entry(app, entry, places, agents, series); err != nil {
		return err
	}

	// Update all contents that belong to this entry
	contents, err := Contents_Entry(app, entry.Id)
	if err != nil {
		app.Logger().Error("Failed to load contents for FTS5 update", "entry_id", entry.Id, "error", err)
		return nil
	}

	for _, content := range contents {
		contentAgents := fts5ContentAgents(app, content.Id)
		if err := UpdateFTS5Content(app, content, entry, contentAgents); err != nil {
			app.Logger().Error("Failed to update FTS5 for content", "content_id", content.Id, "error", err)
		}
	}

	return nil
}

// DeleteFTS5Agent removes the agent with the given ID from the agents FTS5
// table.
func DeleteFTS5Agent(app core.App, agentID string) error {
	return deleteFTS5Row(app, AGENTS_TABLE, agentID)
}

// UpdateFTS5Agent replaces the FTS5 row of agent by deleting and re-inserting
// it.
func UpdateFTS5Agent(app core.App, agent *Agent) error {
	if err := DeleteFTS5Agent(app, agent.Id); err != nil {
		return err
	}
	return InsertFTS5Agent(app, agent)
}

// UpdateFTS5AgentAndRelated updates the FTS5 row of agent and then the rows
// of all entries and contents related to it, since those rows contain agent
// names. Only a failure to update the agent itself is returned; failures for
// related records are logged.
func UpdateFTS5AgentAndRelated(app core.App, agent *Agent) error {
	// Update the agent itself
	if err := UpdateFTS5Agent(app, agent); err != nil {
		return err
	}

	// Update all entries related to this agent
	entryRelations, err := REntriesAgents_Agent(app, agent.Id)
	if err != nil {
		app.Logger().Error("Failed to load entry relations for FTS5 update", "agent_id", agent.Id, "error", err)
	}
	for _, relation := range entryRelations {
		entry, err := Entries_ID(app, relation.Entry())
		if err != nil || entry == nil {
			app.Logger().Error("Failed to load entry for FTS5 update", "entry_id", relation.Entry(), "error", err)
			continue
		}

		// Load all related data for this entry
		places := []*Place{}
		for _, placeID := range entry.Places() {
			if place, err := Places_ID(app, placeID); err == nil && place != nil {
				places = append(places, place)
			}
		}

		agents := []*Agent{}
		if agentRels, err := REntriesAgents_Entry(app, entry.Id); err == nil {
			for _, rel := range agentRels {
				if ag, err := Agents_ID(app, rel.Agent()); err == nil && ag != nil {
					agents = append(agents, ag)
				}
			}
		}

		series := []*Series{}
		if seriesRels, err := REntriesSeries_Entry(app, entry.Id); err == nil {
			for _, rel := range seriesRels {
				if s, err := Series_ID(app, rel.Series()); err == nil && s != nil {
					series = append(series, s)
				}
			}
		}

		if err := UpdateFTS5Entry(app, entry, places, agents, series); err != nil {
			app.Logger().Error("Failed to update FTS5 for entry", "entry_id", entry.Id, "error", err)
		}
	}

	// Update all contents related to this agent
	contentRelations, err := RContentsAgents_Agent(app, agent.Id)
	if err != nil {
		app.Logger().Error("Failed to load content relations for FTS5 update", "agent_id", agent.Id, "error", err)
	}
	for _, relation := range contentRelations {
		contents, err := Contents_IDs(app, []any{relation.Content()})
		if err != nil || len(contents) == 0 {
			app.Logger().Error("Failed to load content for FTS5 update", "content_id", relation.Content(), "error", err)
			continue
		}
		content := contents[0]

		// Load the parent entry
		entry, err := Entries_ID(app, content.Entry())
		if err != nil || entry == nil {
			app.Logger().Error("Failed to load entry for content FTS5 update", "entry_id", content.Entry(), "error", err)
			continue
		}

		// Load all agents for this content
		agents := fts5ContentAgents(app, content.Id)
		if err := UpdateFTS5Content(app, content, entry, agents); err != nil {
			app.Logger().Error("Failed to update FTS5 for content", "content_id", content.Id, "error", err)
		}
	}

	return nil
}

// DeleteFTS5Content removes the content with the given ID from the contents
// FTS5 table.
func DeleteFTS5Content(app core.App, contentID string) error {
	return deleteFTS5Row(app, CONTENTS_TABLE, contentID)
}

// UpdateFTS5Content replaces the FTS5 row of content by deleting and
// re-inserting it.
func UpdateFTS5Content(app core.App, content *Content, entry *Entry, agents []*Agent) error {
	if err := DeleteFTS5Content(app, content.Id); err != nil {
		return err
	}
	return InsertFTS5Content(app, content, entry, agents)
}

// DeleteFTS5Place removes the place with the given ID from the places FTS5
// table.
func DeleteFTS5Place(app core.App, placeID string) error {
	return deleteFTS5Row(app, PLACES_TABLE, placeID)
}

// UpdateFTS5Place replaces the FTS5 row of place by deleting and re-inserting
// it.
func UpdateFTS5Place(app core.App, place *Place) error {
	if err := DeleteFTS5Place(app, place.Id); err != nil {
		return err
	}
	return InsertFTS5Place(app, place)
}
func UpdateFTS5PlaceAndRelatedEntries(app core.App, place *Place) error { // Update the place itself if err := UpdateFTS5Place(app, place); err != nil { return err } // Find all entries that reference this place entries := []*Entry{} err := app.RecordQuery(ENTRIES_TABLE). Where(dbx.NewExp( PLACES_TABLE+" = {:id} OR (json_valid("+PLACES_TABLE+") = 1 AND EXISTS (SELECT 1 FROM json_each("+PLACES_TABLE+") WHERE value = {:id}))", dbx.Params{"id": place.Id}, )). All(&entries) if err == nil { for _, entry := range entries { // Load all related data for this entry places := []*Place{} for _, placeID := range entry.Places() { p, err := Places_ID(app, placeID) if err == nil && p != nil { places = append(places, p) } } agents := []*Agent{} agentRels, err := REntriesAgents_Entry(app, entry.Id) if err == nil { for _, rel := range agentRels { agent, err := Agents_ID(app, rel.Agent()) if err == nil && agent != nil { agents = append(agents, agent) } } } series := []*Series{} seriesRels, err := REntriesSeries_Entry(app, entry.Id) if err == nil { for _, rel := range seriesRels { s, err := Series_ID(app, rel.Series()) if err == nil && s != nil { series = append(series, s) } } } // Only update the entry itself, not contents (contents don't store place data) if err := UpdateFTS5Entry(app, entry, places, agents, series); err != nil { app.Logger().Error("Failed to update FTS5 for entry", "entry_id", entry.Id, "error", err) } } } return nil } func DeleteFTS5Series(app core.App, seriesID string) error { query := "DELETE FROM " + FTS5TableName(SERIES_TABLE) + " WHERE " + ID_FIELD + " = {:id}" _, err := app.DB().NewQuery(query).Bind(dbx.Params{"id": seriesID}).Execute() return err } func UpdateFTS5Series(app core.App, series *Series) error { if err := DeleteFTS5Series(app, series.Id); err != nil { return err } return InsertFTS5Series(app, series) } func UpdateFTS5SeriesAndRelatedEntries(app core.App, series *Series) error { // Update the series itself if err := UpdateFTS5Series(app, series); 
err != nil { return err } // Find all entries that reference this series relations, err := REntriesSeries_Seriess(app, []any{series.Id}) if err == nil { for _, relation := range relations { entry, err := Entries_ID(app, relation.Entry()) if err != nil { app.Logger().Error("Failed to load entry for FTS5 update", "entry_id", relation.Entry(), "error", err) continue } // Load all related data for this entry places := []*Place{} for _, placeID := range entry.Places() { place, err := Places_ID(app, placeID) if err == nil && place != nil { places = append(places, place) } } agents := []*Agent{} agentRels, err := REntriesAgents_Entry(app, entry.Id) if err == nil { for _, rel := range agentRels { agent, err := Agents_ID(app, rel.Agent()) if err == nil && agent != nil { agents = append(agents, agent) } } } allSeries := []*Series{} seriesRels, err := REntriesSeries_Entry(app, entry.Id) if err == nil { for _, rel := range seriesRels { s, err := Series_ID(app, rel.Series()) if err == nil && s != nil { allSeries = append(allSeries, s) } } } // Only update the entry itself, not contents (contents don't store series data) if err := UpdateFTS5Entry(app, entry, places, agents, allSeries); err != nil { app.Logger().Error("Failed to update FTS5 for entry", "entry_id", entry.Id, "error", err) } } } return nil }