diff --git a/HaWeb/Models/CollectedItem.cs b/HaWeb/Models/CollectedItem.cs index 2d4054b..8058e3e 100644 --- a/HaWeb/Models/CollectedItem.cs +++ b/HaWeb/Models/CollectedItem.cs @@ -7,14 +7,23 @@ public class CollectedItem : ISearchable { public string Index { get; private set; } public string Collection { get; private set; } public string? SearchText { get; private set; } + public Dictionary? Fields { get; private set; } public XElement ELement { get; private set; } public IXMLRoot Root { get; private set; } - public CollectedItem(string index, XElement element, IXMLRoot root, string collection, string? searchtext = null) { + public CollectedItem( + string index, + XElement element, + IXMLRoot root, + string collection, + Dictionary? fields, + string? searchtext = null + ) { this.Index = index; this.SearchText = searchtext; this.Collection = collection; this.Root = root; this.ELement = element; + this.Fields = fields; } } \ No newline at end of file diff --git a/HaWeb/Models/ItemsCollection.cs b/HaWeb/Models/ItemsCollection.cs new file mode 100644 index 0000000..d77438d --- /dev/null +++ b/HaWeb/Models/ItemsCollection.cs @@ -0,0 +1,47 @@ +namespace HaWeb.Models; +using HaWeb.XMLParser; + +public class ItemsCollection { + public string Name { get; private set; } + public Dictionary Items { get; private set; } + public bool Searchable { get; private set; } + public IXMLRoot Root { get; private set; } + public Func, Dictionary>?>? GroupingsGeneration { get; private set; } + public Func, Dictionary>?>? SortingsGeneration { get; private set; } + + public Dictionary>? Groupings { get; private set; } + public Dictionary>? Sortings { get; private set; } + + public ItemsCollection( + string name, + bool searchable, + IXMLRoot root, + Func, Dictionary>?>? groupingsFunc = null, + Func, Dictionary>?>? 
sortingsFunc = null + ) { + this.Name = name; + this.Searchable = searchable; + this.Root = root; + this.GroupingsGeneration = groupingsFunc; + this.SortingsGeneration = sortingsFunc; + this.Items = new Dictionary(); + } + + public void GenerateGroupings( + Func, Dictionary>?>? groupingsFunc = null + ) { + if (groupingsFunc != null) + this.GroupingsGeneration = groupingsFunc; + if (this.GroupingsGeneration != null && this.Items.Any()) + this.Groupings = GroupingsGeneration(this.Items.Values.ToList()); + } + + public void GenerateSortings( + Func, Dictionary>?>? sortingsFunc = null + ) { + if (sortingsFunc != null) + this.SortingsGeneration = sortingsFunc; + if (this.SortingsGeneration != null && this.Items.Any()) + this.Sortings = SortingsGeneration(this.Items.Values.ToList()); + } +} diff --git a/HaWeb/SearchHelpers/ISearchable.cs b/HaWeb/SearchHelpers/ISearchable.cs index 5a58e92..59e300c 100644 --- a/HaWeb/SearchHelpers/ISearchable.cs +++ b/HaWeb/SearchHelpers/ISearchable.cs @@ -2,5 +2,5 @@ namespace HaWeb.SearchHelpers; public interface ISearchable { public string Index { get; } - public string SearchText { get; } + public string? 
SearchText { get; } } \ No newline at end of file diff --git a/HaWeb/SearchHelpers/SearchRules.cs b/HaWeb/SearchHelpers/SearchRules.cs index bc24fd8..b2a6513 100644 --- a/HaWeb/SearchHelpers/SearchRules.cs +++ b/HaWeb/SearchHelpers/SearchRules.cs @@ -16,7 +16,7 @@ public class SearchRules { sb.Append(t); var sw = reader.State.SearchWord; if (sb.Length >= sw.Length) { - if (sb.ToString().ToLower().Contains(sw)) { + if (sb.ToString().ToUpperInvariant().Contains(sw)) { if (reader.State.Results == null) reader.State.Results = new List<(string Page, string Line)>(); reader.State.Results.Add((reader.CurrentPage, reader.CurrentLine)); diff --git a/HaWeb/SearchHelpers/StringHelpers.cs b/HaWeb/SearchHelpers/StringHelpers.cs index 01979b1..17f42cb 100644 --- a/HaWeb/SearchHelpers/StringHelpers.cs +++ b/HaWeb/SearchHelpers/StringHelpers.cs @@ -29,7 +29,7 @@ public static class StringHelpers { } } else { skipped = false; - if (toLower) output.Append(char.ToLower(c)); + if (toLower) output.Append(char.ToUpperInvariant(c)); else output.Append(c); } } diff --git a/HaWeb/Settings/XMLRoots/CommentRoot.cs b/HaWeb/Settings/XMLRoots/CommentRoot.cs index 4168455..e9ffe8b 100644 --- a/HaWeb/Settings/XMLRoots/CommentRoot.cs +++ b/HaWeb/Settings/XMLRoots/CommentRoot.cs @@ -7,15 +7,27 @@ public class CommentRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Register"; public string Prefix { get; } = "register"; public string[] XPathContainer { get; } = { ".//data//kommentare/kommcat", ".//kommentare/kommcat" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? 
XPathCollection { get; } = { - ("comments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, true), - ("comments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, true), - ("comments-edition", "/opus/data/kommentare/kommcat[@value='editionen']/kommentar", GetKey, true), - ("comments-edition", "/opus/kommentare/kommcat[@value='editionen']/kommentar", GetKey, true), - ("comments-forschung", "/opus/data/kommentare/kommcat[@value='forschung']/kommentar", GetKey, true), - ("comments-forschung", "/opus/kommentare/kommcat[@value='forschung']/kommentar", GetKey, true), - ("comments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar", GetKey, false), - ("comments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar", GetKey, false), + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? 
Collections { get; } = { + ("comments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, null, null, null, true), + ("comments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, null, null, null, true), + ("subcomments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar/subsection", GetKey, null, null, null, true), + ("subcomments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar/subsection", GetKey, null, null, null, true), + ("comments-edition", "/opus/data/kommentare/kommcat[@value='editionen']/kommentar", GetKey, null, null, null, true), + ("comments-edition", "/opus/kommentare/kommcat[@value='editionen']/kommentar", GetKey, null, null, null, true), + ("comments-forschung", "/opus/data/kommentare/kommcat[@value='forschung']/kommentar", GetKey, null, null, null, true), + ("comments-forschung", "/opus/kommentare/kommcat[@value='forschung']/kommentar", GetKey, null, null, null, true), + ("comments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar", GetKey, null, null, null, false), + ("comments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar", GetKey, null, null, null, false), + ("subcomments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar/subsection", GetKey, null, null, null, false), + ("subcomments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar/subsection", GetKey, null, null, null, false), }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/DescriptionsRoot.cs b/HaWeb/Settings/XMLRoots/DescriptionsRoot.cs index 981cf6d..3b8eb5e 100644 --- a/HaWeb/Settings/XMLRoots/DescriptionsRoot.cs +++ b/HaWeb/Settings/XMLRoots/DescriptionsRoot.cs @@ -7,9 +7,17 @@ public class DescriptionsRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Metadaten"; public string Prefix { get; } = "metadaten"; public string[] XPathContainer { get; } = { ".//data/descriptions", 
".//descriptions" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? XPathCollection { get; } = { - ("metas", "/opus/descriptions/letterDesc", GetKey, false), - ("metas", "/opus/data/descriptions/letterDesc", GetKey, false) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? Collections { get; } = { + ("metas", "/opus/descriptions/letterDesc", GetKey, null, null, null, false), + ("metas", "/opus/data/descriptions/letterDesc", GetKey, null, null, null, false) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/DocumentRoot.cs b/HaWeb/Settings/XMLRoots/DocumentRoot.cs index 569e804..8eb9774 100644 --- a/HaWeb/Settings/XMLRoots/DocumentRoot.cs +++ b/HaWeb/Settings/XMLRoots/DocumentRoot.cs @@ -8,9 +8,17 @@ public class DocumentRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Brieftext"; public string Prefix { get; } = "brieftext"; public string[] XPathContainer { get; } = { ".//data/document", ".//document" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? XPathCollection { get; } = { - ("letters", "/opus/data/document/letterText", GetKey, true), - ("letters", "/opus/document/letterText", GetKey, true) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? 
Collections { get; } = { + ("letters", "/opus/data/document/letterText", GetKey, null, null, null, true), + ("letters", "/opus/document/letterText", GetKey, null, null, null, true) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/EditsRoot.cs b/HaWeb/Settings/XMLRoots/EditsRoot.cs index 1fa1faf..d26f0dd 100644 --- a/HaWeb/Settings/XMLRoots/EditsRoot.cs +++ b/HaWeb/Settings/XMLRoots/EditsRoot.cs @@ -7,9 +7,17 @@ public class EditsRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Texteingriffe"; public string Prefix { get; } = "texteingriffe"; public string[] XPathContainer { get; } = { ".//data/edits", ".//edits" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? XPathCollection { get; } = { - ("edits", "/data/edits/editreason", GetKey, true), - ("edits", "/edits/editreason", GetKey, true) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? Collections { get; } = { + ("edits", "/data/edits/editreason", GetKey, null, null, null, true), + ("edits", "/edits/editreason", GetKey, null, null, null, true) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/MarginalsRoot.cs b/HaWeb/Settings/XMLRoots/MarginalsRoot.cs index e847c34..d714358 100644 --- a/HaWeb/Settings/XMLRoots/MarginalsRoot.cs +++ b/HaWeb/Settings/XMLRoots/MarginalsRoot.cs @@ -7,9 +7,17 @@ public class MarginalsRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Stellenkommentar"; public string Prefix { get; } = "stellenkommentar"; public string[] XPathContainer { get; } = { ".//data/marginalien", ".//marginalien" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? 
XPathCollection { get; } = { - ("marginals", "/data/marginalien/marginal", GetKey, true), - ("marginals", "/marginalien/marginal", GetKey, true) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? Collections { get; } = { + ("marginals", "/data/marginalien/marginal", GetKey, null, null, null, true), + ("marginals", "/marginalien/marginal", GetKey, null, null, null, true) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/ReferencesRoot.cs b/HaWeb/Settings/XMLRoots/ReferencesRoot.cs index d04a534..1abac90 100644 --- a/HaWeb/Settings/XMLRoots/ReferencesRoot.cs +++ b/HaWeb/Settings/XMLRoots/ReferencesRoot.cs @@ -7,13 +7,21 @@ public class ReferencesRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Personen / Orte"; public string Prefix { get; } = "personenorte"; public string[] XPathContainer { get; } = { ".//data/definitions", ".//definitions" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? XPathCollection { get; } = { - ("person-definitions", "/opus/data/definitions/personDefs/personDef", GetKey, false), - ("person-definitions", "/opus/definitions/personDefs/personDef", GetKey, false), - ("hand-definitions", "/opus/data/definitions/handDefs/handDef", GetKey, false), - ("hand-definitions", "/opus/definitions/handDefs/handDef", GetKey, false), - ("location-definitions", "/opus/data/definitions/locationDefs/locationDef", GetKey, false), - ("location-definitions", "/opus/definitions/locationDefs/locationDef", GetKey, false) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? 
Collections { get; } = { + ("person-definitions", "/opus/data/definitions/personDefs/personDef", GetKey, null, null, null, false), + ("person-definitions", "/opus/definitions/personDefs/personDef", GetKey, null, null, null, false), + ("hand-definitions", "/opus/data/definitions/handDefs/handDef", GetKey, null, null, null, false), + ("hand-definitions", "/opus/definitions/handDefs/handDef", GetKey, null, null, null, false), + ("location-definitions", "/opus/data/definitions/locationDefs/locationDef", GetKey, null, null, null, false), + ("location-definitions", "/opus/definitions/locationDefs/locationDef", GetKey, null, null, null, false) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/Settings/XMLRoots/TraditionsRoot.cs b/HaWeb/Settings/XMLRoots/TraditionsRoot.cs index 1ad2ab5..8f34bee 100644 --- a/HaWeb/Settings/XMLRoots/TraditionsRoot.cs +++ b/HaWeb/Settings/XMLRoots/TraditionsRoot.cs @@ -7,9 +7,17 @@ public class TraditionsRoot : HaWeb.XMLParser.IXMLRoot { public string Type { get; } = "Überlieferung"; public string Prefix { get; } = "ueberlieferung"; public string[] XPathContainer { get; } = { ".//data/traditions", ".//traditions" }; - public (string Key, string xPath, Func KeyFunc, bool Searchable)[]? XPathCollection { get; } = { - ("tradition", "/opus/data/traditions/letterTradition", GetKey, true), - ("tradition", "/opus/traditions/letterTradition", GetKey, true) + public ( + string Key, + string xPath, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, + bool Searchable + )[]? 
Collections { get; } = { + ("tradition", "/opus/data/traditions/letterTradition", GetKey, null, null, null, true), + ("tradition", "/opus/traditions/letterTradition", GetKey, null, null, null, true) }; public Predicate IsCollectedObject { get; } = (elem) => { diff --git a/HaWeb/XMLParser/IXMLRoot.cs b/HaWeb/XMLParser/IXMLRoot.cs index 7f17f56..015d2a8 100644 --- a/HaWeb/XMLParser/IXMLRoot.cs +++ b/HaWeb/XMLParser/IXMLRoot.cs @@ -16,16 +16,20 @@ public interface IXMLRoot { // Collections of Elements to be created from this Root // Key: the key under which the element(s) will be files // xPath: the (absolute) XPath to the element(s) - // KeyFunc: How to extrect an identifier for the single element in the collection - // LookupsFunc: Function to generate metadata fields for the object, which will then in turn be a possibility to seach and filter without parsing // Searchable: Will the element be indexed for full-text-search? + // GenerateKey: How to extract an identifier for the single element in the collection + // GenerateDataFields: Generate a dict of data associated with each of the collected Elements input: XElement output: Dictionary + // GroupingsGeneration: datafields by which dictionary-like groups should be held in memory input: List output: Dictionary> + // SortingsGeneration: datafields by which a sorting should be held in memory input: List output: ordered List public abstract ( string Key, string xPath, - Func KeyFunc, - // Func> LookupsFunc, + Func GenerateKey, + Func?>? GenerateDataFields, + Func, Dictionary>?>? GroupingsGeneration, + Func, Dictionary>?>? SortingsGeneration, bool Searchable + )[]? 
Collections { get; } // Determines child objects to be collected // (deprecated see collections above; only used internally) diff --git a/HaWeb/XMLParser/XMLService.cs b/HaWeb/XMLParser/XMLService.cs index 74435cf..438ae44 100644 --- a/HaWeb/XMLParser/XMLService.cs +++ b/HaWeb/XMLParser/XMLService.cs @@ -15,8 +15,8 @@ public class XMLService : IXMLService { private Stack>? _InProduction; - private Dictionary> _collectedProduction; - private Dictionary> _collectedUsed; + private Dictionary _collectedProduction; + private Dictionary _collectedUsed; public XMLService() { // Getting all classes which implement IXMLRoot for possible document endpoints @@ -65,37 +65,47 @@ public class XMLService : IXMLService { int concurrencyLevel = numProcs * 2; int startingSize = 2909; int startingSizeAllCollections = 23; - var ret = new ConcurrentDictionary>(concurrencyLevel, startingSizeAllCollections); - // Note Parallelization brings almost nothing to the table (on a laptop) here and below. - // Parallel.ForEach(_Roots, (root) => { + var ret = new ConcurrentDictionary(concurrencyLevel, startingSizeAllCollections); foreach (var root in _Roots) { - if (root.Value.XPathCollection != null) - foreach (var coll in root.Value.XPathCollection) { + if (root.Value.Collections != null) + foreach (var coll in root.Value.Collections) { var elem = document.XPathSelectElements(coll.xPath); if (elem != null && elem.Any()) { - if (!ret.ContainsKey(coll.Key)) - ret[coll.Key] = new ConcurrentDictionary(concurrencyLevel, startingSize); + var items = new ConcurrentDictionary(concurrencyLevel, startingSize); Parallel.ForEach(elem, (e) => { - // foreach(var e in elem) { - var k = coll.KeyFunc(e); + var k = coll.GenerateKey(e); if (k != null) { var searchtext = coll.Searchable ? StringHelpers.NormalizeWhiteSpace(e.ToString(), ' ', false) : null; - ret[coll.Key][k] = new CollectedItem(k, e, root.Value, coll.Key, searchtext); + var datafileds = coll.GenerateDataFields != null ? 
+ coll.GenerateDataFields(e) : + null; + items[k] = new CollectedItem(k, e, root.Value, coll.Key, datafileds, searchtext); } - // } }); + if (items.Any()) { + if (!ret.ContainsKey(coll.Key)) + ret[coll.Key] = new ItemsCollection(coll.Key, coll.Searchable, root.Value, coll.GroupingsGeneration, coll.SortingsGeneration); + foreach (var item in items) + ret[coll.Key].Items.Add(item.Key, item.Value); + } } } } - // }); - _collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value.ToDictionary(z => z.Key, f => f.Value, null), null); + + if (ret.Any()) { + Parallel.ForEach(ret, (collection) => { + collection.Value.GenerateGroupings(); + collection.Value.GenerateSortings(); + }); + } + _collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value); } public List<(string Index, List<(string Page, string Line, string Preview)> Results)>? SearchCollection(string collection, string searchword, IReaderService reader) { if (!_collectedProduction.ContainsKey(collection)) return null; - var searchableObjects = _collectedProduction[collection]; + var searchableObjects = _collectedProduction[collection].Items; var res = new ConcurrentBag<(string Index, List<(string Page, string Line, string preview)> Results)>(); var sw = StringHelpers.NormalizeWhiteSpace(searchword.Trim()); Parallel.ForEach(searchableObjects, (obj) => {