See last commit.

This commit is contained in:
schnulller
2022-06-25 23:01:52 +02:00
parent 65e18f136d
commit 056ab77888
14 changed files with 186 additions and 56 deletions

View File

@@ -7,14 +7,23 @@ public class CollectedItem : ISearchable {
public string Index { get; private set; }
public string Collection { get; private set; }
public string? SearchText { get; private set; }
public Dictionary<string, string[]>? Fields { get; private set; }
public XElement ELement { get; private set; }
public IXMLRoot Root { get; private set; }
public CollectedItem(string index, XElement element, IXMLRoot root, string collection, string? searchtext = null) {
public CollectedItem(
string index,
XElement element,
IXMLRoot root,
string collection,
Dictionary<string, string[]>? fields,
string? searchtext = null
) {
this.Index = index;
this.SearchText = searchtext;
this.Collection = collection;
this.Root = root;
this.ELement = element;
this.Fields = fields;
}
}

View File

@@ -0,0 +1,47 @@
namespace HaWeb.Models;
using HaWeb.XMLParser;
/// <summary>
/// A named set of <see cref="CollectedItem"/>s, plus optional generator functions
/// that derive groupings and sortings from the items currently held.
/// </summary>
public class ItemsCollection {
    /// <summary>Name handed to the constructor.</summary>
    public string Name { get; private set; }

    /// <summary>Items of this collection, addressed by a string key. Empty after construction.</summary>
    public Dictionary<string, CollectedItem> Items { get; private set; }

    /// <summary>Flag handed to the constructor; this class only stores it.</summary>
    public bool Searchable { get; private set; }

    /// <summary>Root handed to the constructor; this class only stores it.</summary>
    public IXMLRoot Root { get; private set; }

    /// <summary>Function used by <see cref="GenerateGroupings"/>; may be null.</summary>
    public Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration { get; private set; }

    /// <summary>Function used by <see cref="GenerateSortings"/>; may be null.</summary>
    public Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration { get; private set; }

    /// <summary>Result of the last successful <see cref="GenerateGroupings"/> run; null until then.</summary>
    public Dictionary<string, Lookup<string, CollectedItem>>? Groupings { get; private set; }

    /// <summary>Result of the last successful <see cref="GenerateSortings"/> run; null until then.</summary>
    public Dictionary<string, List<CollectedItem>>? Sortings { get; private set; }

    /// <summary>
    /// Creates an empty collection and remembers the optional generator functions.
    /// </summary>
    /// <param name="name">Stored in <see cref="Name"/>.</param>
    /// <param name="searchable">Stored in <see cref="Searchable"/>.</param>
    /// <param name="root">Stored in <see cref="Root"/>.</param>
    /// <param name="groupingsFunc">Stored in <see cref="GroupingsGeneration"/>.</param>
    /// <param name="sortingsFunc">Stored in <see cref="SortingsGeneration"/>.</param>
    public ItemsCollection(
        string name,
        bool searchable,
        IXMLRoot root,
        Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? groupingsFunc = null,
        Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? sortingsFunc = null
    ) {
        this.Items = new Dictionary<string, CollectedItem>();
        this.Name = name;
        this.Root = root;
        this.Searchable = searchable;
        this.GroupingsGeneration = groupingsFunc;
        this.SortingsGeneration = sortingsFunc;
    }

    /// <summary>
    /// Rebuilds <see cref="Groupings"/> from the current items.
    /// </summary>
    /// <param name="groupingsFunc">
    /// When non-null, replaces the stored <see cref="GroupingsGeneration"/> before running.
    /// </param>
    /// <remarks>
    /// Nothing is generated (and <see cref="Groupings"/> keeps its previous value) when no
    /// generator is available or <see cref="Items"/> is empty.
    /// </remarks>
    public void GenerateGroupings(
        Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? groupingsFunc = null
    ) {
        // A supplied function overrides the stored one; null keeps what we have.
        this.GroupingsGeneration = groupingsFunc ?? this.GroupingsGeneration;

        var generate = this.GroupingsGeneration;
        if (generate is null || this.Items.Count == 0)
            return;

        // The generator receives a fresh list copy of the item values.
        var snapshot = this.Items.Values.ToList();
        this.Groupings = generate(snapshot);
    }

    /// <summary>
    /// Rebuilds <see cref="Sortings"/> from the current items.
    /// </summary>
    /// <param name="sortingsFunc">
    /// When non-null, replaces the stored <see cref="SortingsGeneration"/> before running.
    /// </param>
    /// <remarks>
    /// Nothing is generated (and <see cref="Sortings"/> keeps its previous value) when no
    /// generator is available or <see cref="Items"/> is empty.
    /// </remarks>
    public void GenerateSortings(
        Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? sortingsFunc = null
    ) {
        // A supplied function overrides the stored one; null keeps what we have.
        this.SortingsGeneration = sortingsFunc ?? this.SortingsGeneration;

        var generate = this.SortingsGeneration;
        if (generate is null || this.Items.Count == 0)
            return;

        // The generator receives a fresh list copy of the item values.
        var snapshot = this.Items.Values.ToList();
        this.Sortings = generate(snapshot);
    }
}

View File

@@ -2,5 +2,5 @@ namespace HaWeb.SearchHelpers;
public interface ISearchable {
public string Index { get; }
public string SearchText { get; }
public string? SearchText { get; }
}

View File

@@ -16,7 +16,7 @@ public class SearchRules {
sb.Append(t);
var sw = reader.State.SearchWord;
if (sb.Length >= sw.Length) {
if (sb.ToString().ToLower().Contains(sw)) {
if (sb.ToString().ToUpperInvariant().Contains(sw)) {
if (reader.State.Results == null)
reader.State.Results = new List<(string Page, string Line)>();
reader.State.Results.Add((reader.CurrentPage, reader.CurrentLine));

View File

@@ -29,7 +29,7 @@ public static class StringHelpers {
}
} else {
skipped = false;
if (toLower) output.Append(char.ToLower(c));
if (toLower) output.Append(char.ToUpperInvariant(c));
else output.Append(c);
}
}

View File

@@ -7,15 +7,27 @@ public class CommentRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Register";
public string Prefix { get; } = "register";
public string[] XPathContainer { get; } = { ".//data//kommentare/kommcat", ".//kommentare/kommcat" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("comments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, true),
("comments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, true),
("comments-edition", "/opus/data/kommentare/kommcat[@value='editionen']/kommentar", GetKey, true),
("comments-edition", "/opus/kommentare/kommcat[@value='editionen']/kommentar", GetKey, true),
("comments-forschung", "/opus/data/kommentare/kommcat[@value='forschung']/kommentar", GetKey, true),
("comments-forschung", "/opus/kommentare/kommcat[@value='forschung']/kommentar", GetKey, true),
("comments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar", GetKey, false),
("comments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar", GetKey, false),
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("comments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, null, null, null, true),
("comments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar", GetKey, null, null, null, true),
("subcomments-register", "/opus/data/kommentare/kommcat[@value='neuzeit']/kommentar/subsection", GetKey, null, null, null, true),
("subcomments-register", "/opus/kommentare/kommcat[@value='neuzeit']/kommentar/subsection", GetKey, null, null, null, true),
("comments-edition", "/opus/data/kommentare/kommcat[@value='editionen']/kommentar", GetKey, null, null, null, true),
("comments-edition", "/opus/kommentare/kommcat[@value='editionen']/kommentar", GetKey, null, null, null, true),
("comments-forschung", "/opus/data/kommentare/kommcat[@value='forschung']/kommentar", GetKey, null, null, null, true),
("comments-forschung", "/opus/kommentare/kommcat[@value='forschung']/kommentar", GetKey, null, null, null, true),
("comments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar", GetKey, null, null, null, false),
("comments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar", GetKey, null, null, null, false),
("subcomments-bibel", "/opus/data/kommentare/kommcat[@value='bibel']/kommentar/subsection", GetKey, null, null, null, false),
("subcomments-bibel", "/opus/kommentare/kommcat[@value='bibel']/kommentar/subsection", GetKey, null, null, null, false),
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -7,9 +7,17 @@ public class DescriptionsRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Metadaten";
public string Prefix { get; } = "metadaten";
public string[] XPathContainer { get; } = { ".//data/descriptions", ".//descriptions" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("metas", "/opus/descriptions/letterDesc", GetKey, false),
("metas", "/opus/data/descriptions/letterDesc", GetKey, false)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("metas", "/opus/descriptions/letterDesc", GetKey, null, null, null, false),
("metas", "/opus/data/descriptions/letterDesc", GetKey, null, null, null, false)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -8,9 +8,17 @@ public class DocumentRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Brieftext";
public string Prefix { get; } = "brieftext";
public string[] XPathContainer { get; } = { ".//data/document", ".//document" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("letters", "/opus/data/document/letterText", GetKey, true),
("letters", "/opus/document/letterText", GetKey, true)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("letters", "/opus/data/document/letterText", GetKey, null, null, null, true),
("letters", "/opus/document/letterText", GetKey, null, null, null, true)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -7,9 +7,17 @@ public class EditsRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Texteingriffe";
public string Prefix { get; } = "texteingriffe";
public string[] XPathContainer { get; } = { ".//data/edits", ".//edits" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("edits", "/data/edits/editreason", GetKey, true),
("edits", "/edits/editreason", GetKey, true)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("edits", "/data/edits/editreason", GetKey, null, null, null, true),
("edits", "/edits/editreason", GetKey, null, null, null, true)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -7,9 +7,17 @@ public class MarginalsRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Stellenkommentar";
public string Prefix { get; } = "stellenkommentar";
public string[] XPathContainer { get; } = { ".//data/marginalien", ".//marginalien" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("marginals", "/data/marginalien/marginal", GetKey, true),
("marginals", "/marginalien/marginal", GetKey, true)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("marginals", "/data/marginalien/marginal", GetKey, null, null, null, true),
("marginals", "/marginalien/marginal", GetKey, null, null, null, true)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -7,13 +7,21 @@ public class ReferencesRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Personen / Orte";
public string Prefix { get; } = "personenorte";
public string[] XPathContainer { get; } = { ".//data/definitions", ".//definitions" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("person-definitions", "/opus/data/definitions/personDefs/personDef", GetKey, false),
("person-definitions", "/opus/definitions/personDefs/personDef", GetKey, false),
("hand-definitions", "/opus/data/definitions/handDefs/handDef", GetKey, false),
("hand-definitions", "/opus/definitions/handDefs/handDef", GetKey, false),
("location-definitions", "/opus/data/definitions/locationDefs/locationDef", GetKey, false),
("location-definitions", "/opus/definitions/locationDefs/locationDef", GetKey, false)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("person-definitions", "/opus/data/definitions/personDefs/personDef", GetKey, null, null, null, false),
("person-definitions", "/opus/definitions/personDefs/personDef", GetKey, null, null, null, false),
("hand-definitions", "/opus/data/definitions/handDefs/handDef", GetKey, null, null, null, false),
("hand-definitions", "/opus/definitions/handDefs/handDef", GetKey, null, null, null, false),
("location-definitions", "/opus/data/definitions/locationDefs/locationDef", GetKey, null, null, null, false),
("location-definitions", "/opus/definitions/locationDefs/locationDef", GetKey, null, null, null, false)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -7,9 +7,17 @@ public class TraditionsRoot : HaWeb.XMLParser.IXMLRoot {
public string Type { get; } = "Überlieferung";
public string Prefix { get; } = "ueberlieferung";
public string[] XPathContainer { get; } = { ".//data/traditions", ".//traditions" };
public (string Key, string xPath, Func<XElement, string?> KeyFunc, bool Searchable)[]? XPathCollection { get; } = {
("tradition", "/opus/data/traditions/letterTradition", GetKey, true),
("tradition", "/opus/traditions/letterTradition", GetKey, true)
public (
string Key,
string xPath,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? Collections { get; } = {
("tradition", "/opus/data/traditions/letterTradition", GetKey, null, null, null, true),
("tradition", "/opus/traditions/letterTradition", GetKey, null, null, null, true)
};
public Predicate<XElement> IsCollectedObject { get; } = (elem) => {

View File

@@ -16,16 +16,20 @@ public interface IXMLRoot {
// Collections of Elements to be created from this Root
// Key: the key under which the element(s) will be filed
// xPath: the (absolute) XPath to the element(s)
// KeyFunc: How to extrect an identifier for the single element in the collection
// LookupsFunc: Function to generate metadata fields for the object, which will then in turn be a possibility to seach and filter without parsing
// Searchable: Will the element be indexed for full-text-search?
// GenerateKey: How to extract an identifier for a single element in the collection
// GenerateDataFields: Generates a dictionary of data fields associated with each collected element. Input: XElement; output: Dictionary<string, string[]>
// GroupingsGeneration: Data fields by which dictionary-like groups should be held in memory. Input: List<CollectedItem>; output: Dictionary<string, Lookup<string, CollectedItem>>
// SortingsGeneration: Data fields by which a sorting should be held in memory. Input: List<CollectedItem>; output: Dictionary<string, List<CollectedItem>> (ordered lists)
public abstract (
string Key,
string xPath,
Func<XElement, string?> KeyFunc,
// Func<XElement, Dictionary<string, string[]>> LookupsFunc,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? XPathCollection { get; }
)[]? Collections { get; }
// Determines child objects to be collected
// (deprecated; see Collections above; only used internally)

View File

@@ -15,8 +15,8 @@ public class XMLService : IXMLService {
private Stack<Dictionary<string, FileList?>>? _InProduction;
private Dictionary<string, Dictionary<string, CollectedItem>> _collectedProduction;
private Dictionary<string, Dictionary<string, CollectedItem>> _collectedUsed;
private Dictionary<string, ItemsCollection> _collectedProduction;
private Dictionary<string, ItemsCollection> _collectedUsed;
public XMLService() {
// Getting all classes which implement IXMLRoot for possible document endpoints
@@ -65,37 +65,47 @@ public class XMLService : IXMLService {
int concurrencyLevel = numProcs * 2;
int startingSize = 2909;
int startingSizeAllCollections = 23;
var ret = new ConcurrentDictionary<string, ConcurrentDictionary<string, CollectedItem>>(concurrencyLevel, startingSizeAllCollections);
// Note Parallelization brings almost nothing to the table (on a laptop) here and below.
// Parallel.ForEach(_Roots, (root) => {
var ret = new ConcurrentDictionary<string, ItemsCollection>(concurrencyLevel, startingSizeAllCollections);
foreach (var root in _Roots) {
if (root.Value.XPathCollection != null)
foreach (var coll in root.Value.XPathCollection) {
if (root.Value.Collections != null)
foreach (var coll in root.Value.Collections) {
var elem = document.XPathSelectElements(coll.xPath);
if (elem != null && elem.Any()) {
if (!ret.ContainsKey(coll.Key))
ret[coll.Key] = new ConcurrentDictionary<string, CollectedItem>(concurrencyLevel, startingSize);
var items = new ConcurrentDictionary<string, CollectedItem>(concurrencyLevel, startingSize);
Parallel.ForEach(elem, (e) => {
// foreach(var e in elem) {
var k = coll.KeyFunc(e);
var k = coll.GenerateKey(e);
if (k != null) {
var searchtext = coll.Searchable ?
StringHelpers.NormalizeWhiteSpace(e.ToString(), ' ', false) :
null;
ret[coll.Key][k] = new CollectedItem(k, e, root.Value, coll.Key, searchtext);
var datafileds = coll.GenerateDataFields != null ?
coll.GenerateDataFields(e) :
null;
items[k] = new CollectedItem(k, e, root.Value, coll.Key, datafileds, searchtext);
}
// }
});
if (items.Any()) {
if (!ret.ContainsKey(coll.Key))
ret[coll.Key] = new ItemsCollection(coll.Key, coll.Searchable, root.Value, coll.GroupingsGeneration, coll.SortingsGeneration);
foreach (var item in items)
ret[coll.Key].Items.Add(item.Key, item.Value);
}
}
}
}
if (ret.Any()) {
Parallel.ForEach(ret, (collection) => {
collection.Value.GenerateGroupings();
collection.Value.GenerateSortings();
});
}
}
}
// });
_collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value.ToDictionary(z => z.Key, f => f.Value, null), null);
_collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value);
}
public List<(string Index, List<(string Page, string Line, string Preview)> Results)>? SearchCollection(string collection, string searchword, IReaderService reader) {
if (!_collectedProduction.ContainsKey(collection)) return null;
var searchableObjects = _collectedProduction[collection];
var searchableObjects = _collectedProduction[collection].Items;
var res = new ConcurrentBag<(string Index, List<(string Page, string Line, string preview)> Results)>();
var sw = StringHelpers.NormalizeWhiteSpace(searchword.Trim());
Parallel.ForEach(searchableObjects, (obj) => {