See last commit.

This commit is contained in:
schnulller
2022-06-25 23:01:52 +02:00
parent 65e18f136d
commit 056ab77888
14 changed files with 186 additions and 56 deletions

View File

@@ -16,16 +16,20 @@ public interface IXMLRoot {
// Collections of Elements to be created from this Root
// Key: the key under which the element(s) will be filed
// xPath: the (absolute) XPath to the element(s)
// KeyFunc: How to extract an identifier for a single element in the collection
// LookupsFunc: Function to generate metadata fields for the object, which in turn make it possible to search and filter without parsing
// Searchable: Will the element be indexed for full-text-search?
// GenerateKey: How to extract an identifier for a single element in the collection
// GenerateDataFields: Generate a dictionary of data associated with each of the collected elements; input: XElement, output: Dictionary<string, string[]>
// GroupingsGeneration: data fields by which dictionary-like groups should be held in memory; input: List<CollectedItem>, output: Dictionary<string, Lookup<string, CollectedItem>>
// SortingsGeneration: data fields by which a sorting should be held in memory; input: List<CollectedItem>, output: Dictionary<string, List<CollectedItem>> (each list ordered)
public abstract (
string Key,
string xPath,
Func<XElement, string?> KeyFunc,
// Func<XElement, Dictionary<string, string[]>> LookupsFunc,
Func<XElement, string?> GenerateKey,
Func<XElement, Dictionary<string, string[]>?>? GenerateDataFields,
Func<List<CollectedItem>, Dictionary<string, Lookup<string, CollectedItem>>?>? GroupingsGeneration,
Func<List<CollectedItem>, Dictionary<string, List<CollectedItem>>?>? SortingsGeneration,
bool Searchable
)[]? XPathCollection { get; }
)[]? Collections { get; }
// Determines child objects to be collected
// (deprecated see collections above; only used internally)

View File

@@ -15,8 +15,8 @@ public class XMLService : IXMLService {
private Stack<Dictionary<string, FileList?>>? _InProduction;
private Dictionary<string, Dictionary<string, CollectedItem>> _collectedProduction;
private Dictionary<string, Dictionary<string, CollectedItem>> _collectedUsed;
private Dictionary<string, ItemsCollection> _collectedProduction;
private Dictionary<string, ItemsCollection> _collectedUsed;
public XMLService() {
// Getting all classes which implement IXMLRoot for possible document endpoints
@@ -65,37 +65,47 @@ public class XMLService : IXMLService {
int concurrencyLevel = numProcs * 2;
int startingSize = 2909;
int startingSizeAllCollections = 23;
var ret = new ConcurrentDictionary<string, ConcurrentDictionary<string, CollectedItem>>(concurrencyLevel, startingSizeAllCollections);
// Note: Parallelization brings almost nothing to the table (on a laptop), here and below.
// Parallel.ForEach(_Roots, (root) => {
var ret = new ConcurrentDictionary<string, ItemsCollection>(concurrencyLevel, startingSizeAllCollections);
foreach (var root in _Roots) {
if (root.Value.XPathCollection != null)
foreach (var coll in root.Value.XPathCollection) {
if (root.Value.Collections != null)
foreach (var coll in root.Value.Collections) {
var elem = document.XPathSelectElements(coll.xPath);
if (elem != null && elem.Any()) {
if (!ret.ContainsKey(coll.Key))
ret[coll.Key] = new ConcurrentDictionary<string, CollectedItem>(concurrencyLevel, startingSize);
var items = new ConcurrentDictionary<string, CollectedItem>(concurrencyLevel, startingSize);
Parallel.ForEach(elem, (e) => {
// foreach(var e in elem) {
var k = coll.KeyFunc(e);
var k = coll.GenerateKey(e);
if (k != null) {
var searchtext = coll.Searchable ?
StringHelpers.NormalizeWhiteSpace(e.ToString(), ' ', false) :
null;
ret[coll.Key][k] = new CollectedItem(k, e, root.Value, coll.Key, searchtext);
var datafileds = coll.GenerateDataFields != null ?
coll.GenerateDataFields(e) :
null;
items[k] = new CollectedItem(k, e, root.Value, coll.Key, datafileds, searchtext);
}
// }
});
if (items.Any()) {
if (!ret.ContainsKey(coll.Key))
ret[coll.Key] = new ItemsCollection(coll.Key, coll.Searchable, root.Value, coll.GroupingsGeneration, coll.SortingsGeneration);
foreach (var item in items)
ret[coll.Key].Items.Add(item.Key, item.Value);
}
}
}
}
// });
_collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value.ToDictionary(z => z.Key, f => f.Value, null), null);
if (ret.Any()) {
Parallel.ForEach(ret, (collection) => {
collection.Value.GenerateGroupings();
collection.Value.GenerateSortings();
});
}
_collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value);
}
public List<(string Index, List<(string Page, string Line, string Preview)> Results)>? SearchCollection(string collection, string searchword, IReaderService reader) {
if (!_collectedProduction.ContainsKey(collection)) return null;
var searchableObjects = _collectedProduction[collection];
var searchableObjects = _collectedProduction[collection].Items;
var res = new ConcurrentBag<(string Index, List<(string Page, string Line, string preview)> Results)>();
var sw = StringHelpers.NormalizeWhiteSpace(searchword.Trim());
Parallel.ForEach(searchableObjects, (obj) => {