namespace HaWeb.XMLParser;
using System.Xml.Linq;
using System.Xml.XPath;
using Microsoft.AspNetCore.Mvc.ModelBinding;
using HaWeb.Models;
using HaWeb.SearchHelpers;
using System.Collections.Concurrent;
using System.Threading.Tasks;
using System.Text;
using HaXMLReader.Interfaces;

/// <summary>
/// Keeps track of the known XML root types and global collections, of the
/// documents currently marked as "used", of the file sets that went into
/// production, and of the item collections built from the production document.
/// </summary>
public class XMLService : IXMLService {
    // Capacity hints passed to the ConcurrentDictionary constructors below.
    private const int ItemsStartingSize = 2909;
    private const int CollectionsStartingSize = 23;

    // Used file lists, keyed by root prefix.
    private Dictionary<string, FileList?>? _Used;
    // Known root types, keyed by IXMLRoot.Prefix.
    private Dictionary<string, IXMLRoot>? _Roots;
    // Known global collections, keyed by IXMLCollection.Key.
    private Dictionary<string, IXMLCollection>? _Collections;
    // Snapshots of the used file lists; the most recent one is on top.
    private Stack<Dictionary<string, FileList?>>? _InProduction;
    // Collections built by SetInProduction(XDocument); null until that method ran.
    private Dictionary<string, ItemsCollection>? _collectedProduction;
    // Not read or written anywhere in this class.
    private Dictionary<string, ItemsCollection>? _collectedUsed;

    /// <summary>
    /// Discovers and instantiates every <see cref="IXMLRoot"/> and every global
    /// <see cref="IXMLCollection"/> implementation of the executing assembly.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No root implementation or no global collection implementation was found.
    /// </exception>
    public XMLService() {
        // Getting all classes which implement IXMLRoot for possible document endpoints
        foreach (var type in _GetAllTypesThatImplementInterface<IXMLRoot>()) {
            if (Activator.CreateInstance(type) is IXMLRoot root) {
                _Roots ??= new Dictionary<string, IXMLRoot>();
                _Roots.Add(root.Prefix, root);
            }
        }

        // Only collections that report themselves as global are registered here.
        foreach (var type in _GetAllTypesThatImplementInterface<IXMLCollection>()) {
            if (Activator.CreateInstance(type) is IXMLCollection collection && collection.IsGlobal()) {
                _Collections ??= new Dictionary<string, IXMLCollection>();
                _Collections.Add(collection.Key, collection);
            }
        }

        if (_Roots == null || !_Roots.Any())
            throw new InvalidOperationException("No classes for upload endpoints were found!");
        if (_Collections == null || !_Collections.Any())
            throw new InvalidOperationException("No classes for object collection were found!");
    }

    /// <summary>Returns the root registered under <paramref name="name"/>, or null if there is none.</summary>
    public IXMLRoot? GetRoot(string name) {
        if (_Roots == null) return null;
        _Roots.TryGetValue(name, out var root);
        return root;
    }

    /// <summary>Returns a new list containing all registered roots, or null if none are registered.</summary>
    public List<IXMLRoot>? GetRootsList() => _Roots?.Values.ToList();

    /// <summary>Returns the internal prefix-to-root dictionary (not a copy).</summary>
    public Dictionary<string, IXMLRoot>? GetRootsDictionary() => _Roots;

    /// <summary>Returns the most recently pushed production snapshot, or null if nothing was pushed.</summary>
    public Dictionary<string, FileList?>? GetInProduction() {
        if (_InProduction == null) return null;
        return _InProduction.Peek();
    }

    /// <summary>
    /// Pushes a snapshot of the currently used file lists onto the production stack.
    /// Does nothing if no documents are used, or if any used category has no files.
    /// </summary>
    public void SetInProduction() {
        if (_Used == null) return;

        var inProduction = new Dictionary<string, FileList?>();
        foreach (var category in _Used) {
            var files = category.Value?.GetFileList();
            // A single empty category aborts the whole operation.
            if (files == null || !files.Any()) return;
            inProduction.Add(category.Key, category.Value);
        }

        _InProduction ??= new Stack<Dictionary<string, FileList?>>();
        _InProduction.Push(inProduction);
    }

    /// <summary>
    /// Builds the item collections for <paramref name="document"/>: for every registered
    /// collection the elements matching its XPath expressions are selected, keyed via
    /// <c>GenerateKey</c>, and stored together with optional search text and data fields.
    /// Groupings and sortings are generated afterwards. The result replaces the
    /// previously collected production data.
    /// </summary>
    /// <param name="document">Document to collect from; a null document is ignored.</param>
    public void SetInProduction(XDocument document) {
        if (document == null || _Roots == null) return;

        int concurrencyLevel = Environment.ProcessorCount * 2;
        var ret = new ConcurrentDictionary<string, ItemsCollection>(concurrencyLevel, CollectionsStartingSize);

        if (_Collections != null) {
            foreach (var coll in _Collections) {
                var definition = coll.Value;
                var elements = definition.xPath
                    .SelectMany(path => document.XPathSelectElements(path))
                    .ToList();
                if (!elements.Any()) continue;

                var items = new ConcurrentDictionary<string, CollectedItem>(concurrencyLevel, ItemsStartingSize);
                Parallel.ForEach(elements, (e) => {
                    var k = definition.GenerateKey(e);
                    if (k == null) return;
                    var searchtext = definition.Searchable
                        ? StringHelpers.NormalizeWhiteSpace(e.ToString(), ' ', false)
                        : null;
                    var datafields = definition.GenerateDataFields != null
                        ? definition.GenerateDataFields(e)
                        : null;
                    // Elements producing the same key overwrite each other; which one
                    // survives depends on the scheduling of the parallel loop.
                    items[k] = new CollectedItem(k, e, definition, datafields, searchtext);
                });

                if (!items.Any()) continue;
                var target = ret.GetOrAdd(coll.Key, key => new ItemsCollection(key, definition));
                foreach (var item in items)
                    target.Items.Add(item.Key, item.Value);
            }
        }

        if (ret.Any()) {
            Parallel.ForEach(ret, (collection) => {
                collection.Value.GenerateGroupings();
                collection.Value.GenerateSortings();
            });
        }

        _collectedProduction = ret.ToDictionary(x => x.Key, y => y.Value);
    }

    /// <summary>
    /// Searches the search text of every item in the named collection for
    /// <paramref name="searchword"/>.
    /// </summary>
    /// <param name="collection">Key of the collection to search.</param>
    /// <param name="searchword">Search term; trimmed and whitespace-normalized before use.</param>
    /// <param name="reader">Service providing a string reader for each item's search text.</param>
    /// <returns>
    /// One entry per item for which the search state holds results, with page, line and
    /// preview text per hit; null if nothing has been collected yet or the collection
    /// is unknown.
    /// </returns>
    public List<(string Index, List<(string Page, string Line, string Preview)> Results)>? SearchCollection(string collection, string searchword, IReaderService reader) {
        // _collectedProduction is only set by SetInProduction(XDocument).
        if (_collectedProduction == null || !_collectedProduction.TryGetValue(collection, out var itemsCollection))
            return null;

        var searchableObjects = itemsCollection.Items;
        var res = new ConcurrentBag<(string Index, List<(string Page, string Line, string Preview)> Results)>();
        var sw = StringHelpers.NormalizeWhiteSpace(searchword.Trim());

        Parallel.ForEach(searchableObjects, (obj) => {
            if (obj.Value.SearchText == null) return;

            var state = new SearchState(sw);
            var rd = reader.RequestStringReader(obj.Value.SearchText);
            var parser = new HaWeb.HTMLParser.LineXMLHelper<SearchState>(state, rd, new StringBuilder(), null, null, null, SearchRules.TextRules, SearchRules.WhitespaceRules);
            rd.Read();
            if (state.Results == null) return;

            var hits = state.Results
                .Select(hit => (
                    hit.Page,
                    hit.Line,
                    parser.Lines != null
                        ? parser.Lines
                            .Where(line => line.Page == hit.Page && line.Line == hit.Line)
                            .Select(line => line.Text)
                            .FirstOrDefault(string.Empty)
                        : ""))
                .ToList();
            res.Add((obj.Value.Index, hits));
        });

        return res.ToList();
    }

    /// <summary>Discards all production snapshots.</summary>
    public void UnUseProduction() => _InProduction = null;

    /// <summary>
    /// Checks that <paramref name="document"/> has an <c>opus</c> root element and
    /// creates one <see cref="XMLRootDocument"/> per element recognised by a registered root.
    /// </summary>
    /// <param name="document">Uploaded document to inspect.</param>
    /// <param name="ModelState">Receives an "Error" entry when nothing usable is found.</param>
    /// <returns>The recognised documents, or null (with a model error added) if there are none.</returns>
    public List<XMLRootDocument>? ProbeFile(XDocument document, ModelStateDictionary ModelState) {
        // XDocument.Root is null for a document without a root element.
        var rootElement = document?.Root;
        if (rootElement == null || rootElement.Name != "opus") {
            ModelState.AddModelError("Error", "A valid Hamann-Document must begin with <opus>");
            return null;
        }

        List<XMLRootDocument>? res = null;
        if (_Roots != null) {
            foreach (var (_, root) in _Roots) {
                var elements = root.IsTypeOf(rootElement);
                if (elements == null) continue;
                foreach (var elem in elements) {
                    res ??= new List<XMLRootDocument>();
                    res.Add(_createXMLRootDocument(root, elem));
                }
            }
        }

        if (res == null)
            ModelState.AddModelError("Error", "Kein zum Hamann-Briefe-Projekt passendes XML gefunden.");
        return res;
    }

    /// <summary>Returns the internal prefix-to-file-list dictionary of used documents (not a copy).</summary>
    public Dictionary<string, FileList?>? GetUsedDictionary() => _Used;

    /// <summary>Adds a document and sets it to used.</summary>
    public void Use(XMLRootDocument doc) {
        _Used ??= new Dictionary<string, FileList?>();
        if (!_Used.TryGetValue(doc.Prefix, out var files) || files == null) {
            files = new FileList(doc.XMLRoot);
            _Used[doc.Prefix] = files;
        }
        files.Add(doc);
    }

    /// <summary>Removes every used document registered under <paramref name="prefix"/>.</summary>
    public void UnUse(string prefix) {
        _Used?.Remove(prefix);
    }

    /// <summary>Performs detection of using on the specified document type.</summary>
    public void AutoUse(string prefix) {
        if (_Used == null || !_Used.TryGetValue(prefix, out var files) || files == null) return;
        AutoUse(files);
    }

    /// <summary>
    /// Performs detection of using given a list of files: the used entry for the list's
    /// prefix is cleared, then for each distinct second identification component the
    /// document with the latest <c>Date</c> is marked as used.
    /// </summary>
    public void AutoUse(FileList filelist) {
        var list = filelist.GetFileList();
        if (list == null) return;

        _Used?.Remove(filelist.XMLRoot.Prefix);

        // TODO: Item1
        var lookup = list.ToLookup(x => x.IdentificationString.Item2);
        foreach (var idstring in lookup) {
            // Stable ordering: among equal dates the last one in the list is chosen.
            Use(idstring.OrderBy(x => x.Date).Last());
        }
    }

    /// <summary>
    /// Merges all used documents into a new <c>opus</c> element.
    /// </summary>
    /// <param name="ModelState">Receives an "Error" entry when merging is not possible.</param>
    /// <returns>
    /// The merged element, or null (with a model error added) if no documents are used
    /// or a used category contains no files.
    /// </returns>
    public XElement? MergeUsedDocuments(ModelStateDictionary ModelState) {
        if (_Used == null || _Roots == null) {
            ModelState.AddModelError("Error", "Keine Dokumente ausgewählt");
            return null;
        }

        var opus = new XElement("opus");
        // TODO: Workaround for bug in HaDocument: roots have to be added in a specific order
        foreach (var category in _Used.OrderByDescending(x => x.Key)) {
            var documents = category.Value?.GetFileList();
            if (documents == null || !documents.Any()) {
                // Fall back to the raw prefix if no root is registered for it.
                var message = _Roots.TryGetValue(category.Key, out var root)
                    ? root.Type + " nicht vorhanden."
                    : category.Key + " nicht vorhanden.";
                ModelState.AddModelError("Error", message);
                return null;
            }

            foreach (var document in documents) {
                document.XMLRoot.MergeIntoFile(opus, document);
            }
        }

        return opus;
    }

    // Wraps an element recognised by Root in an XMLRootDocument and fills in its fields.
    private XMLRootDocument _createXMLRootDocument(IXMLRoot Root, XElement element) {
        var doc = new XMLRootDocument(Root, Root.Prefix, Root.GenerateIdentificationString(element), element);
        doc.Fields = Root.GenerateFields(doc);
        return doc;
    }

    // Returns the non-abstract, non-interface types of the executing assembly that are assignable to T.
    private IEnumerable<Type> _GetAllTypesThatImplementInterface<T>() {
        return System.Reflection.Assembly.GetExecutingAssembly()
            .GetTypes()
            .Where(type => typeof(T).IsAssignableFrom(type) && !type.IsInterface && !type.IsAbstract);
    }
}