Implemented full-text search across tags and line breaks for letters

This commit is contained in:
schnulller
2022-06-19 00:01:14 +02:00
parent 078c4b75b8
commit 4ef0e260e0
24 changed files with 510 additions and 148 deletions

View File

@@ -0,0 +1,6 @@
namespace HaWeb.SearchHelpers;
/// <summary>
/// Contract for an item that can take part in a text search: it exposes a
/// key and the text that is searched.
/// </summary>
public interface ISearchable {
    /// <summary>
    /// Key of the item.
    /// NOTE(review): presumably the identifier results are reported under — confirm against callers.
    /// </summary>
    string Index { get; }

    /// <summary>
    /// Text associated with the item.
    /// NOTE(review): presumably the content a search is run against — confirm against callers.
    /// </summary>
    string SearchText { get; }
}

View File

@@ -0,0 +1,46 @@
namespace HaWeb.SearchHelpers;
using System.Text;
using System.Web;
using TagFuncList = List<(Func<HaXMLReader.EvArgs.Tag, HaWeb.HTMLParser.LineXMLHelper<SearchState>, bool>, Action<System.Text.StringBuilder, HaXMLReader.EvArgs.Tag, HaWeb.HTMLParser.LineXMLHelper<SearchState>>)>;
using TextFuncList = List<(Func<HaXMLReader.EvArgs.Text, HaWeb.HTMLParser.LineXMLHelper<SearchState>, bool>, Action<System.Text.StringBuilder, HaXMLReader.EvArgs.Text, HaWeb.HTMLParser.LineXMLHelper<SearchState>>)>;
using WhitespaceFuncList = List<(Func<HaXMLReader.EvArgs.Whitespace, HaWeb.HTMLParser.LineXMLHelper<SearchState>, bool>, Action<System.Text.StringBuilder, HaXMLReader.EvArgs.Whitespace, HaWeb.HTMLParser.LineXMLHelper<SearchState>>)>;
/// <summary>
/// Rule tables that implement a streaming full-text search. Each rule is a
/// (predicate, action) pair; the predicates used here always return true, so
/// the action runs for every text and whitespace node the rule is offered.
/// Matches are recorded in <see cref="SearchState.Results"/> together with the
/// reader's current page and line.
/// </summary>
public class SearchRules {
    /// <summary>Rules applied to text nodes: the node's value is fed into the search buffer.</summary>
    public static readonly TextFuncList TextRules = new TextFuncList() {
        ( (x, _) => true, (sb, text, reader) => AppendAndSearch(sb, text.Value, reader) )
    };

    /// <summary>
    /// Rules applied to whitespace nodes. Whitespace goes through the same
    /// buffer as text so that search words containing spaces can match across
    /// node boundaries.
    /// </summary>
    public static readonly WhitespaceFuncList WhitespaceRules = new WhitespaceFuncList() {
        ( (x, _) => true, (sb, text, reader) => AppendAndSearch(sb, text.Value, reader) )
    };

    /// <summary>
    /// Appends <paramref name="value"/> to the rolling buffer, records a hit if
    /// the buffer now contains the search word, and trims the buffer.
    /// </summary>
    /// <param name="sb">Rolling buffer shared between consecutive nodes.</param>
    /// <param name="value">Raw value of the current text or whitespace node.</param>
    /// <param name="reader">Reader that carries the <see cref="SearchState"/> and the current page/line.</param>
    private static void AppendAndSearch(StringBuilder sb, string value, HaWeb.HTMLParser.LineXMLHelper<SearchState> reader) {
        var chunk = value;
        if (reader.State.Normalize)
            chunk = HaWeb.SearchHelpers.StringHelpers.NormalizeWhiteSpace(chunk);
        sb.Append(chunk);

        var searchWord = reader.State.SearchWord;
        if (sb.Length < searchWord.Length)
            return;

        // Case-insensitive for both node kinds. Previously only text nodes
        // lower-cased the buffer, so a match completed by a whitespace node was
        // missed whenever the retained text contained upper-case letters.
        if (sb.ToString().Contains(searchWord, System.StringComparison.OrdinalIgnoreCase)) {
            reader.State.Results ??= new List<(string Page, string Line)>();
            reader.State.Results.Add((reader.CurrentPage, reader.CurrentLine));
        }

        // Keep only the last (length - 1) characters: enough to complete a match
        // that straddles the next node, but too short to contain a full match.
        // Keeping a full search-word length would report a match at the end of
        // the buffer a second time on the next node.
        var keep = searchWord.Length > 0 ? searchWord.Length - 1 : 0;
        sb.Remove(0, sb.Length - keep);
    }
}

View File

@@ -0,0 +1,15 @@
namespace HaWeb.SearchHelpers;
using System.Text;
/// <summary>
/// Mutable state carried through a search run: the word being searched for,
/// whether node text is whitespace-normalized first, and the hits found so far.
/// </summary>
public class SearchState : HaWeb.HTMLParser.IState {
    /// <summary>The word looked for in the search buffer.</summary>
    internal string SearchWord;

    /// <summary>When true, node text is passed through whitespace normalization before it is searched.</summary>
    internal bool Normalize;

    /// <summary>Page/line pairs of the hits; stays null until the first hit is recorded.</summary>
    internal List<(string Page, string Line)>? Results;

    /// <summary>Creates a state for one search run.</summary>
    /// <param name="searchword">Word to search for.</param>
    /// <param name="normalize">Whether node text should be whitespace-normalized before matching.</param>
    public SearchState(string searchword, bool normalize = false)
        => (SearchWord, Normalize) = (searchword, normalize);

    /// <summary>Part of the state contract; this state has nothing to set up, so the body is empty.</summary>
    public void SetupState() {
    }
}

View File

@@ -0,0 +1,11 @@
namespace HaWeb.SearchHelpers;
/// <summary>
/// Plain <see cref="ISearchable"/> implementation that stores the two values
/// passed to its constructor.
/// </summary>
public class SeachableItem : ISearchable {
    /// <summary>Key given at construction.</summary>
    public string Index { get; private set; }

    /// <summary>Text given at construction.</summary>
    public string SearchText { get; private set; }

    /// <summary>Creates an item from a key and its text.</summary>
    /// <param name="index">Value exposed through <see cref="Index"/>.</param>
    /// <param name="searchtext">Value exposed through <see cref="SearchText"/>.</param>
    public SeachableItem(string index, string searchtext)
        => (Index, SearchText) = (index, searchtext);
}

View File

@@ -0,0 +1,39 @@
namespace HaWeb.SearchHelpers;
using System.Text;
/// <summary>String utilities used to prepare text for searching.</summary>
public static class StringHelpers {
    /// <summary>
    /// Collapses every run of whitespace characters into a single
    /// <paramref name="normalizeTo"/> character and optionally lower-cases all
    /// other characters.
    /// </summary>
    /// <param name="input">Text to normalize; null or empty yields an empty string.</param>
    /// <param name="normalizeTo">Character written once for each whitespace run.</param>
    /// <param name="toLower">When true, non-whitespace characters are lower-cased.</param>
    /// <returns>The normalized text.</returns>
    public static string NormalizeWhiteSpace(string input, char normalizeTo = ' ', bool toLower = true) {
        if (string.IsNullOrEmpty(input))
            return string.Empty;

        // TODO: decide how punctuation (char.IsPunctuation()) should be handled,
        // and whether whitespace should be removed entirely instead of collapsed,
        // which would make the search whitespace-insensitive.
        var result = new StringBuilder();
        var inWhitespaceRun = false;
        for (var i = 0; i < input.Length; i++) {
            var current = input[i];

            if (!char.IsWhiteSpace(current)) {
                inWhitespaceRun = false;
                result.Append(toLower ? char.ToLower(current) : current);
                continue;
            }

            // Only the first character of a whitespace run produces output.
            if (inWhitespaceRun)
                continue;

            result.Append(normalizeTo);
            inWhitespaceRun = true;
        }

        return result.ToString();
    }
}