mirror of
https://github.com/Theodor-Springmann-Stiftung/hamann-ausgabe-core.git
synced 2025-10-28 16:55:32 +00:00
63 lines
2.2 KiB
C#
63 lines
2.2 KiB
C#
using System.Text;
|
|
using System.Text.RegularExpressions;
|
|
|
|
/// <summary>
/// Utilities for collecting numeric character references (e.g. <c>&amp;#xE4;</c> or
/// <c>&amp;#228;</c>) from text files and for re-encoding literal characters as
/// hexadecimal character references.
/// </summary>
public static class CharacterEntityReferences {
    // Hexadecimal reference. Six digits are enough for U+10FFFF, the largest code point.
    private static readonly Regex HexReference =
        new(@"&#x([0-9a-fA-F]{1,6});", RegexOptions.CultureInvariant);

    // Decimal reference. Seven digits are enough for 1114111 (= U+10FFFF).
    private static readonly Regex DecimalReference =
        new(@"&#([0-9]{1,7});", RegexOptions.CultureInvariant);

    // Characters that are never collected by GetCodePoints (XML markup characters).
    private static readonly HashSet<string> XmlForbidden = new() { "<", "&", ">", "'", "\"" };

    /// <summary>
    /// Rewrites every file in place, replacing each occurrence of the given strings with
    /// the hexadecimal character references of their code points.
    /// </summary>
    /// <param name="files">Paths of the files to rewrite.</param>
    /// <param name="codepoints">
    /// Strings to replace. <c>null</c> and empty entries are ignored.
    /// </param>
    /// <remarks>
    /// All replacements happen in a single pass over the original text. Replacing one
    /// string after the other would re-escape characters of already inserted references
    /// (for example <c>;</c>, <c>#</c> or <c>x</c>) and corrupt the output.
    /// </remarks>
    public static void Replace(IEnumerable<string> files, IEnumerable<string?> codepoints) {
        // Materialise once so the sequence is not re-enumerated for every file.
        // Longer strings come first so that they win over their own prefixes.
        var needles = codepoints
            .Where(s => !string.IsNullOrEmpty(s))
            .Select(s => s!)
            .Distinct()
            .OrderByDescending(s => s.Length)
            .ToList();

        // An empty alternation would match everywhere, so no matcher is built in that case.
        Regex? matcher = needles.Count == 0
            ? null
            : new Regex(string.Join("|", needles.Select(Regex.Escape)), RegexOptions.CultureInvariant);

        foreach (var f in files) {
            Console.WriteLine("Replacing file " + f);
            var t = File.ReadAllText(f);
            if (matcher != null) {
                t = matcher.Replace(t, m => ConvertStringToCodepoint(m.Value));
            }
            File.WriteAllText(f, t);
        }
    }

    /// <summary>
    /// Converts every code point of <paramref name="input"/> into a hexadecimal
    /// character reference of the form <c>&amp;#xHEX;</c> (upper-case digits).
    /// </summary>
    /// <param name="input">The text to encode; surrogate pairs yield a single reference.</param>
    /// <returns>The concatenated character references.</returns>
    static string ConvertStringToCodepoint(string input) {
        var sb = new StringBuilder();
        // Advance by two UTF-16 code units when the current position starts a surrogate pair.
        for (int i = 0; i < input.Length; i += char.IsSurrogatePair(input, i) ? 2 : 1) {
            int codepoint = char.ConvertToUtf32(input, i);
            sb.Append("&#x")
              .Append(codepoint.ToString("X", System.Globalization.CultureInfo.InvariantCulture))
              .Append(';');
        }
        return sb.ToString();
    }

    /// <summary>
    /// Scans the files for hexadecimal and decimal numeric character references and
    /// returns the set of characters they denote.
    /// </summary>
    /// <param name="files">Paths of the files to scan.</param>
    /// <returns>
    /// The distinct referenced characters as strings (one code point each), excluding
    /// <c>&lt;</c>, <c>&amp;</c>, <c>&gt;</c>, <c>'</c> and <c>"</c>.
    /// </returns>
    /// <remarks>
    /// References that do not denote a valid Unicode scalar value (surrogate range or
    /// above U+10FFFF) are skipped. Every valid decimal reference is echoed to the console.
    /// </remarks>
    public static HashSet<string?> GetCodePoints(string[] files) {
        HashSet<string?> res = new();

        foreach (var f in files) {
            var t = File.ReadAllText(f);

            foreach (Match m in HexReference.Matches(t)) {
                if (TryDecode(m.Groups[1].Value, 16, out var c) && !XmlForbidden.Contains(c)) {
                    res.Add(c);
                }
            }

            foreach (Match m in DecimalReference.Matches(t)) {
                if (!TryDecode(m.Groups[1].Value, 10, out var c)) {
                    continue;
                }
                Console.WriteLine(m.Value + " " + c);
                if (!XmlForbidden.Contains(c)) {
                    res.Add(c);
                }
            }
        }

        return res;
    }

    // Parses the digits of a character reference; returns false when the value is not a
    // Unicode scalar value, for which char.ConvertFromUtf32 would throw.
    private static bool TryDecode(string digits, int numberBase, out string character) {
        int value = Convert.ToInt32(digits, numberBase);
        if (value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) {
            character = string.Empty;
            return false;
        }
        character = char.ConvertFromUtf32(value);
        return true;
    }
}