diff --git a/helpers/datatypes/string.go b/helpers/datatypes/string.go
index c0b10fb..52da60b 100644
--- a/helpers/datatypes/string.go
+++ b/helpers/datatypes/string.go
@@ -3,9 +3,10 @@ package datatypes
 import (
 	"regexp"
 	"strings"
+	"unicode"
 )
 
 var html_regexp = regexp.MustCompile(`<[^>]+>`)
 
 func DeleteTags(s string) string {
 	return html_regexp.ReplaceAllString(s, "")
@@ -28,3 +29,25 @@ func SliceJoin[T any](slice []T, join string, f func(T) string) string {
 	}
 	return strings.Join(result, join)
 }
+
+// RemovePunctuation returns a copy of s with every rune for which
+// unicode.IsPunct reports true removed. Symbol runes such as '$', '+', '<',
+// '=' and '~' are not punctuation by that definition and are kept.
+func RemovePunctuation(s string) string {
+	return strings.Map(func(r rune) rune {
+		if unicode.IsPunct(r) {
+			// A negative return tells strings.Map to drop the rune.
+			return -1
+		}
+		return r
+	}, s)
+}
+
+// NormalizeWhitespace trims leading and trailing white space from s and
+// collapses every interior run of white space into a single ASCII space.
+// White space is defined by unicode.IsSpace (via strings.Fields), so
+// characters such as U+00A0 (no-break space) and vertical tab are normalized
+// the same way at the edges and in the middle of the string.
+func NormalizeWhitespace(s string) string {
+	return strings.Join(strings.Fields(s), " ")
+}