diff --git a/SabreTools.Core/Tools/TextHelper.cs b/SabreTools.Core/Tools/TextHelper.cs
new file mode 100644
index 00000000..986cc5d1
--- /dev/null
+++ b/SabreTools.Core/Tools/TextHelper.cs
@@ -0,0 +1,152 @@
+using System.Text.RegularExpressions;
+
+namespace SabreTools.Core.Tools
+{
+ public static class TextHelper
+ {
+ /// <summary>
+ /// Normalize a string to the WoD standard: transliterate accented and Cyrillic letters, replace special characters and patterns, keep the leading name portion, and trim it. Null, empty, or whitespace-only input is returned unchanged.
+ /// </summary>
+ public static string? NormalizeCharacters(string? input)
+ {
+ if (string.IsNullOrWhiteSpace(input))
+ return input;
+
+ // Run the name through the filters in order: accented letters, then Cyrillic letters, then the special-character patterns
+ input = NormalizeChars(input);
+ input = RussianToLatin(input);
+ input = SearchPattern(input);
+ // Keep only the first match of the pattern below (group 1 wraps the whole pattern): an optional leading bracketed prefix followed by a space, then the text up to the next '(' or '['
+ input = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(input).Groups[1].Value;
+ input = input.TrimStart().TrimEnd();
+ return input;
+ }
+
+ #region Helpers
+
+ /// <summary>
+ /// Replace accented characters with unaccented Latin equivalents; a few expand to two letters (for example Ä becomes Ae and ß becomes ss)
+ /// </summary>
+ private static string NormalizeChars(string input)
+ {
+ string[,] charmap = {
+ { "Á", "A" }, { "á", "a" },
+ { "À", "A" }, { "à", "a" },
+ { "Â", "A" }, { "â", "a" },
+ { "Ä", "Ae" }, { "ä", "ae" },
+ { "Ã", "A" }, { "ã", "a" },
+ { "Å", "A" }, { "å", "a" },
+ { "Æ", "Ae" }, { "æ", "ae" },
+ { "Ç", "C" }, { "ç", "c" },
+ { "Ð", "D" }, { "ð", "d" },
+ { "É", "E" }, { "é", "e" },
+ { "È", "E" }, { "è", "e" },
+ { "Ê", "E" }, { "ê", "e" },
+ { "Ë", "E" }, { "ë", "e" },
+ { "ƒ", "f" },
+ { "Í", "I" }, { "í", "i" },
+ { "Ì", "I" }, { "ì", "i" },
+ { "Î", "I" }, { "î", "i" },
+ { "Ï", "I" }, { "ï", "i" },
+ { "Ñ", "N" }, { "ñ", "n" },
+ { "Ó", "O" }, { "ó", "o" },
+ { "Ò", "O" }, { "ò", "o" },
+ { "Ô", "O" }, { "ô", "o" },
+ { "Ö", "Oe" }, { "ö", "oe" },
+ { "Õ", "O" }, { "õ", "o" },
+ { "Ø", "O" }, { "ø", "o" },
+ { "Š", "S" }, { "š", "s" },
+ { "ß", "ss" },
+ { "Þ", "B" }, { "þ", "b" },
+ { "Ú", "U" }, { "ú", "u" },
+ { "Ù", "U" }, { "ù", "u" },
+ { "Û", "U" }, { "û", "u" },
+ { "Ü", "Ue" }, { "ü", "ue" },
+ { "ÿ", "y" },
+ { "Ý", "Y" }, { "ý", "y" },
+ { "Ž", "Z" }, { "ž", "z" },
+ };
+ // Apply each pair in table order: column 0 is the text to find, column 1 is its replacement
+ for (int i = 0; i < charmap.GetLength(0); i++)
+ {
+ input = input.Replace(charmap[i, 0], charmap[i, 1]);
+ }
+
+ return input;
+ }
+
+ /// <summary>
+ /// Convert Cyrillic lettering to Latin lettering using a fixed per-letter table; the hard and soft signs (Ъ, Ь, ъ, ь) are removed
+ /// </summary>
+ private static string RussianToLatin(string input)
+ {
+ string[,] charmap = {
+ { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" },
+ { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" },
+ { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" },
+ { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" },
+ { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" },
+ { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty },
+ { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" },
+ { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" },
+ { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" },
+ { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" },
+ { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" },
+ { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" },
+ { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" },
+ { "я", "ya" },
+ };
+ // NOTE(review): the capitals Ф, Ы, Э, Ю and Я map to lowercase Latin text (f, y, e, yu, ya), unlike the other capitals in the table; confirm this is intended before relying on output casing
+ for (int i = 0; i < charmap.GetLength(0); i++)
+ {
+ input = input.Replace(charmap[i, 0], charmap[i, 1]);
+ }
+
+ return input;
+ }
+
+ /// <summary>
+ /// Replace special characters and patterns; column 0 of each table row is a regular expression and column 1 is its replacement, applied in the order listed
+ /// </summary>
+ private static string SearchPattern(string input)
+ {
+ string[,] charmap = {
+ { @"~", " - " },
+ { @"_", " " },
+ { @":", " " },
+ { @">", ")" },
+ { @"<", "(" },
+ { @"\|", "-" },
+ { "\"", "'" },
+ { @"\*", "." },
+ { @"\\", "-" },
+ { @"/", "-" },
+ { @"\?", " " },
+ { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " }, // Parenthesized group that itself contains one parenthesized group
+ { @"\(([^)]+)\)", " " }, // Remaining non-empty (...) groups
+ { @"\[([^]]+)\]", " " }, // Non-empty [...] groups
+ { @"\{([^}]+)\}", " " }, // Non-empty {...} groups
+ { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " }, // Fixed junk markers and dotted three-lowercase-letter segments such as .abc. or .abc.def.
+ { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " }, // Space, then r/rev/v/ver followed by digits or dots and any attached non-space characters
+ { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " }, // Run of 6 or 8 digits bounded by a space or the string start/end (presumably a date stamp; confirm)
+ { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " }, // Hyphenated digit groups with the 2- or 4-digit group last; '/' was already turned into '-' above
+ { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " }, // Same shape with the 2- or 4-digit group first
+ { @"[-]+", "-" }, // Collapse runs of hyphens into one
+ { @"\A\s*\)", " " }, // Leading ')' becomes a space
+ { @"\A\s*(,|-)", " " }, // Leading comma or hyphen becomes a space
+ { @"\s+", " " }, // Collapse each whitespace run into a single space
+ { @"\s+,", "," }, // Drop whitespace before a comma
+ { @"\s*(,|-)\s*\Z", " " }, // Trailing comma or hyphen, with surrounding whitespace, becomes a space
+ };
+ // Apply the substitutions sequentially; each pattern sees the output of the ones before it, so the table order matters
+ for (int i = 0; i < charmap.GetLength(0); i++)
+ {
+ input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]);
+ }
+
+ return input;
+ }
+
+ #endregion
+ }
+}
\ No newline at end of file
diff --git a/SabreTools.Filter/FieldManipulator.cs b/SabreTools.Filter/FieldManipulator.cs
index a3752d1a..f6fdc40a 100644
--- a/SabreTools.Filter/FieldManipulator.cs
+++ b/SabreTools.Filter/FieldManipulator.cs
@@ -30,24 +30,6 @@ namespace SabreTools.Filter
return (true, name);
}
- ///
- /// Normalize a string to the WoD standard
- ///
- public static string? NormalizeCharacters(string? input)
- {
- if (string.IsNullOrWhiteSpace(input))
- return input;
-
- ///Run the name through the filters to make sure that it's correct
- input = NormalizeChars(input);
- input = RussianToLatin(input);
- input = SearchPattern(input);
-
- input = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(input).Groups[1].Value;
- input = input.TrimStart().TrimEnd();
- return input;
- }
-
///
/// Remove a field from a given DictionaryBase
///
@@ -75,7 +57,7 @@ namespace SabreTools.Filter
return input;
return new string(input.Where(c => c <= 255).ToArray());
- }
+ }
///
/// Set a field in a given DictionaryBase
@@ -117,132 +99,5 @@ namespace SabreTools.Filter
return true;
}
-
- #region Helpers
-
- ///
- /// Replace accented characters
- ///
- private static string NormalizeChars(string input)
- {
- string[,] charmap = {
- { "Á", "A" }, { "á", "a" },
- { "À", "A" }, { "à", "a" },
- { "Â", "A" }, { "â", "a" },
- { "Ä", "Ae" }, { "ä", "ae" },
- { "Ã", "A" }, { "ã", "a" },
- { "Å", "A" }, { "å", "a" },
- { "Æ", "Ae" }, { "æ", "ae" },
- { "Ç", "C" }, { "ç", "c" },
- { "Ð", "D" }, { "ð", "d" },
- { "É", "E" }, { "é", "e" },
- { "È", "E" }, { "è", "e" },
- { "Ê", "E" }, { "ê", "e" },
- { "Ë", "E" }, { "ë", "e" },
- { "ƒ", "f" },
- { "Í", "I" }, { "í", "i" },
- { "Ì", "I" }, { "ì", "i" },
- { "Î", "I" }, { "î", "i" },
- { "Ï", "I" }, { "ï", "i" },
- { "Ñ", "N" }, { "ñ", "n" },
- { "Ó", "O" }, { "ó", "o" },
- { "Ò", "O" }, { "ò", "o" },
- { "Ô", "O" }, { "ô", "o" },
- { "Ö", "Oe" }, { "ö", "oe" },
- { "Õ", "O" }, { "õ", "o" },
- { "Ø", "O" }, { "ø", "o" },
- { "Š", "S" }, { "š", "s" },
- { "ß", "ss" },
- { "Þ", "B" }, { "þ", "b" },
- { "Ú", "U" }, { "ú", "u" },
- { "Ù", "U" }, { "ù", "u" },
- { "Û", "U" }, { "û", "u" },
- { "Ü", "Ue" }, { "ü", "ue" },
- { "ÿ", "y" },
- { "Ý", "Y" }, { "ý", "y" },
- { "Ž", "Z" }, { "ž", "z" },
- };
-
- for (int i = 0; i < charmap.GetLength(0); i++)
- {
- input = input.Replace(charmap[i, 0], charmap[i, 1]);
- }
-
- return input;
- }
-
- ///
- /// Convert Cyrillic lettering to Latin lettering
- ///
- private static string RussianToLatin(string input)
- {
- string[,] charmap = {
- { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" },
- { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" },
- { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" },
- { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" },
- { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" },
- { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty },
- { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" },
- { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" },
- { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" },
- { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" },
- { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" },
- { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" },
- { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" },
- { "я", "ya" },
- };
-
- for (int i = 0; i < charmap.GetLength(0); i++)
- {
- input = input.Replace(charmap[i, 0], charmap[i, 1]);
- }
-
- return input;
- }
-
- ///
- /// Replace special characters and patterns
- ///
- private static string SearchPattern(string input)
- {
- string[,] charmap = {
- { @"~", " - " },
- { @"_", " " },
- { @":", " " },
- { @">", ")" },
- { @"<", "(" },
- { @"\|", "-" },
- { "\"", "'" },
- { @"\*", "." },
- { @"\\", "-" },
- { @"/", "-" },
- { @"\?", " " },
- { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " },
- { @"\(([^)]+)\)", " " },
- { @"\[([^]]+)\]", " " },
- { @"\{([^}]+)\}", " " },
- { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " },
- { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " },
- { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " },
- { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " },
- { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " },
- { @"[-]+", "-" },
- { @"\A\s*\)", " " },
- { @"\A\s*(,|-)", " " },
- { @"\s+", " " },
- { @"\s+,", "," },
- { @"\s*(,|-)\s*\Z", " " },
- };
-
- for (int i = 0; i < charmap.GetLength(0); i++)
- {
- input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]);
- }
-
- return input;
- }
-
- #endregion
}
}
\ No newline at end of file