diff --git a/SabreTools.Core/Tools/TextHelper.cs b/SabreTools.Core/Tools/TextHelper.cs index 31851fd6..75327ec9 100644 --- a/SabreTools.Core/Tools/TextHelper.cs +++ b/SabreTools.Core/Tools/TextHelper.cs @@ -128,6 +128,17 @@ namespace SabreTools.Core.Tools return new string(input.Where(c => !invalidPath.Contains(c)).ToArray()); } + /// + /// Remove all unicode-specific chars from a string + /// + public static string? RemoveUnicodeCharacters(string? input) + { + if (string.IsNullOrWhiteSpace(input)) + return input; + + return new string(input.Where(c => c <= 255).ToArray()); + } + #endregion #region Helpers diff --git a/SabreTools.Filter/FieldManipulator.cs b/SabreTools.Filter/FieldManipulator.cs index 4e9b00a0..585e81e2 100644 --- a/SabreTools.Filter/FieldManipulator.cs +++ b/SabreTools.Filter/FieldManipulator.cs @@ -48,17 +48,6 @@ namespace SabreTools.Filter return true; } - /// - /// Remove all unicode-specific chars from a string - /// - public static string? RemoveUnicodeCharacters(string? input) - { - if (string.IsNullOrWhiteSpace(input)) - return input; - - return new string(input.Where(c => c <= 255).ToArray()); - } - /// /// Set a field in a given DictionaryBase /// diff --git a/SabreTools.Filtering/Cleaner.cs b/SabreTools.Filtering/Cleaner.cs index eb7139ca..df0d2d06 100644 --- a/SabreTools.Filtering/Cleaner.cs +++ b/SabreTools.Filtering/Cleaner.cs @@ -6,8 +6,8 @@ using System.Linq; using System.Runtime.CompilerServices; using System.Text.RegularExpressions; using System.Threading.Tasks; - using SabreTools.Core; +using SabreTools.Core.Tools; using SabreTools.DatFiles; using SabreTools.DatItems; using SabreTools.Logging; @@ -188,16 +188,16 @@ namespace SabreTools.Filtering // If we're stripping unicode characters, strip machine name and description if (RemoveUnicode) { - datItem.Machine.Name = RemoveUnicodeCharacters(datItem.Machine.Name); - datItem.Machine.Description = RemoveUnicodeCharacters(datItem.Machine.Description); - datItem.SetName(RemoveUnicodeCharacters(datItem.GetName())); + datItem.Machine.Name = TextHelper.RemoveUnicodeCharacters(datItem.Machine.Name); + datItem.Machine.Description = TextHelper.RemoveUnicodeCharacters(datItem.Machine.Description); + datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName())); } // If we're in cleaning mode, sanitize machine name and description if (Clean) { - datItem.Machine.Name = CleanGameName(datItem.Machine.Name); - datItem.Machine.Description = CleanGameName(datItem.Machine.Description); + datItem.Machine.Name = TextHelper.NormalizeCharacters(datItem.Machine.Name); + datItem.Machine.Description = TextHelper.NormalizeCharacters(datItem.Machine.Description); } // If we are in single game mode, rename the machine @@ -217,26 +217,6 @@ namespace SabreTools.Filtering } } - /// - /// Clean a game (or rom) name to the WoD standard - /// - /// Name of the game to be cleaned - /// The cleaned name - internal static string CleanGameName(string game) - { - if (game == null) - return null; - - ///Run the name through the filters to make sure that it's correct - game = NormalizeChars(game); - game = RussianToLatin(game); - game = SearchPattern(game); - - game = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(game).Groups[1].Value; - game = game.TrimStart().TrimEnd(); - return game; - } - /// /// Use game descriptions as names in the DAT, updating cloneof/romof/sampleof /// @@ -296,157 +276,6 @@ namespace SabreTools.Filtering } } - /// - /// Replace accented characters - /// - /// String to be parsed - /// String with characters replaced - internal static string NormalizeChars(string input) - { - if (input == null) - return null; - - string[,] charmap = { - { "Á", "A" }, { "á", "a" }, - { "À", "A" }, { "à", "a" }, - { "Â", "A" }, { "â", "a" }, - { "Ä", "Ae" }, { "ä", "ae" }, - { "Ã", "A" }, { "ã", "a" }, - { "Å", "A" }, { "å", "a" }, - { "Æ", "Ae" }, { "æ", "ae" }, - { "Ç", "C" }, { "ç", "c" }, - { "Ð", "D" }, { "ð", "d" }, - { "É", "E" }, { "é", "e" }, - { "È", "E" }, { "è", "e" }, - { "Ê", "E" }, { "ê", "e" }, - { "Ë", "E" }, { "ë", "e" }, - { "ƒ", "f" }, - { "Í", "I" }, { "í", "i" }, - { "Ì", "I" }, { "ì", "i" }, - { "Î", "I" }, { "î", "i" }, - { "Ï", "I" }, { "ï", "i" }, - { "Ñ", "N" }, { "ñ", "n" }, - { "Ó", "O" }, { "ó", "o" }, - { "Ò", "O" }, { "ò", "o" }, - { "Ô", "O" }, { "ô", "o" }, - { "Ö", "Oe" }, { "ö", "oe" }, - { "Õ", "O" }, { "õ", "o" }, - { "Ø", "O" }, { "ø", "o" }, - { "Š", "S" }, { "š", "s" }, - { "ß", "ss" }, - { "Þ", "B" }, { "þ", "b" }, - { "Ú", "U" }, { "ú", "u" }, - { "Ù", "U" }, { "ù", "u" }, - { "Û", "U" }, { "û", "u" }, - { "Ü", "Ue" }, { "ü", "ue" }, - { "ÿ", "y" }, - { "Ý", "Y" }, { "ý", "y" }, - { "Ž", "Z" }, { "ž", "z" }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = input.Replace(charmap[i, 0], charmap[i, 1]); - } - - return input; - } - - /// - /// Remove all unicode-specific chars from a string - /// - /// Input string to clean - /// Cleaned string - internal static string RemoveUnicodeCharacters(string s) - { - if (s == null) - return null; - - return new string(s.Where(c => c <= 255).ToArray()); - } - - /// - /// Convert Cyrillic lettering to Latin lettering - /// - /// String to be parsed - /// String with characters replaced - internal static string RussianToLatin(string input) - { - if (input == null) - return null; - - string[,] charmap = { - { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" }, - { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" }, - { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" }, - { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" }, - { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" }, - { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty }, - { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" }, - { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" }, - { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" }, - { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" }, - { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" }, - { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" }, - { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" }, - { "я", "ya" }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = input.Replace(charmap[i, 0], charmap[i, 1]); - } - - return input; - } - - /// - /// Replace special characters and patterns - /// - /// String to be parsed - /// String with characters replaced - internal static string SearchPattern(string input) - { - if (input == null) - return null; - - string[,] charmap = { - { @"~", " - " }, - { @"_", " " }, - { @":", " " }, - { @">", ")" }, - { @"<", "(" }, - { @"\|", "-" }, - { "\"", "'" }, - { @"\*", "." }, - { @"\\", "-" }, - { @"/", "-" }, - { @"\?", " " }, - { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " }, - { @"\(([^)]+)\)", " " }, - { @"\[([^]]+)\]", " " }, - { @"\{([^}]+)\}", " " }, - { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " }, - { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " }, - { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " }, - { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " }, - { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " }, - { @"[-]+", "-" }, - { @"\A\s*\)", " " }, - { @"\A\s*(,|-)", " " }, - { @"\s+", " " }, - { @"\s+,", "," }, - { @"\s*(,|-)\s*\Z", " " }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]); - } - - return input; - } - /// /// Filter a DAT using 1G1R logic given an ordered set of regions ///