diff --git a/RombaSharp/Features/Lookup.cs b/RombaSharp/Features/Lookup.cs index c23e1ad7..eb97ef9b 100644 --- a/RombaSharp/Features/Lookup.cs +++ b/RombaSharp/Features/Lookup.cs @@ -38,31 +38,12 @@ namespace RombaSharp.Features List sha1 = new List(); foreach (string input in Inputs) { - string temp = string.Empty; if (input.Length == Constants.CRCLength) - { - temp = Sanitizer.CleanCRC32(input); - if (!string.IsNullOrWhiteSpace(temp)) - { - crc.Add(temp); - } - } + crc.Add(input); else if (input.Length == Constants.MD5Length) - { - temp = Sanitizer.CleanMD5(input); - if (!string.IsNullOrWhiteSpace(temp)) - { - md5.Add(temp); - } - } + md5.Add(input); else if (input.Length == Constants.SHA1Length) - { - temp = Sanitizer.CleanSHA1(input); - if (!string.IsNullOrWhiteSpace(temp)) - { - sha1.Add(temp); - } - } + sha1.Add(input); } SqliteConnection dbc = new SqliteConnection(_connectionString); diff --git a/SabreTools.Core/Tools/Sanitizer.cs b/SabreTools.Core/Tools/Sanitizer.cs index 1e0863b4..8bfe4865 100644 --- a/SabreTools.Core/Tools/Sanitizer.cs +++ b/SabreTools.Core/Tools/Sanitizer.cs @@ -2,183 +2,11 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Text.RegularExpressions; namespace SabreTools.Core.Tools { public static class Sanitizer { - /// - /// Get a sanitized Date from an input string - /// - /// String to get value from - /// Date as a string, if possible - public static string CleanDate(string input) - { - // Null in, null out - if (input == null) - return null; - - string date = string.Empty; - if (input != null) - { - if (DateTime.TryParse(input, out DateTime dateTime)) - date = dateTime.ToString(); - else - date = input; - } - - return date; - } - - /// - /// Clean a game (or rom) name to the WoD standard - /// - /// Name of the game to be cleaned - /// The cleaned name - public static string CleanGameName(string game) - { - ///Run the name through the filters to make sure that it's correct - game = NormalizeChars(game); - game = RussianToLatin(game); - game = SearchPattern(game); - - game = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(game).Groups[1].Value; - game = game.TrimStart().TrimEnd(); - return game; - } - - /// - /// Clean a CRC32 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanCRC32(string hash) - { - return CleanHashData(hash, Constants.CRCLength); - } - - /// - /// Clean a MD5 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanMD5(string hash) - { - return CleanHashData(hash, Constants.MD5Length); - } - -#if NET_FRAMEWORK - /// - /// Clean a RIPEMD160 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanRIPEMD160(string hash) - { - return CleanHashData(hash, Constants.RIPEMD160Length); - } -#endif - - /// - /// Clean a SHA1 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanSHA1(string hash) - { - return CleanHashData(hash, Constants.SHA1Length); - } - - /// - /// Clean a SHA256 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanSHA256(string hash) - { - return CleanHashData(hash, Constants.SHA256Length); - } - - /// - /// Clean a SHA384 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanSHA384(string hash) - { - return CleanHashData(hash, Constants.SHA384Length); - } - - /// - /// Clean a SHA512 string and pad to the correct size - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanSHA512(string hash) - { - return CleanHashData(hash, Constants.SHA512Length); - } - - /// - /// Clean a hash string and pad to the correct size - /// - /// Hash string to sanitize - /// Amount of characters to pad to - /// Cleaned string - public static string CleanHashData(string hash, int padding) - { - // If we have a known blank hash, return blank - if (string.IsNullOrWhiteSpace(hash) || hash == "-" || hash == "_") - return string.Empty; - - // Check to see if it's a "hex" hash - hash = hash.Trim().Replace("0x", string.Empty); - - // If we have a blank hash now, return blank - if (string.IsNullOrWhiteSpace(hash)) - return string.Empty; - - // If the hash shorter than the required length, pad it - if (hash.Length < padding) - hash = hash.PadLeft(padding, '0'); - - // If the hash is longer than the required length, it's invalid - else if (hash.Length > padding) - return string.Empty; - - // Now normalize the hash - hash = hash.ToLowerInvariant(); - - // Otherwise, make sure that every character is a proper match - for (int i = 0; i < hash.Length; i++) - { - if ((hash[i] < '0' || hash[i] > '9') && (hash[i] < 'a' || hash[i] > 'f')) - { - hash = string.Empty; - break; - } - } - - return hash; - } - - /// - /// Clean a hash string from a Listrom DAT - /// - /// Hash string to sanitize - /// Cleaned string - public static string CleanListromHashData(string hash) - { - if (hash.StartsWith("CRC")) - return hash.Substring(4, 8).ToLowerInvariant(); - - else if (hash.StartsWith("SHA1")) - return hash.Substring(5, 40).ToLowerInvariant(); - - return hash; - } - /// /// Get a sanitized size from an input string /// @@ -209,144 +37,5 @@ namespace SabreTools.Core.Tools List invalidPath = Path.GetInvalidPathChars().ToList(); return new string(s.Where(c => !invalidPath.Contains(c)).ToArray()); } - - /// - /// Remove all unicode-specific chars from a string - /// - /// Input string to clean - /// Cleaned string - public static string RemoveUnicodeCharacters(string s) - { - return new string(s.Where(c => c <= 255).ToArray()); - } - - /// - /// Replace accented characters - /// - /// String to be parsed - /// String with characters replaced - private static string NormalizeChars(string input) - { - string[,] charmap = { - { "Á", "A" }, { "á", "a" }, - { "À", "A" }, { "à", "a" }, - { "Â", "A" }, { "â", "a" }, - { "Ä", "Ae" }, { "ä", "ae" }, - { "Ã", "A" }, { "ã", "a" }, - { "Å", "A" }, { "å", "a" }, - { "Æ", "Ae" }, { "æ", "ae" }, - { "Ç", "C" }, { "ç", "c" }, - { "Ð", "D" }, { "ð", "d" }, - { "É", "E" }, { "é", "e" }, - { "È", "E" }, { "è", "e" }, - { "Ê", "E" }, { "ê", "e" }, - { "Ë", "E" }, { "ë", "e" }, - { "ƒ", "f" }, - { "Í", "I" }, { "í", "i" }, - { "Ì", "I" }, { "ì", "i" }, - { "Î", "I" }, { "î", "i" }, - { "Ï", "I" }, { "ï", "i" }, - { "Ñ", "N" }, { "ñ", "n" }, - { "Ó", "O" }, { "ó", "o" }, - { "Ò", "O" }, { "ò", "o" }, - { "Ô", "O" }, { "ô", "o" }, - { "Ö", "Oe" }, { "ö", "oe" }, - { "Õ", "O" }, { "õ", "o" }, - { "Ø", "O" }, { "ø", "o" }, - { "Š", "S" }, { "š", "s" }, - { "ß", "ss" }, - { "Þ", "B" }, { "þ", "b" }, - { "Ú", "U" }, { "ú", "u" }, - { "Ù", "U" }, { "ù", "u" }, - { "Û", "U" }, { "û", "u" }, - { "Ü", "Ue" }, { "ü", "ue" }, - { "ÿ", "y" }, - { "Ý", "Y" }, { "ý", "y" }, - { "Ž", "Z" }, { "ž", "z" }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = input.Replace(charmap[i, 0], charmap[i, 1]); - } - - return input; - } - - /// - /// Convert Cyrillic lettering to Latin lettering - /// - /// String to be parsed - /// String with characters replaced - private static string RussianToLatin(string input) - { - string[,] charmap = { - { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" }, - { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" }, - { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" }, - { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" }, - { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" }, - { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty }, - { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" }, - { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" }, - { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" }, - { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" }, - { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" }, - { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" }, - { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" }, - { "я", "ya" }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = input.Replace(charmap[i, 0], charmap[i, 1]); - } - - return input; - } - - /// - /// Replace special characters and patterns - /// - /// String to be parsed - /// String with characters replaced - private static string SearchPattern(string input) - { - string[,] charmap = { - { @"~", " - " }, - { @"_", " " }, - { @":", " " }, - { @">", ")" }, - { @"<", "(" }, - { @"\|", "-" }, - { "\"", "'" }, - { @"\*", "." }, - { @"\\", "-" }, - { @"/", "-" }, - { @"\?", " " }, - { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " }, - { @"\(([^)]+)\)", " " }, - { @"\[([^]]+)\]", " " }, - { @"\{([^}]+)\}", " " }, - { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " }, - { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " }, - { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " }, - { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " }, - { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " }, - { @"[-]+", "-" }, - { @"\A\s*\)", " " }, - { @"\A\s*(,|-)", " " }, - { @"\s+", " " }, - { @"\s+,", "," }, - { @"\s*(,|-)\s*\Z", " " }, - }; - - for (int i = 0; i < charmap.GetLength(0); i++) - { - input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]); - } - - return input; - } } } diff --git a/SabreTools.DatFiles/DatFile.Parsing.cs b/SabreTools.DatFiles/DatFile.Parsing.cs index 50582586..11966a56 100644 --- a/SabreTools.DatFiles/DatFile.Parsing.cs +++ b/SabreTools.DatFiles/DatFile.Parsing.cs @@ -312,5 +312,48 @@ namespace SabreTools.DatFiles /// True if full pathnames are to be kept, false otherwise (default) /// True if the error that is thrown should be thrown back to the caller, false otherwise protected abstract void ParseFile(string filename, int indexId, bool keep, bool throwOnError = false); + + #region Input Sanitization + + /// + /// Get a sanitized Date from an input string + /// + /// String to get value from + /// Date as a string, if possible + protected string CleanDate(string input) + { + // Null in, null out + if (input == null) + return null; + + string date = string.Empty; + if (input != null) + { + if (DateTime.TryParse(input, out DateTime dateTime)) + date = dateTime.ToString(); + else + date = input; + } + + return date; + } + + /// + /// Clean a hash string from a Listrom DAT + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanListromHashData(string hash) + { + if (hash.StartsWith("CRC")) + return hash.Substring(4, 8).ToLowerInvariant(); + + else if (hash.StartsWith("SHA1")) + return hash.Substring(5, 40).ToLowerInvariant(); + + return hash; + } + + #endregion } } \ No newline at end of file diff --git a/SabreTools.DatFiles/Formats/Listrom.cs b/SabreTools.DatFiles/Formats/Listrom.cs index 28c7ca2e..b4796685 100644 --- a/SabreTools.DatFiles/Formats/Listrom.cs +++ b/SabreTools.DatFiles/Formats/Listrom.cs @@ -104,7 +104,7 @@ namespace SabreTools.DatFiles.Formats Disk disk = new Disk() { Name = romname, - SHA1 = Sanitizer.CleanListromHashData(split[0]), + SHA1 = CleanListromHashData(split[0]), Machine = new Machine { @@ -127,7 +127,7 @@ namespace SabreTools.DatFiles.Formats Disk disk = new Disk() { Name = romname, - SHA1 = Sanitizer.CleanListromHashData(split[1]), + SHA1 = CleanListromHashData(split[1]), ItemStatus = ItemStatus.BadDump, Machine = new Machine @@ -152,8 +152,8 @@ namespace SabreTools.DatFiles.Formats { Name = romname, Size = Sanitizer.CleanLong(split[0]), - CRC = Sanitizer.CleanListromHashData(split[1]), - SHA1 = Sanitizer.CleanListromHashData(split[2]), + CRC = CleanListromHashData(split[1]), + SHA1 = CleanListromHashData(split[2]), Machine = new Machine { @@ -200,8 +200,8 @@ namespace SabreTools.DatFiles.Formats { Name = romname, Size = Sanitizer.CleanLong(split[0]), - CRC = Sanitizer.CleanListromHashData(split[2]), - SHA1 = Sanitizer.CleanListromHashData(split[3]), + CRC = CleanListromHashData(split[2]), + SHA1 = CleanListromHashData(split[3]), ItemStatus = ItemStatus.BadDump, Machine = new Machine diff --git a/SabreTools.DatFiles/Formats/Logiqx.cs b/SabreTools.DatFiles/Formats/Logiqx.cs index b062b73e..484d2c14 100644 --- a/SabreTools.DatFiles/Formats/Logiqx.cs +++ b/SabreTools.DatFiles/Formats/Logiqx.cs @@ -622,7 +622,7 @@ namespace SabreTools.DatFiles.Formats SpamSum = reader.GetAttribute("spamsum"), MergeTag = reader.GetAttribute("merge"), ItemStatus = reader.GetAttribute("status").AsItemStatus(), - Date = Sanitizer.CleanDate(reader.GetAttribute("date")), + Date = CleanDate(reader.GetAttribute("date")), Inverted = reader.GetAttribute("inverted").AsYesNo(), Source = new Source diff --git a/SabreTools.DatItems/Adjuster.cs b/SabreTools.DatItems/Adjuster.cs index 295dcccb..5f1324cc 100644 --- a/SabreTools.DatItems/Adjuster.cs +++ b/SabreTools.DatItems/Adjuster.cs @@ -163,7 +163,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Archive.cs b/SabreTools.DatItems/Archive.cs index b8d3bb61..8d932fe2 100644 --- a/SabreTools.DatItems/Archive.cs +++ b/SabreTools.DatItems/Archive.cs @@ -116,7 +116,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/BiosSet.cs b/SabreTools.DatItems/BiosSet.cs index 9ec30226..8166eb28 100644 --- a/SabreTools.DatItems/BiosSet.cs +++ b/SabreTools.DatItems/BiosSet.cs @@ -143,7 +143,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Chip.cs b/SabreTools.DatItems/Chip.cs index d7ea5c13..1d5f24d6 100644 --- a/SabreTools.DatItems/Chip.cs +++ b/SabreTools.DatItems/Chip.cs @@ -161,7 +161,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Configuration.cs b/SabreTools.DatItems/Configuration.cs index 5c06cbe6..cd7da793 100644 --- a/SabreTools.DatItems/Configuration.cs +++ b/SabreTools.DatItems/Configuration.cs @@ -228,7 +228,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/DatItem.cs b/SabreTools.DatItems/DatItem.cs index 613a70a1..9ae03651 100644 --- a/SabreTools.DatItems/DatItem.cs +++ b/SabreTools.DatItems/DatItem.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text.RegularExpressions; using System.Xml.Serialization; using SabreTools.Core; @@ -726,15 +727,15 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip machine name and description if (cleaner?.RemoveUnicode == true) { - Machine.Name = Sanitizer.RemoveUnicodeCharacters(Machine.Name); - Machine.Description = Sanitizer.RemoveUnicodeCharacters(Machine.Description); + Machine.Name = RemoveUnicodeCharacters(Machine.Name); + Machine.Description = RemoveUnicodeCharacters(Machine.Description); } // If we're in cleaning mode, sanitize machine name and description if (cleaner?.Clean == true) { - Machine.Name = Sanitizer.CleanGameName(Machine.Name); - Machine.Description = Sanitizer.CleanGameName(Machine.Description); + Machine.Name = CleanGameName(Machine.Name); + Machine.Description = CleanGameName(Machine.Description); } // If we are in single game mode, rename the machine @@ -782,6 +783,277 @@ namespace SabreTools.DatItems { } + /// + /// Clean a game (or rom) name to the WoD standard + /// + /// Name of the game to be cleaned + /// The cleaned name + protected string CleanGameName(string game) + { + ///Run the name through the filters to make sure that it's correct + game = NormalizeChars(game); + game = RussianToLatin(game); + game = SearchPattern(game); + + game = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(game).Groups[1].Value; + game = game.TrimStart().TrimEnd(); + return game; + } + + /// + /// Clean a CRC32 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanCRC32(string hash) + { + return CleanHashData(hash, Constants.CRCLength); + } + + /// + /// Clean a MD5 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanMD5(string hash) + { + return CleanHashData(hash, Constants.MD5Length); + } + +#if NET_FRAMEWORK + /// + /// Clean a RIPEMD160 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanRIPEMD160(string hash) + { + return CleanHashData(hash, Constants.RIPEMD160Length); + } +#endif + + /// + /// Clean a SHA1 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanSHA1(string hash) + { + return CleanHashData(hash, Constants.SHA1Length); + } + + /// + /// Clean a SHA256 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanSHA256(string hash) + { + return CleanHashData(hash, Constants.SHA256Length); + } + + /// + /// Clean a SHA384 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanSHA384(string hash) + { + return CleanHashData(hash, Constants.SHA384Length); + } + + /// + /// Clean a SHA512 string and pad to the correct size + /// + /// Hash string to sanitize + /// Cleaned string + protected string CleanSHA512(string hash) + { + return CleanHashData(hash, Constants.SHA512Length); + } + + /// + /// Remove all unicode-specific chars from a string + /// + /// Input string to clean + /// Cleaned string + protected string RemoveUnicodeCharacters(string s) + { + return new string(s.Where(c => c <= 255).ToArray()); + } + + /// + /// Clean a hash string and pad to the correct size + /// + /// Hash string to sanitize + /// Amount of characters to pad to + /// Cleaned string + private string CleanHashData(string hash, int padding) + { + // If we have a known blank hash, return blank + if (string.IsNullOrWhiteSpace(hash) || hash == "-" || hash == "_") + return string.Empty; + + // Check to see if it's a "hex" hash + hash = hash.Trim().Replace("0x", string.Empty); + + // If we have a blank hash now, return blank + if (string.IsNullOrWhiteSpace(hash)) + return string.Empty; + + // If the hash shorter than the required length, pad it + if (hash.Length < padding) + hash = hash.PadLeft(padding, '0'); + + // If the hash is longer than the required length, it's invalid + else if (hash.Length > padding) + return string.Empty; + + // Now normalize the hash + hash = hash.ToLowerInvariant(); + + // Otherwise, make sure that every character is a proper match + for (int i = 0; i < hash.Length; i++) + { + if ((hash[i] < '0' || hash[i] > '9') && (hash[i] < 'a' || hash[i] > 'f')) + { + hash = string.Empty; + break; + } + } + + return hash; + } + + /// + /// Replace accented characters + /// + /// String to be parsed + /// String with characters replaced + private string NormalizeChars(string input) + { + string[,] charmap = { + { "Á", "A" }, { "á", "a" }, + { "À", "A" }, { "à", "a" }, + { "Â", "A" }, { "â", "a" }, + { "Ä", "Ae" }, { "ä", "ae" }, + { "Ã", "A" }, { "ã", "a" }, + { "Å", "A" }, { "å", "a" }, + { "Æ", "Ae" }, { "æ", "ae" }, + { "Ç", "C" }, { "ç", "c" }, + { "Ð", "D" }, { "ð", "d" }, + { "É", "E" }, { "é", "e" }, + { "È", "E" }, { "è", "e" }, + { "Ê", "E" }, { "ê", "e" }, + { "Ë", "E" }, { "ë", "e" }, + { "ƒ", "f" }, + { "Í", "I" }, { "í", "i" }, + { "Ì", "I" }, { "ì", "i" }, + { "Î", "I" }, { "î", "i" }, + { "Ï", "I" }, { "ï", "i" }, + { "Ñ", "N" }, { "ñ", "n" }, + { "Ó", "O" }, { "ó", "o" }, + { "Ò", "O" }, { "ò", "o" }, + { "Ô", "O" }, { "ô", "o" }, + { "Ö", "Oe" }, { "ö", "oe" }, + { "Õ", "O" }, { "õ", "o" }, + { "Ø", "O" }, { "ø", "o" }, + { "Š", "S" }, { "š", "s" }, + { "ß", "ss" }, + { "Þ", "B" }, { "þ", "b" }, + { "Ú", "U" }, { "ú", "u" }, + { "Ù", "U" }, { "ù", "u" }, + { "Û", "U" }, { "û", "u" }, + { "Ü", "Ue" }, { "ü", "ue" }, + { "ÿ", "y" }, + { "Ý", "Y" }, { "ý", "y" }, + { "Ž", "Z" }, { "ž", "z" }, + }; + + for (int i = 0; i < charmap.GetLength(0); i++) + { + input = input.Replace(charmap[i, 0], charmap[i, 1]); + } + + return input; + } + + /// + /// Convert Cyrillic lettering to Latin lettering + /// + /// String to be parsed + /// String with characters replaced + private string RussianToLatin(string input) + { + string[,] charmap = { + { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" }, + { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" }, + { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" }, + { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" }, + { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" }, + { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty }, + { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" }, + { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" }, + { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" }, + { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" }, + { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" }, + { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" }, + { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" }, + { "я", "ya" }, + }; + + for (int i = 0; i < charmap.GetLength(0); i++) + { + input = input.Replace(charmap[i, 0], charmap[i, 1]); + } + + return input; + } + + /// + /// Replace special characters and patterns + /// + /// String to be parsed + /// String with characters replaced + private string SearchPattern(string input) + { + string[,] charmap = { + { @"~", " - " }, + { @"_", " " }, + { @":", " " }, + { @">", ")" }, + { @"<", "(" }, + { @"\|", "-" }, + { "\"", "'" }, + { @"\*", "." }, + { @"\\", "-" }, + { @"/", "-" }, + { @"\?", " " }, + { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " }, + { @"\(([^)]+)\)", " " }, + { @"\[([^]]+)\]", " " }, + { @"\{([^}]+)\}", " " }, + { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " }, + { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " }, + { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " }, + { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " }, + { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " }, + { @"[-]+", "-" }, + { @"\A\s*\)", " " }, + { @"\A\s*(,|-)", " " }, + { @"\s+", " " }, + { @"\s+,", "," }, + { @"\s*(,|-)\s*\Z", " " }, + }; + + for (int i = 0; i < charmap.GetLength(0); i++) + { + input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]); + } + + return input; + } + #endregion #region Sorting and Merging diff --git a/SabreTools.DatItems/DataArea.cs b/SabreTools.DatItems/DataArea.cs index a29a7408..1b2418c5 100644 --- a/SabreTools.DatItems/DataArea.cs +++ b/SabreTools.DatItems/DataArea.cs @@ -163,7 +163,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/DeviceReference.cs b/SabreTools.DatItems/DeviceReference.cs index 1956f13d..64dc4c35 100644 --- a/SabreTools.DatItems/DeviceReference.cs +++ b/SabreTools.DatItems/DeviceReference.cs @@ -116,7 +116,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/DipSwitch.cs b/SabreTools.DatItems/DipSwitch.cs index 5181c417..33602ae7 100644 --- a/SabreTools.DatItems/DipSwitch.cs +++ b/SabreTools.DatItems/DipSwitch.cs @@ -275,7 +275,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Disk.cs b/SabreTools.DatItems/Disk.cs index 1831ae06..7041733e 100644 --- a/SabreTools.DatItems/Disk.cs +++ b/SabreTools.DatItems/Disk.cs @@ -44,7 +44,7 @@ namespace SabreTools.DatItems public string MD5 { get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } - set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } + set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); } } /// @@ -55,7 +55,7 @@ namespace SabreTools.DatItems public string SHA1 { get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } - set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } + set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); } } /// @@ -434,7 +434,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/DiskArea.cs b/SabreTools.DatItems/DiskArea.cs index 0ff4ce59..0bbd1c0c 100644 --- a/SabreTools.DatItems/DiskArea.cs +++ b/SabreTools.DatItems/DiskArea.cs @@ -118,7 +118,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Extension.cs b/SabreTools.DatItems/Extension.cs index 645cee48..43289e15 100644 --- a/SabreTools.DatItems/Extension.cs +++ b/SabreTools.DatItems/Extension.cs @@ -116,7 +116,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Info.cs b/SabreTools.DatItems/Info.cs index 55360322..99274996 100644 --- a/SabreTools.DatItems/Info.cs +++ b/SabreTools.DatItems/Info.cs @@ -127,7 +127,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Instance.cs b/SabreTools.DatItems/Instance.cs index 735510d4..3e73ede3 100644 --- a/SabreTools.DatItems/Instance.cs +++ b/SabreTools.DatItems/Instance.cs @@ -127,7 +127,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Location.cs b/SabreTools.DatItems/Location.cs index 2583a427..66767127 100644 --- a/SabreTools.DatItems/Location.cs +++ b/SabreTools.DatItems/Location.cs @@ -146,7 +146,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Media.cs b/SabreTools.DatItems/Media.cs index 3c27a8f1..19061cdb 100644 --- a/SabreTools.DatItems/Media.cs +++ b/SabreTools.DatItems/Media.cs @@ -44,7 +44,7 @@ namespace SabreTools.DatItems public string MD5 { get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } - set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } + set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); } } /// @@ -55,7 +55,7 @@ namespace SabreTools.DatItems public string SHA1 { get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } - set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } + set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); } } /// @@ -66,7 +66,7 @@ namespace SabreTools.DatItems public string SHA256 { get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); } - set { _sha256 = Utilities.StringToByteArray(Sanitizer.CleanSHA256(value)); } + set { _sha256 = Utilities.StringToByteArray(CleanSHA256(value)); } } /// @@ -333,7 +333,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Part.cs b/SabreTools.DatItems/Part.cs index 93fede04..4017eebc 100644 --- a/SabreTools.DatItems/Part.cs +++ b/SabreTools.DatItems/Part.cs @@ -154,7 +154,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/PartFeature.cs b/SabreTools.DatItems/PartFeature.cs index 6bcfdd1c..59e3e3d1 100644 --- a/SabreTools.DatItems/PartFeature.cs +++ b/SabreTools.DatItems/PartFeature.cs @@ -127,7 +127,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/RamOption.cs b/SabreTools.DatItems/RamOption.cs index cbe1078c..58a457ff 100644 --- a/SabreTools.DatItems/RamOption.cs +++ b/SabreTools.DatItems/RamOption.cs @@ -141,7 +141,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Release.cs b/SabreTools.DatItems/Release.cs index a5f204e4..0276ffc9 100644 --- a/SabreTools.DatItems/Release.cs +++ b/SabreTools.DatItems/Release.cs @@ -171,7 +171,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Rom.cs b/SabreTools.DatItems/Rom.cs index ad415b85..12fb603b 100644 --- a/SabreTools.DatItems/Rom.cs +++ b/SabreTools.DatItems/Rom.cs @@ -71,7 +71,7 @@ namespace SabreTools.DatItems public string CRC { get { return _crc.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_crc); } - set { _crc = (value == "null" ? Constants.CRCZeroBytes : Utilities.StringToByteArray(Sanitizer.CleanCRC32(value))); } + set { _crc = (value == "null" ? Constants.CRCZeroBytes : Utilities.StringToByteArray(CleanCRC32(value))); } } /// @@ -82,7 +82,7 @@ namespace SabreTools.DatItems public string MD5 { get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } - set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } + set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); } } #if NET_FRAMEWORK @@ -94,7 +94,7 @@ namespace SabreTools.DatItems public string RIPEMD160 { get { return _ripemd160.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_ripemd160); } - set { _ripemd160 = Utilities.StringToByteArray(Sanitizer.CleanRIPEMD160(value)); } + set { _ripemd160 = Utilities.StringToByteArray(CleanRIPEMD160(value)); } } #endif @@ -106,7 +106,7 @@ namespace SabreTools.DatItems public string SHA1 { get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } - set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } + set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); } } /// @@ -117,7 +117,7 @@ namespace SabreTools.DatItems public string SHA256 { get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); } - set { _sha256 = Utilities.StringToByteArray(Sanitizer.CleanSHA256(value)); } + set { _sha256 = Utilities.StringToByteArray(CleanSHA256(value)); } } /// @@ -128,7 +128,7 @@ namespace SabreTools.DatItems public string SHA384 { get { return _sha384.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha384); } - set { _sha384 = Utilities.StringToByteArray(Sanitizer.CleanSHA384(value)); } + set { _sha384 = Utilities.StringToByteArray(CleanSHA384(value)); } } /// @@ -139,7 +139,7 @@ namespace SabreTools.DatItems public string SHA512 { get { return _sha512.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha512); } - set { _sha512 = Utilities.StringToByteArray(Sanitizer.CleanSHA512(value)); } + set { _sha512 = Utilities.StringToByteArray(CleanSHA512(value)); } } /// @@ -807,7 +807,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Sample.cs b/SabreTools.DatItems/Sample.cs index 4642ced6..07b15f84 100644 --- a/SabreTools.DatItems/Sample.cs +++ b/SabreTools.DatItems/Sample.cs @@ -116,7 +116,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Setting.cs b/SabreTools.DatItems/Setting.cs index 1b5af86e..3b1ef5e7 100644 --- a/SabreTools.DatItems/Setting.cs +++ b/SabreTools.DatItems/Setting.cs @@ -175,7 +175,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/SharedFeature.cs b/SabreTools.DatItems/SharedFeature.cs index 9d43c182..b4403adc 100644 --- a/SabreTools.DatItems/SharedFeature.cs +++ b/SabreTools.DatItems/SharedFeature.cs @@ -127,7 +127,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/Slot.cs b/SabreTools.DatItems/Slot.cs index 02e59ff2..a3c7b086 100644 --- a/SabreTools.DatItems/Slot.cs +++ b/SabreTools.DatItems/Slot.cs @@ -148,7 +148,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/SlotOption.cs b/SabreTools.DatItems/SlotOption.cs index d1c2f290..c54b714d 100644 --- a/SabreTools.DatItems/SlotOption.cs +++ b/SabreTools.DatItems/SlotOption.cs @@ -143,7 +143,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true) diff --git a/SabreTools.DatItems/SoftwareList.cs b/SabreTools.DatItems/SoftwareList.cs index 55d83128..88aadad8 100644 --- a/SabreTools.DatItems/SoftwareList.cs +++ b/SabreTools.DatItems/SoftwareList.cs @@ -145,7 +145,7 @@ namespace SabreTools.DatItems // If we're stripping unicode characters, strip item name if (cleaner?.RemoveUnicode == true) - Name = Sanitizer.RemoveUnicodeCharacters(Name); + Name = RemoveUnicodeCharacters(Name); // If we are in NTFS trim mode, trim the game name if (cleaner?.Trim == true)