Move former Sanitizer methods to better places

This commit is contained in:
Matt Nadareski
2020-12-09 22:27:41 -08:00
parent caf5fae5ad
commit 7e86b6914d
32 changed files with 367 additions and 382 deletions

View File

@@ -38,31 +38,12 @@ namespace RombaSharp.Features
List<string> sha1 = new List<string>(); List<string> sha1 = new List<string>();
foreach (string input in Inputs) foreach (string input in Inputs)
{ {
string temp = string.Empty;
if (input.Length == Constants.CRCLength) if (input.Length == Constants.CRCLength)
{ crc.Add(input);
temp = Sanitizer.CleanCRC32(input);
if (!string.IsNullOrWhiteSpace(temp))
{
crc.Add(temp);
}
}
else if (input.Length == Constants.MD5Length) else if (input.Length == Constants.MD5Length)
{ md5.Add(input);
temp = Sanitizer.CleanMD5(input);
if (!string.IsNullOrWhiteSpace(temp))
{
md5.Add(temp);
}
}
else if (input.Length == Constants.SHA1Length) else if (input.Length == Constants.SHA1Length)
{ sha1.Add(input);
temp = Sanitizer.CleanSHA1(input);
if (!string.IsNullOrWhiteSpace(temp))
{
sha1.Add(temp);
}
}
} }
SqliteConnection dbc = new SqliteConnection(_connectionString); SqliteConnection dbc = new SqliteConnection(_connectionString);

View File

@@ -2,183 +2,11 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Text.RegularExpressions;
namespace SabreTools.Core.Tools namespace SabreTools.Core.Tools
{ {
public static class Sanitizer public static class Sanitizer
{ {
/// <summary>
/// Get a sanitized Date from an input string
/// </summary>
/// <param name="input">String to get value from</param>
/// <returns>Date as a string, if possible</returns>
public static string CleanDate(string input)
{
// Null in, null out
if (input == null)
return null;
string date = string.Empty;
if (input != null)
{
if (DateTime.TryParse(input, out DateTime dateTime))
date = dateTime.ToString();
else
date = input;
}
return date;
}
/// <summary>
/// Clean a game (or rom) name to the WoD standard
/// </summary>
/// <param name="game">Name of the game to be cleaned</param>
/// <returns>The cleaned name</returns>
public static string CleanGameName(string game)
{
///Run the name through the filters to make sure that it's correct
game = NormalizeChars(game);
game = RussianToLatin(game);
game = SearchPattern(game);
game = new Regex(@"(([[(].*[\)\]] )?([^([]+))").Match(game).Groups[1].Value;
game = game.TrimStart().TrimEnd();
return game;
}
/// <summary>
/// Clean a CRC32 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanCRC32(string hash)
{
return CleanHashData(hash, Constants.CRCLength);
}
/// <summary>
/// Clean a MD5 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanMD5(string hash)
{
return CleanHashData(hash, Constants.MD5Length);
}
#if NET_FRAMEWORK
/// <summary>
/// Clean a RIPEMD160 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanRIPEMD160(string hash)
{
return CleanHashData(hash, Constants.RIPEMD160Length);
}
#endif
/// <summary>
/// Clean a SHA1 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanSHA1(string hash)
{
return CleanHashData(hash, Constants.SHA1Length);
}
/// <summary>
/// Clean a SHA256 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanSHA256(string hash)
{
return CleanHashData(hash, Constants.SHA256Length);
}
/// <summary>
/// Clean a SHA384 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanSHA384(string hash)
{
return CleanHashData(hash, Constants.SHA384Length);
}
/// <summary>
/// Clean a SHA512 string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanSHA512(string hash)
{
return CleanHashData(hash, Constants.SHA512Length);
}
/// <summary>
/// Clean a hash string and pad to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <param name="padding">Amount of characters to pad to</param>
/// <returns>Cleaned string</returns>
public static string CleanHashData(string hash, int padding)
{
// If we have a known blank hash, return blank
if (string.IsNullOrWhiteSpace(hash) || hash == "-" || hash == "_")
return string.Empty;
// Check to see if it's a "hex" hash
hash = hash.Trim().Replace("0x", string.Empty);
// If we have a blank hash now, return blank
if (string.IsNullOrWhiteSpace(hash))
return string.Empty;
// If the hash shorter than the required length, pad it
if (hash.Length < padding)
hash = hash.PadLeft(padding, '0');
// If the hash is longer than the required length, it's invalid
else if (hash.Length > padding)
return string.Empty;
// Now normalize the hash
hash = hash.ToLowerInvariant();
// Otherwise, make sure that every character is a proper match
for (int i = 0; i < hash.Length; i++)
{
if ((hash[i] < '0' || hash[i] > '9') && (hash[i] < 'a' || hash[i] > 'f'))
{
hash = string.Empty;
break;
}
}
return hash;
}
/// <summary>
/// Clean a hash string from a Listrom DAT
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <returns>Cleaned string</returns>
public static string CleanListromHashData(string hash)
{
if (hash.StartsWith("CRC"))
return hash.Substring(4, 8).ToLowerInvariant();
else if (hash.StartsWith("SHA1"))
return hash.Substring(5, 40).ToLowerInvariant();
return hash;
}
/// <summary> /// <summary>
/// Get a sanitized size from an input string /// Get a sanitized size from an input string
/// </summary> /// </summary>
@@ -209,144 +37,5 @@ namespace SabreTools.Core.Tools
List<char> invalidPath = Path.GetInvalidPathChars().ToList(); List<char> invalidPath = Path.GetInvalidPathChars().ToList();
return new string(s.Where(c => !invalidPath.Contains(c)).ToArray()); return new string(s.Where(c => !invalidPath.Contains(c)).ToArray());
} }
/// <summary>
/// Remove all unicode-specific chars from a string
/// </summary>
/// <param name="s">Input string to clean</param>
/// <returns>Cleaned string</returns>
public static string RemoveUnicodeCharacters(string s)
{
return new string(s.Where(c => c <= 255).ToArray());
}
/// <summary>
/// Replace accented characters
/// </summary>
/// <param name="input">String to be parsed</param>
/// <returns>String with characters replaced</returns>
private static string NormalizeChars(string input)
{
string[,] charmap = {
{ "Á", "A" }, { "á", "a" },
{ "À", "A" }, { "à", "a" },
{ "Â", "A" }, { "â", "a" },
{ "Ä", "Ae" }, { "ä", "ae" },
{ "Ã", "A" }, { "ã", "a" },
{ "Å", "A" }, { "å", "a" },
{ "Æ", "Ae" }, { "æ", "ae" },
{ "Ç", "C" }, { "ç", "c" },
{ "Ð", "D" }, { "ð", "d" },
{ "É", "E" }, { "é", "e" },
{ "È", "E" }, { "è", "e" },
{ "Ê", "E" }, { "ê", "e" },
{ "Ë", "E" }, { "ë", "e" },
{ "ƒ", "f" },
{ "Í", "I" }, { "í", "i" },
{ "Ì", "I" }, { "ì", "i" },
{ "Î", "I" }, { "î", "i" },
{ "Ï", "I" }, { "ï", "i" },
{ "Ñ", "N" }, { "ñ", "n" },
{ "Ó", "O" }, { "ó", "o" },
{ "Ò", "O" }, { "ò", "o" },
{ "Ô", "O" }, { "ô", "o" },
{ "Ö", "Oe" }, { "ö", "oe" },
{ "Õ", "O" }, { "õ", "o" },
{ "Ø", "O" }, { "ø", "o" },
{ "Š", "S" }, { "š", "s" },
{ "ß", "ss" },
{ "Þ", "B" }, { "þ", "b" },
{ "Ú", "U" }, { "ú", "u" },
{ "Ù", "U" }, { "ù", "u" },
{ "Û", "U" }, { "û", "u" },
{ "Ü", "Ue" }, { "ü", "ue" },
{ "ÿ", "y" },
{ "Ý", "Y" }, { "ý", "y" },
{ "Ž", "Z" }, { "ž", "z" },
};
for (int i = 0; i < charmap.GetLength(0); i++)
{
input = input.Replace(charmap[i, 0], charmap[i, 1]);
}
return input;
}
/// <summary>
/// Convert Cyrillic lettering to Latin lettering
/// </summary>
/// <param name="input">String to be parsed</param>
/// <returns>String with characters replaced</returns>
private static string RussianToLatin(string input)
{
string[,] charmap = {
{ "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" },
{ "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" },
{ "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" },
{ "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" },
{ "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" },
{ "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "y" }, { "Ь", string.Empty },
{ "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" },
{ "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" },
{ "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" },
{ "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" },
{ "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" },
{ "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" },
{ "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" },
{ "я", "ya" },
};
for (int i = 0; i < charmap.GetLength(0); i++)
{
input = input.Replace(charmap[i, 0], charmap[i, 1]);
}
return input;
}
/// <summary>
/// Replace special characters and patterns
/// </summary>
/// <param name="input">String to be parsed</param>
/// <returns>String with characters replaced</returns>
private static string SearchPattern(string input)
{
string[,] charmap = {
{ @"~", " - " },
{ @"_", " " },
{ @":", " " },
{ @">", ")" },
{ @"<", "(" },
{ @"\|", "-" },
{ "\"", "'" },
{ @"\*", "." },
{ @"\\", "-" },
{ @"/", "-" },
{ @"\?", " " },
{ @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " },
{ @"\(([^)]+)\)", " " },
{ @"\[([^]]+)\]", " " },
{ @"\{([^}]+)\}", " " },
{ @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " },
{ @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " },
{ @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " },
{ @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " },
{ @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " },
{ @"[-]+", "-" },
{ @"\A\s*\)", " " },
{ @"\A\s*(,|-)", " " },
{ @"\s+", " " },
{ @"\s+,", "," },
{ @"\s*(,|-)\s*\Z", " " },
};
for (int i = 0; i < charmap.GetLength(0); i++)
{
input = Regex.Replace(input, charmap[i, 0], charmap[i, 1]);
}
return input;
}
} }
} }

View File

@@ -312,5 +312,48 @@ namespace SabreTools.DatFiles
/// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param> /// <param name="keep">True if full pathnames are to be kept, false otherwise (default)</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param> /// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
protected abstract void ParseFile(string filename, int indexId, bool keep, bool throwOnError = false); protected abstract void ParseFile(string filename, int indexId, bool keep, bool throwOnError = false);
#region Input Sanitization
/// <summary>
/// Normalize a date string by round-tripping it through <see cref="DateTime"/> when it parses
/// </summary>
/// <param name="input">Raw date text; may be null</param>
/// <returns>
/// Null for null input; the default <c>ToString()</c> rendering of the parsed value when
/// <c>DateTime.TryParse</c> accepts the text; otherwise the input unchanged
/// </returns>
protected string CleanDate(string input)
{
    // Nothing to normalize without a value
    if (input == null)
        return null;

    // Text that does not parse is passed through untouched rather than discarded
    return DateTime.TryParse(input, out DateTime parsed)
        ? parsed.ToString()
        : input;
}
/// <summary>
/// Extract the hash digits from a tagged hash field of a Listrom DAT
/// </summary>
/// <param name="hash">Field text; may be null</param>
/// <returns>
/// For text starting with "CRC", the 8 characters after the tag and one separator character,
/// lowercased; for text starting with "SHA1", the 40 characters after the tag and one separator
/// character, lowercased; otherwise the input unchanged (including null and values too short
/// to contain the full hash)
/// </returns>
protected string CleanListromHashData(string hash)
{
    // Nothing to extract from a missing value
    if (hash == null)
        return null;

    // Tag (3 chars) + separator (1 char) + 8 hash characters; shorter values are left alone
    // instead of letting Substring throw
    if (hash.StartsWith("CRC", StringComparison.Ordinal) && hash.Length >= 12)
        return hash.Substring(4, 8).ToLowerInvariant();

    // Tag (4 chars) + separator (1 char) + 40 hash characters
    if (hash.StartsWith("SHA1", StringComparison.Ordinal) && hash.Length >= 45)
        return hash.Substring(5, 40).ToLowerInvariant();

    return hash;
}
#endregion
} }
} }

View File

@@ -104,7 +104,7 @@ namespace SabreTools.DatFiles.Formats
Disk disk = new Disk() Disk disk = new Disk()
{ {
Name = romname, Name = romname,
SHA1 = Sanitizer.CleanListromHashData(split[0]), SHA1 = CleanListromHashData(split[0]),
Machine = new Machine Machine = new Machine
{ {
@@ -127,7 +127,7 @@ namespace SabreTools.DatFiles.Formats
Disk disk = new Disk() Disk disk = new Disk()
{ {
Name = romname, Name = romname,
SHA1 = Sanitizer.CleanListromHashData(split[1]), SHA1 = CleanListromHashData(split[1]),
ItemStatus = ItemStatus.BadDump, ItemStatus = ItemStatus.BadDump,
Machine = new Machine Machine = new Machine
@@ -152,8 +152,8 @@ namespace SabreTools.DatFiles.Formats
{ {
Name = romname, Name = romname,
Size = Sanitizer.CleanLong(split[0]), Size = Sanitizer.CleanLong(split[0]),
CRC = Sanitizer.CleanListromHashData(split[1]), CRC = CleanListromHashData(split[1]),
SHA1 = Sanitizer.CleanListromHashData(split[2]), SHA1 = CleanListromHashData(split[2]),
Machine = new Machine Machine = new Machine
{ {
@@ -200,8 +200,8 @@ namespace SabreTools.DatFiles.Formats
{ {
Name = romname, Name = romname,
Size = Sanitizer.CleanLong(split[0]), Size = Sanitizer.CleanLong(split[0]),
CRC = Sanitizer.CleanListromHashData(split[2]), CRC = CleanListromHashData(split[2]),
SHA1 = Sanitizer.CleanListromHashData(split[3]), SHA1 = CleanListromHashData(split[3]),
ItemStatus = ItemStatus.BadDump, ItemStatus = ItemStatus.BadDump,
Machine = new Machine Machine = new Machine

View File

@@ -622,7 +622,7 @@ namespace SabreTools.DatFiles.Formats
SpamSum = reader.GetAttribute("spamsum"), SpamSum = reader.GetAttribute("spamsum"),
MergeTag = reader.GetAttribute("merge"), MergeTag = reader.GetAttribute("merge"),
ItemStatus = reader.GetAttribute("status").AsItemStatus(), ItemStatus = reader.GetAttribute("status").AsItemStatus(),
Date = Sanitizer.CleanDate(reader.GetAttribute("date")), Date = CleanDate(reader.GetAttribute("date")),
Inverted = reader.GetAttribute("inverted").AsYesNo(), Inverted = reader.GetAttribute("inverted").AsYesNo(),
Source = new Source Source = new Source

View File

@@ -163,7 +163,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -116,7 +116,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -143,7 +143,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -161,7 +161,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -228,7 +228,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -2,6 +2,7 @@
using System.Collections.Generic; using System.Collections.Generic;
using System.IO; using System.IO;
using System.Linq; using System.Linq;
using System.Text.RegularExpressions;
using System.Xml.Serialization; using System.Xml.Serialization;
using SabreTools.Core; using SabreTools.Core;
@@ -726,15 +727,15 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip machine name and description // If we're stripping unicode characters, strip machine name and description
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
{ {
Machine.Name = Sanitizer.RemoveUnicodeCharacters(Machine.Name); Machine.Name = RemoveUnicodeCharacters(Machine.Name);
Machine.Description = Sanitizer.RemoveUnicodeCharacters(Machine.Description); Machine.Description = RemoveUnicodeCharacters(Machine.Description);
} }
// If we're in cleaning mode, sanitize machine name and description // If we're in cleaning mode, sanitize machine name and description
if (cleaner?.Clean == true) if (cleaner?.Clean == true)
{ {
Machine.Name = Sanitizer.CleanGameName(Machine.Name); Machine.Name = CleanGameName(Machine.Name);
Machine.Description = Sanitizer.CleanGameName(Machine.Description); Machine.Description = CleanGameName(Machine.Description);
} }
// If we are in single game mode, rename the machine // If we are in single game mode, rename the machine
@@ -782,6 +783,277 @@ namespace SabreTools.DatItems
{ {
} }
/// <summary>
/// Clean a game (or rom) name to the WoD standard
/// </summary>
/// <param name="game">Name of the game to be cleaned; may be null</param>
/// <returns>The cleaned, trimmed name, or null when <paramref name="game"/> is null</returns>
protected string CleanGameName(string game)
{
    // Null in, null out: callers pass optional fields such as a machine description
    if (game == null)
        return null;

    // Run the name through the filters to make sure that it's correct
    game = NormalizeChars(game);
    game = RussianToLatin(game);
    game = SearchPattern(game);

    // Keep the outer group of the first match: an optional leading bracketed or parenthesized
    // segment followed by a space, plus the run of text up to the next '(' or '['
    game = Regex.Match(game, @"(([[(].*[\)\]] )?([^([]+))").Groups[1].Value;

    return game.Trim();
}
/// <summary>
/// Sanitize a CRC32 hash string, padding it to the CRC32 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.CRCLength</c> characters</returns>
protected string CleanCRC32(string hash)
    => CleanHashData(hash, Constants.CRCLength);
/// <summary>
/// Sanitize an MD5 hash string, padding it to the MD5 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.MD5Length</c> characters</returns>
protected string CleanMD5(string hash)
    => CleanHashData(hash, Constants.MD5Length);
#if NET_FRAMEWORK
/// <summary>
/// Sanitize a RIPEMD160 hash string, padding it to the RIPEMD160 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.RIPEMD160Length</c> characters</returns>
protected string CleanRIPEMD160(string hash)
    => CleanHashData(hash, Constants.RIPEMD160Length);
#endif
/// <summary>
/// Sanitize a SHA1 hash string, padding it to the SHA1 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.SHA1Length</c> characters</returns>
protected string CleanSHA1(string hash)
    => CleanHashData(hash, Constants.SHA1Length);
/// <summary>
/// Sanitize a SHA256 hash string, padding it to the SHA256 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.SHA256Length</c> characters</returns>
protected string CleanSHA256(string hash)
    => CleanHashData(hash, Constants.SHA256Length);
/// <summary>
/// Sanitize a SHA384 hash string, padding it to the SHA384 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.SHA384Length</c> characters</returns>
protected string CleanSHA384(string hash)
    => CleanHashData(hash, Constants.SHA384Length);
/// <summary>
/// Sanitize a SHA512 hash string, padding it to the SHA512 length
/// </summary>
/// <param name="hash">Raw hash text to sanitize</param>
/// <returns>Result of <c>CleanHashData</c> for <c>Constants.SHA512Length</c> characters</returns>
protected string CleanSHA512(string hash)
    => CleanHashData(hash, Constants.SHA512Length);
/// <summary>
/// Remove every character whose code is above 255 from a string
/// </summary>
/// <param name="s">Input string to clean; may be null</param>
/// <returns>
/// The input with only characters of code 255 or lower kept, in their original order,
/// or null when <paramref name="s"/> is null
/// </returns>
protected string RemoveUnicodeCharacters(string s)
{
    // Null in, null out: optional fields (e.g. a description) must not make cleaning throw
    if (s == null)
        return null;

    return new string(s.Where(c => c <= 255).ToArray());
}
/// <summary>
/// Clean a hash string and pad it to the correct size
/// </summary>
/// <param name="hash">Hash string to sanitize</param>
/// <param name="padding">Exact number of characters the cleaned hash must have</param>
/// <returns>
/// The lowercase hexadecimal hash, left-padded with '0' to <paramref name="padding"/> characters,
/// or an empty string when the input is blank, is "-" or "_", is longer than
/// <paramref name="padding"/>, or contains a non-hexadecimal character
/// </returns>
private string CleanHashData(string hash, int padding)
{
    // If we have a known blank hash, return blank
    if (string.IsNullOrWhiteSpace(hash) || hash == "-" || hash == "_")
        return string.Empty;

    // Strip a single leading hex marker ("0x" or "0X"). Only the prefix is removed: deleting
    // "0x" from the middle of a value would turn an invalid hash into a valid-looking one.
    hash = hash.Trim();
    if (hash.Length >= 2 && hash[0] == '0' && (hash[1] == 'x' || hash[1] == 'X'))
        hash = hash.Substring(2);

    // If we have a blank hash now, return blank
    if (string.IsNullOrWhiteSpace(hash))
        return string.Empty;

    // Longer than the required length means it cannot be this kind of hash
    if (hash.Length > padding)
        return string.Empty;

    // Shorter values are assumed to have lost leading zeroes
    if (hash.Length < padding)
        hash = hash.PadLeft(padding, '0');

    // Now normalize the hash
    hash = hash.ToLowerInvariant();

    // Every character has to be a lowercase hexadecimal digit
    foreach (char c in hash)
    {
        bool isDigit = c >= '0' && c <= '9';
        bool isHexLetter = c >= 'a' && c <= 'f';
        if (!isDigit && !isHexLetter)
            return string.Empty;
    }

    return hash;
}
/// <summary>
/// Swap accented and special Latin characters for plain replacements
/// </summary>
/// <param name="input">Text to convert</param>
/// <returns>The text with every character listed in the table replaced</returns>
private string NormalizeChars(string input)
{
    // Each row is { character to find, replacement text }
    string[,] replacements = {
        { "Á", "A" }, { "á", "a" },
        { "À", "A" }, { "à", "a" },
        { "Â", "A" }, { "â", "a" },
        { "Ä", "Ae" }, { "ä", "ae" },
        { "Ã", "A" }, { "ã", "a" },
        { "Å", "A" }, { "å", "a" },
        { "Æ", "Ae" }, { "æ", "ae" },
        { "Ç", "C" }, { "ç", "c" },
        { "Ð", "D" }, { "ð", "d" },
        { "É", "E" }, { "é", "e" },
        { "È", "E" }, { "è", "e" },
        { "Ê", "E" }, { "ê", "e" },
        { "Ë", "E" }, { "ë", "e" },
        { "ƒ", "f" },
        { "Í", "I" }, { "í", "i" },
        { "Ì", "I" }, { "ì", "i" },
        { "Î", "I" }, { "î", "i" },
        { "Ï", "I" }, { "ï", "i" },
        { "Ñ", "N" }, { "ñ", "n" },
        { "Ó", "O" }, { "ó", "o" },
        { "Ò", "O" }, { "ò", "o" },
        { "Ô", "O" }, { "ô", "o" },
        { "Ö", "Oe" }, { "ö", "oe" },
        { "Õ", "O" }, { "õ", "o" },
        { "Ø", "O" }, { "ø", "o" },
        { "Š", "S" }, { "š", "s" },
        { "ß", "ss" },
        { "Þ", "B" }, { "þ", "b" },
        { "Ú", "U" }, { "ú", "u" },
        { "Ù", "U" }, { "ù", "u" },
        { "Û", "U" }, { "û", "u" },
        { "Ü", "Ue" }, { "ü", "ue" },
        { "ÿ", "y" },
        { "Ý", "Y" }, { "ý", "y" },
        { "Ž", "Z" }, { "ž", "z" },
    };

    // Apply the rows top to bottom
    string result = input;
    int rowCount = replacements.GetLength(0);
    for (int row = 0; row < rowCount; row++)
    {
        string accented = replacements[row, 0];
        string plain = replacements[row, 1];
        result = result.Replace(accented, plain);
    }

    return result;
}
/// <summary>
/// Convert Cyrillic lettering to Latin lettering
/// </summary>
/// <param name="input">String to be parsed</param>
/// <returns>
/// The text with every Cyrillic letter in the table replaced by its Latin transliteration;
/// hard and soft signs are removed
/// </returns>
private string RussianToLatin(string input)
{
    // Each row is { Cyrillic letter, Latin replacement }. Uppercase letters map to a
    // capitalized replacement so the casing of the original name is kept; Ф, Ы, Э, Ю and Я
    // previously produced lowercase output, unlike every other uppercase letter.
    string[,] charmap = {
        { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" },
        { "Е", "E" }, { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" },
        { "Й", "J" }, { "К", "K" }, { "Л", "L" }, { "М", "M" }, { "Н", "N" },
        { "О", "O" }, { "П", "P" }, { "Р", "R" }, { "С", "S" }, { "Т", "T" },
        { "У", "U" }, { "Ф", "F" }, { "Х", "Kh" }, { "Ц", "Ts" }, { "Ч", "Ch" },
        { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", string.Empty }, { "Ы", "Y" }, { "Ь", string.Empty },
        { "Э", "E" }, { "Ю", "Yu" }, { "Я", "Ya" }, { "а", "a" }, { "б", "b" },
        { "в", "v" }, { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" },
        { "ж", "zh" }, { "з", "z" }, { "и", "i" }, { "й", "j" }, { "к", "k" },
        { "л", "l" }, { "м", "m" }, { "н", "n" }, { "о", "o" }, { "п", "p" },
        { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" }, { "ф", "f" },
        { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" },
        { "ъ", string.Empty }, { "ы", "y" }, { "ь", string.Empty }, { "э", "e" }, { "ю", "yu" },
        { "я", "ya" },
    };

    for (int i = 0; i < charmap.GetLength(0); i++)
    {
        input = input.Replace(charmap[i, 0], charmap[i, 1]);
    }

    return input;
}
/// <summary>
/// Apply an ordered list of regular-expression replacements to a name
/// </summary>
/// <param name="input">Text to rewrite</param>
/// <returns>The text after every rule has been applied, in table order</returns>
private string SearchPattern(string input)
{
    // Each row is { regular expression, replacement }. Order matters: later rules
    // see the output of earlier ones.
    string[,] rules = {
        // Single characters
        { @"~", " - " },
        { @"_", " " },
        { @":", " " },
        { @">", ")" },
        { @"<", "(" },
        { @"\|", "-" },
        { "\"", "'" },
        { @"\*", "." },
        { @"\\", "-" },
        { @"/", "-" },
        { @"\?", " " },

        // Parenthesized, bracketed and braced segments
        { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " },
        { @"\(([^)]+)\)", " " },
        { @"\[([^]]+)\]", " " },
        { @"\{([^}]+)\}", " " },

        // Fixed markers, version tags and date-like digit groups
        { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " },
        { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " },
        { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " },
        { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " },
        { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " },

        // Leftover punctuation and whitespace
        { @"[-]+", "-" },
        { @"\A\s*\)", " " },
        { @"\A\s*(,|-)", " " },
        { @"\s+", " " },
        { @"\s+,", "," },
        { @"\s*(,|-)\s*\Z", " " },
    };

    string result = input;
    int ruleCount = rules.GetLength(0);
    for (int rule = 0; rule < ruleCount; rule++)
    {
        string pattern = rules[rule, 0];
        string replacement = rules[rule, 1];
        result = Regex.Replace(result, pattern, replacement);
    }

    return result;
}
#endregion #endregion
#region Sorting and Merging #region Sorting and Merging

View File

@@ -163,7 +163,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -116,7 +116,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -275,7 +275,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -44,7 +44,7 @@ namespace SabreTools.DatItems
public string MD5 public string MD5
{ {
get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); }
set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); }
} }
/// <summary> /// <summary>
@@ -55,7 +55,7 @@ namespace SabreTools.DatItems
public string SHA1 public string SHA1
{ {
get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); }
set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); }
} }
/// <summary> /// <summary>
@@ -434,7 +434,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -118,7 +118,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -116,7 +116,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -127,7 +127,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -127,7 +127,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -146,7 +146,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -44,7 +44,7 @@ namespace SabreTools.DatItems
public string MD5 public string MD5
{ {
get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); }
set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); }
} }
/// <summary> /// <summary>
@@ -55,7 +55,7 @@ namespace SabreTools.DatItems
public string SHA1 public string SHA1
{ {
get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); }
set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); }
} }
/// <summary> /// <summary>
@@ -66,7 +66,7 @@ namespace SabreTools.DatItems
public string SHA256 public string SHA256
{ {
get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); } get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); }
set { _sha256 = Utilities.StringToByteArray(Sanitizer.CleanSHA256(value)); } set { _sha256 = Utilities.StringToByteArray(CleanSHA256(value)); }
} }
/// <summary> /// <summary>
@@ -333,7 +333,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -154,7 +154,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -127,7 +127,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -141,7 +141,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -171,7 +171,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -71,7 +71,7 @@ namespace SabreTools.DatItems
public string CRC public string CRC
{ {
get { return _crc.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_crc); } get { return _crc.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_crc); }
set { _crc = (value == "null" ? Constants.CRCZeroBytes : Utilities.StringToByteArray(Sanitizer.CleanCRC32(value))); } set { _crc = (value == "null" ? Constants.CRCZeroBytes : Utilities.StringToByteArray(CleanCRC32(value))); }
} }
/// <summary> /// <summary>
@@ -82,7 +82,7 @@ namespace SabreTools.DatItems
public string MD5 public string MD5
{ {
get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); } get { return _md5.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_md5); }
set { _md5 = Utilities.StringToByteArray(Sanitizer.CleanMD5(value)); } set { _md5 = Utilities.StringToByteArray(CleanMD5(value)); }
} }
#if NET_FRAMEWORK #if NET_FRAMEWORK
@@ -94,7 +94,7 @@ namespace SabreTools.DatItems
public string RIPEMD160 public string RIPEMD160
{ {
get { return _ripemd160.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_ripemd160); } get { return _ripemd160.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_ripemd160); }
set { _ripemd160 = Utilities.StringToByteArray(Sanitizer.CleanRIPEMD160(value)); } set { _ripemd160 = Utilities.StringToByteArray(CleanRIPEMD160(value)); }
} }
#endif #endif
@@ -106,7 +106,7 @@ namespace SabreTools.DatItems
public string SHA1 public string SHA1
{ {
get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); } get { return _sha1.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha1); }
set { _sha1 = Utilities.StringToByteArray(Sanitizer.CleanSHA1(value)); } set { _sha1 = Utilities.StringToByteArray(CleanSHA1(value)); }
} }
/// <summary> /// <summary>
@@ -117,7 +117,7 @@ namespace SabreTools.DatItems
public string SHA256 public string SHA256
{ {
get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); } get { return _sha256.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha256); }
set { _sha256 = Utilities.StringToByteArray(Sanitizer.CleanSHA256(value)); } set { _sha256 = Utilities.StringToByteArray(CleanSHA256(value)); }
} }
/// <summary> /// <summary>
@@ -128,7 +128,7 @@ namespace SabreTools.DatItems
public string SHA384 public string SHA384
{ {
get { return _sha384.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha384); } get { return _sha384.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha384); }
set { _sha384 = Utilities.StringToByteArray(Sanitizer.CleanSHA384(value)); } set { _sha384 = Utilities.StringToByteArray(CleanSHA384(value)); }
} }
/// <summary> /// <summary>
@@ -139,7 +139,7 @@ namespace SabreTools.DatItems
public string SHA512 public string SHA512
{ {
get { return _sha512.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha512); } get { return _sha512.IsNullOrEmpty() ? null : Utilities.ByteArrayToString(_sha512); }
set { _sha512 = Utilities.StringToByteArray(Sanitizer.CleanSHA512(value)); } set { _sha512 = Utilities.StringToByteArray(CleanSHA512(value)); }
} }
/// <summary> /// <summary>
@@ -807,7 +807,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -116,7 +116,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -175,7 +175,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -127,7 +127,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -148,7 +148,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -143,7 +143,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)

View File

@@ -145,7 +145,7 @@ namespace SabreTools.DatItems
// If we're stripping unicode characters, strip item name // If we're stripping unicode characters, strip item name
if (cleaner?.RemoveUnicode == true) if (cleaner?.RemoveUnicode == true)
Name = Sanitizer.RemoveUnicodeCharacters(Name); Name = RemoveUnicodeCharacters(Name);
// If we are in NTFS trim mode, trim the game name // If we are in NTFS trim mode, trim the game name
if (cleaner?.Trim == true) if (cleaner?.Trim == true)