using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace SabreTools.Helper
{
    /// <summary>
    /// Include character normalization and replacement mappings
    /// </summary>
    public class Style
    {
        #region Static lookup tables

        /// <summary>
        /// Accented Latin characters and their plain replacements, applied in order by NormalizeChars
        /// </summary>
        private static readonly string[,] AccentMap =
        {
            { "Á", "A" }, { "á", "a" }, { "À", "A" }, { "à", "a" }, { "Â", "A" }, { "â", "a" },
            { "Ä", "Ae" }, { "ä", "ae" }, { "Ã", "A" }, { "ã", "a" }, { "Å", "A" }, { "å", "a" },
            { "Æ", "Ae" }, { "æ", "ae" }, { "Ç", "C" }, { "ç", "c" }, { "Ð", "D" }, { "ð", "d" },
            { "É", "E" }, { "é", "e" }, { "È", "E" }, { "è", "e" }, { "Ê", "E" }, { "ê", "e" },
            { "Ë", "E" }, { "ë", "e" }, { "ƒ", "f" }, { "Í", "I" }, { "í", "i" }, { "Ì", "I" },
            { "ì", "i" }, { "Î", "I" }, { "î", "i" }, { "Ï", "I" }, { "ï", "i" }, { "Ñ", "N" },
            { "ñ", "n" }, { "Ó", "O" }, { "ó", "o" }, { "Ò", "O" }, { "ò", "o" }, { "Ô", "O" },
            { "ô", "o" }, { "Ö", "Oe" }, { "ö", "oe" }, { "Õ", "O" }, { "õ", "o" }, { "Ø", "O" },
            { "ø", "o" }, { "Š", "S" }, { "š", "s" }, { "ß", "ss" }, { "Þ", "B" }, { "þ", "b" },
            { "Ú", "U" }, { "ú", "u" }, { "Ù", "U" }, { "ù", "u" }, { "Û", "U" }, { "û", "u" },
            { "Ü", "Ue" }, { "ü", "ue" }, { "ÿ", "y" }, { "Ý", "Y" }, { "ý", "y" }, { "Ž", "Z" },
            { "ž", "z" },
        };

        /// <summary>
        /// Regex patterns and their replacements, applied in order by SearchPattern.
        /// The order matters: later patterns clean up what earlier ones leave behind.
        /// </summary>
        private static readonly string[,] SearchPatternMap =
        {
            { @"~", " - " },
            { @"_", " " },
            { @":", " " },
            { @">", ")" },
            { @"<", "(" },
            { @"\|", "-" },
            { "\"", "'" },
            { @"\*", "." },
            { @"\\", "-" },
            { @"/", "-" },
            { @"\?", " " },
            { @"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " " },
            { @"\(([^)]+)\)", " " },
            { @"\[([^]]+)\]", " " },
            { @"\{([^}]+)\}", " " },
            { @"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " " },
            { @" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " " },
            { @"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " " },
            { @"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " " },
            { @"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " " },
            { @"[-]+", "-" },
            { @"\A\s*\)", " " },
            { @"\A\s*(,|-)", " " },
            { @"\s+", " " },
            { @"\s+,", "," },
            { @"\s*(,|-)\s*\Z", " " },
        };

        /// <summary>
        /// Pre-built Regex instances for SearchPatternMap (declared after the map so it is initialized second).
        /// Built once because the number of patterns exceeds the default size of the static Regex cache.
        /// </summary>
        private static readonly Regex[] SearchPatternRegexes = CompileSearchPatterns();

        /// <summary>
        /// Cyrillic letters and their Latin transliterations, applied in order by RussianToLatin
        /// </summary>
        /// <remarks>
        /// NOTE(review): the uppercase entries for "Ф", "Ы", "Э", "Ю" and "Я" map to lowercase Latin text,
        /// unlike the other uppercase entries. Left untouched because cleaned names may be compared
        /// against existing data - confirm whether this is intentional.
        /// </remarks>
        private static readonly string[,] CyrillicMap =
        {
            { "А", "A" }, { "Б", "B" }, { "В", "V" }, { "Г", "G" }, { "Д", "D" }, { "Е", "E" },
            { "Ё", "Yo" }, { "Ж", "Zh" }, { "З", "Z" }, { "И", "I" }, { "Й", "J" }, { "К", "K" },
            { "Л", "L" }, { "М", "M" }, { "Н", "N" }, { "О", "O" }, { "П", "P" }, { "Р", "R" },
            { "С", "S" }, { "Т", "T" }, { "У", "U" }, { "Ф", "f" }, { "Х", "Kh" }, { "Ц", "Ts" },
            { "Ч", "Ch" }, { "Ш", "Sh" }, { "Щ", "Sch" }, { "Ъ", "" }, { "Ы", "y" }, { "Ь", "" },
            { "Э", "e" }, { "Ю", "yu" }, { "Я", "ya" }, { "а", "a" }, { "б", "b" }, { "в", "v" },
            { "г", "g" }, { "д", "d" }, { "е", "e" }, { "ё", "yo" }, { "ж", "zh" }, { "з", "z" },
            { "и", "i" }, { "й", "j" }, { "к", "k" }, { "л", "l" }, { "м", "m" }, { "н", "n" },
            { "о", "o" }, { "п", "p" }, { "р", "r" }, { "с", "s" }, { "т", "t" }, { "у", "u" },
            { "ф", "f" }, { "х", "kh" }, { "ц", "ts" }, { "ч", "ch" }, { "ш", "sh" }, { "щ", "sch" },
            { "ъ", "" }, { "ы", "y" }, { "ь", "" }, { "э", "e" }, { "ю", "yu" }, { "я", "ya" },
        };

        /// <summary>
        /// Pattern used by CleanGameName to pick the leading portion of a filtered name
        /// </summary>
        private static readonly Regex GameNameRegex = new Regex(@"(([[(].*[\)\]] )?([^([]+))");

        /// <summary>
        /// Characters reported by the framework as invalid in paths, cached for fast membership tests
        /// </summary>
        private static readonly HashSet<char> InvalidPathChars = new HashSet<char>(Path.GetInvalidPathChars());

        /// <summary>
        /// Build one Regex per row of SearchPatternMap, preserving row order
        /// </summary>
        private static Regex[] CompileSearchPatterns()
        {
            Regex[] compiled = new Regex[SearchPatternMap.GetLength(0)];
            for (int i = 0; i < compiled.Length; i++)
            {
                compiled[i] = new Regex(SearchPatternMap[i, 0]);
            }

            return compiled;
        }

        /// <summary>
        /// Apply every (search, replacement) row of a map to the input, in row order
        /// </summary>
        private static string ApplyCharMap(string input, string[,] map)
        {
            for (int i = 0; i < map.GetLength(0); i++)
            {
                input = input.Replace(map[i, 0], map[i, 1]);
            }

            return input;
        }

        #endregion

        #region WoD-based String Cleaning

        /// <summary>
        /// Replace accented characters
        /// </summary>
        /// <param name="input">String to be parsed</param>
        /// <returns>String with characters replaced; a null or empty input is returned unchanged</returns>
        public static string NormalizeChars(string input)
        {
            if (String.IsNullOrEmpty(input))
            {
                return input;
            }

            return ApplyCharMap(input, AccentMap);
        }

        /// <summary>
        /// Replace special characters and patterns
        /// </summary>
        /// <param name="input">String to be parsed</param>
        /// <returns>String with characters replaced; a null or empty input is returned unchanged</returns>
        public static string SearchPattern(string input)
        {
            if (String.IsNullOrEmpty(input))
            {
                return input;
            }

            for (int i = 0; i < SearchPatternRegexes.Length; i++)
            {
                input = SearchPatternRegexes[i].Replace(input, SearchPatternMap[i, 1]);
            }

            return input;
        }

        /// <summary>
        /// Convert Cyrillic lettering to Latin lettering
        /// </summary>
        /// <param name="input">String to be parsed</param>
        /// <returns>String with characters replaced; a null or empty input is returned unchanged</returns>
        public static string RussianToLatin(string input)
        {
            if (String.IsNullOrEmpty(input))
            {
                return input;
            }

            return ApplyCharMap(input, CyrillicMap);
        }

        #endregion

        #region DAT Cleaning

        /// <summary>
        /// Generate a proper outfile name based on a DAT and output directory
        /// </summary>
        /// <param name="outDir">Output directory</param>
        /// <param name="datdata">DAT information</param>
        /// <param name="overwrite">True if we ignore existing files (default), false otherwise</param>
        /// <returns>Dictionary of output formats mapped to file names</returns>
        /// <remarks>
        /// Formats that would otherwise share an extension get a distinguishing infix
        /// (".dc.dat", ".rc.dat", ".ol.xml", ".sd.xml", ".sl.xml") when a higher-priority
        /// format with the same extension is also requested.
        /// NOTE(review): an empty outDir becomes a bare directory separator - confirm callers never pass "".
        /// </remarks>
        public static Dictionary<OutputFormat, string> CreateOutfileNames(string outDir, DatFile datdata, bool overwrite = true)
        {
            // Create the output dictionary
            Dictionary<OutputFormat, string> outfileNames = new Dictionary<OutputFormat, string>();

            // Double check the outDir for the end delim
            string separator = Path.DirectorySeparatorChar.ToString();
            if (!outDir.EndsWith(separator, StringComparison.Ordinal))
            {
                outDir += separator;
            }

            // Work out which formats were requested
            var formats = datdata.OutputFormat;
            bool clrMamePro = (formats & OutputFormat.ClrMamePro) != 0;
            bool csv = (formats & OutputFormat.CSV) != 0;
            bool dosCenter = (formats & OutputFormat.DOSCenter) != 0;
            bool logiqx = (formats & OutputFormat.Logiqx) != 0;
            bool missFile = (formats & OutputFormat.MissFile) != 0;
            bool offlineList = (formats & OutputFormat.OfflineList) != 0;
            bool redumpMd5 = (formats & OutputFormat.RedumpMD5) != 0;
            bool redumpSfv = (formats & OutputFormat.RedumpSFV) != 0;
            bool redumpSha1 = (formats & OutputFormat.RedumpSHA1) != 0;
            bool romCenter = (formats & OutputFormat.RomCenter) != 0;
            bool sabreDat = (formats & OutputFormat.SabreDat) != 0;
            bool softwareList = (formats & OutputFormat.SoftwareList) != 0;
            bool tsv = (formats & OutputFormat.TSV) != 0;

            // ClrMamePro
            if (clrMamePro)
            {
                outfileNames.Add(OutputFormat.ClrMamePro, CreateOutfileNamesHelper(outDir, ".dat", datdata, overwrite));
            }

            // CSV
            if (csv)
            {
                outfileNames.Add(OutputFormat.CSV, CreateOutfileNamesHelper(outDir, ".csv", datdata, overwrite));
            }

            // DOSCenter (yields ".dat" to ClrMamePro and RomCenter)
            if (dosCenter)
            {
                string extension = (clrMamePro || romCenter) ? ".dc.dat" : ".dat";
                outfileNames.Add(OutputFormat.DOSCenter, CreateOutfileNamesHelper(outDir, extension, datdata, overwrite));
            }

            // Logiqx XML
            if (logiqx)
            {
                outfileNames.Add(OutputFormat.Logiqx, CreateOutfileNamesHelper(outDir, ".xml", datdata, overwrite));
            }

            // Missfile
            if (missFile)
            {
                outfileNames.Add(OutputFormat.MissFile, CreateOutfileNamesHelper(outDir, ".txt", datdata, overwrite));
            }

            // OfflineList (yields ".xml" to Logiqx, SabreDAT and Software List)
            if (offlineList)
            {
                string extension = (logiqx || sabreDat || softwareList) ? ".ol.xml" : ".xml";
                outfileNames.Add(OutputFormat.OfflineList, CreateOutfileNamesHelper(outDir, extension, datdata, overwrite));
            }

            // Redump MD5
            if (redumpMd5)
            {
                outfileNames.Add(OutputFormat.RedumpMD5, CreateOutfileNamesHelper(outDir, ".md5", datdata, overwrite));
            }

            // Redump SFV
            if (redumpSfv)
            {
                outfileNames.Add(OutputFormat.RedumpSFV, CreateOutfileNamesHelper(outDir, ".sfv", datdata, overwrite));
            }

            // Redump SHA-1
            if (redumpSha1)
            {
                outfileNames.Add(OutputFormat.RedumpSHA1, CreateOutfileNamesHelper(outDir, ".sha1", datdata, overwrite));
            }

            // RomCenter (yields ".dat" to ClrMamePro)
            if (romCenter)
            {
                string extension = clrMamePro ? ".rc.dat" : ".dat";
                outfileNames.Add(OutputFormat.RomCenter, CreateOutfileNamesHelper(outDir, extension, datdata, overwrite));
            }

            // SabreDAT (yields ".xml" to Logiqx)
            if (sabreDat)
            {
                string extension = logiqx ? ".sd.xml" : ".xml";
                outfileNames.Add(OutputFormat.SabreDat, CreateOutfileNamesHelper(outDir, extension, datdata, overwrite));
            }

            // Software List (yields ".xml" to Logiqx and SabreDAT)
            if (softwareList)
            {
                string extension = (logiqx || sabreDat) ? ".sl.xml" : ".xml";
                outfileNames.Add(OutputFormat.SoftwareList, CreateOutfileNamesHelper(outDir, extension, datdata, overwrite));
            }

            // TSV
            if (tsv)
            {
                outfileNames.Add(OutputFormat.TSV, CreateOutfileNamesHelper(outDir, ".tsv", datdata, overwrite));
            }

            return outfileNames;
        }

        /// <summary>
        /// Help generating the outfile name
        /// </summary>
        /// <param name="outDir">Output directory</param>
        /// <param name="extension">Extension to use for the file</param>
        /// <param name="datdata">DAT information</param>
        /// <param name="overwrite">True if we ignore existing files, false otherwise</param>
        /// <returns>String containing the new filename</returns>
        private static string CreateOutfileNamesHelper(string outDir, string extension, DatFile datdata, bool overwrite)
        {
            // Fall back to the description when the DAT has no file name
            string filename = String.IsNullOrEmpty(datdata.FileName) ? datdata.Description : datdata.FileName;
            string outfile = CollapseDoubledSeparators(outDir + filename + extension);

            // When not overwriting, append _1, _2, ... until an unused name is found
            if (!overwrite)
            {
                for (int i = 1; File.Exists(outfile); i++)
                {
                    outfile = CollapseDoubledSeparators(outDir + filename + "_" + i + extension);
                }
            }

            return outfile;
        }

        /// <summary>
        /// Replace each doubled directory separator with a single one (single pass)
        /// </summary>
        private static string CollapseDoubledSeparators(string path)
        {
            string separator = Path.DirectorySeparatorChar.ToString();
            return path.Replace(separator + separator, separator);
        }

        /// <summary>
        /// Clean a game (or rom) name to the WoD standard
        /// </summary>
        /// <param name="game">Name of the game to be cleaned</param>
        /// <returns>The cleaned name; a null or empty input is returned unchanged</returns>
        public static string CleanGameName(string game)
        {
            if (String.IsNullOrEmpty(game))
            {
                return game;
            }

            // Run the name through the filters to make sure that it's correct
            game = NormalizeChars(game);
            game = RussianToLatin(game);
            game = SearchPattern(game);

            // Keep only the first captured portion of the filtered name
            game = GameNameRegex.Match(game).Groups[1].Value;
            return game.Trim();
        }

        /// <summary>
        /// Clean a game (or rom) name to the WoD standard
        /// </summary>
        /// <param name="game">Array representing the path to be cleaned</param>
        /// <returns>The path parts joined with the directory separator, with only the last part cleaned</returns>
        /// <remarks>
        /// The last element of the supplied array is overwritten with its cleaned value.
        /// A null or empty array yields an empty string.
        /// </remarks>
        public static string CleanGameName(string[] game)
        {
            if (game == null || game.Length == 0)
            {
                return String.Empty;
            }

            int last = game.Length - 1;
            game[last] = CleanGameName(game[last]);

            return String.Join(Path.DirectorySeparatorChar.ToString(), game).Trim();
        }

        /// <summary>
        /// Clean a hash string and pad to the correct size
        /// </summary>
        /// <param name="hash">Hash string to sanitize</param>
        /// <param name="padding">Amount of characters to pad to</param>
        /// <returns>
        /// Lowercase hash left-padded with '0' to at least <paramref name="padding"/> characters,
        /// or an empty string if the input is null, empty, "-", or contains a non-hexadecimal character
        /// </returns>
        public static string CleanHashData(string hash, int padding)
        {
            if (String.IsNullOrEmpty(hash))
            {
                return "";
            }

            // First get the hash to the correct length
            hash = hash.Trim();
            if (hash.StartsWith("0x", StringComparison.Ordinal))
            {
                hash = hash.Remove(0, 2);
            }

            if (hash.Length == 0 || hash == "-")
            {
                return "";
            }

            hash = hash.PadLeft(padding, '0').ToLowerInvariant();

            // Then make sure that it has the correct characters. Every character must be a hex digit;
            // checking only for a run of hex digits somewhere in the string lets junk through.
            bool allHex = hash.All(c => (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f'));
            return allHex ? hash : "";
        }

        /// <summary>
        /// Clean a hash byte array and pad to the correct size
        /// </summary>
        /// <param name="hash">Hash byte array to sanitize</param>
        /// <param name="padding">Amount of bytes to pad to</param>
        /// <returns>
        /// The input itself when it is null, when <paramref name="padding"/> is not positive, or when it already
        /// has the requested length; the first <paramref name="padding"/> bytes when it is longer;
        /// otherwise a new array with leading zero bytes followed by the original bytes
        /// </returns>
        public static byte[] CleanHashData(byte[] hash, int padding)
        {
            // If we have a null hash or a non-positive padding, return the hash
            if (hash == null || padding <= 0)
            {
                return hash;
            }

            // If we have a hash longer than the padding, trim and return
            if (hash.Length > padding)
            {
                return hash.Take(padding).ToArray();
            }

            // If we have a hash of the correct length, return
            if (hash.Length == padding)
            {
                return hash;
            }

            // A new array is already zero-filled, so only the original bytes need to be
            // copied into the right-aligned tail
            byte[] padded = new byte[padding];
            Array.Copy(hash, 0, padded, padding - hash.Length, hash.Length);
            return padded;
        }

        #endregion

        #region String Manipulation

        /// <summary>
        /// Get if a string contains Unicode characters
        /// </summary>
        /// <param name="s">Input string to test</param>
        /// <returns>True if the string contains at least one character with a code above 255, false otherwise</returns>
        public static bool IsUnicode(string s)
        {
            return s.Any(c => c > 255);
        }

        /// <summary>
        /// Remove all chars that are considered path unsafe
        /// </summary>
        /// <param name="s">Input string to clean</param>
        /// <returns>Cleaned string</returns>
        public static string RemovePathUnsafeCharacters(string s)
        {
            return new string(s.Where(c => !InvalidPathChars.Contains(c)).ToArray());
        }

        #endregion

        #region Externally sourced methods

        /// <summary>
        /// Returns the human-readable file size for an arbitrary, 64-bit file size
        /// The default format is "0.### XB", e.g. "4.2 KB" or "1.434 GB"
        /// </summary>
        /// <param name="input">Size in bytes (may be negative)</param>
        /// <returns>Human-readable file size</returns>
        /// <link>http://www.somacon.com/p576.php</link>
        public static string GetBytesReadable(long input)
        {
            // Get absolute value. Negating long.MinValue overflows back to itself, so clamp that
            // one case; only the magnitude bucket depends on this value.
            long absolute_i = (input == long.MinValue ? long.MaxValue : Math.Abs(input));

            // Determine the suffix and readable value. Each branch shifts down to the unit
            // one step below the target so the final division keeps the fractional part.
            string suffix;
            double readable;
            if (absolute_i >= 0x1000000000000000) // Exabyte
            {
                suffix = "EB";
                readable = (input >> 50);
            }
            else if (absolute_i >= 0x4000000000000) // Petabyte
            {
                suffix = "PB";
                readable = (input >> 40);
            }
            else if (absolute_i >= 0x10000000000) // Terabyte
            {
                suffix = "TB";
                readable = (input >> 30);
            }
            else if (absolute_i >= 0x40000000) // Gigabyte
            {
                suffix = "GB";
                readable = (input >> 20);
            }
            else if (absolute_i >= 0x100000) // Megabyte
            {
                suffix = "MB";
                readable = (input >> 10);
            }
            else if (absolute_i >= 0x400) // Kilobyte
            {
                suffix = "KB";
                readable = input;
            }
            else
            {
                return input.ToString("0 B"); // Byte
            }

            // Divide by 1024 to get fractional value
            readable = (readable / 1024);

            // Return formatted number with suffix
            return readable.ToString("0.### ") + suffix;
        }

        /// <summary>
        /// Convert a string of hexadecimal digit pairs into the bytes they represent
        /// http://stackoverflow.com/questions/311165/how-do-you-convert-byte-array-to-hexadecimal-string-and-vice-versa
        /// </summary>
        /// <param name="hex">Hexadecimal string with an even number of characters</param>
        /// <returns>One byte per pair of characters</returns>
        /// <exception cref="ArgumentOutOfRangeException">The string has an odd number of characters</exception>
        /// <exception cref="FormatException">A pair is not valid hexadecimal</exception>
        public static byte[] StringToByteArray(string hex)
        {
            int NumberChars = hex.Length;
            byte[] bytes = new byte[NumberChars / 2];
            for (int i = 0; i < NumberChars; i += 2)
            {
                bytes[i / 2] = Convert.ToByte(hex.Substring(i, 2), 16);
            }

            return bytes;
        }

        /// <summary>
        /// Convert a string of hexadecimal digit pairs into the characters with those code values
        /// http://stackoverflow.com/questions/5613279/c-sharp-hex-to-ascii
        /// </summary>
        /// <param name="hexString">Hexadecimal string; any '-' characters are ignored</param>
        /// <returns>One character per pair of hexadecimal digits</returns>
        public static string ConvertHexToAscii(string hexString)
        {
            hexString = hexString.Replace("-", "");

            StringBuilder sb = new StringBuilder(hexString.Length / 2);
            for (int i = 0; i < hexString.Length; i += 2)
            {
                string hs = hexString.Substring(i, 2);
                sb.Append(Convert.ToChar(Convert.ToUInt32(hs, 16)));
            }

            return sb.ToString();
        }

        /// <summary>
        /// Convert each character of a string into its uppercase hexadecimal code value
        /// http://stackoverflow.com/questions/15920741/convert-from-string-ascii-to-string-hex
        /// </summary>
        /// <param name="asciiString">String to convert</param>
        /// <returns>
        /// Concatenated hexadecimal codes, at least two digits per character (exactly two for ASCII)
        /// </returns>
        public static string ConvertAsciiToHex(string asciiString)
        {
            StringBuilder hexOutput = new StringBuilder(asciiString.Length * 2);
            foreach (char eachChar in asciiString)
            {
                // Get the integral value of the character and format it as hexadecimal.
                // The digits must be kept: removing the first two characters of the formatted
                // value discards the whole code for every ASCII character.
                int value = Convert.ToInt32(eachChar);
                hexOutput.Append(value.ToString("X2"));
            }

            return hexOutput.ToString();
        }

        /// <summary>
        /// Pack a DateTime into the 32-bit MS-DOS date/time layout
        /// Adapted from 7-zip Source Code: CPP/Windows/TimeUtils.cpp:FileTimeToDosTime
        /// </summary>
        /// <param name="dateTime">Date and time to pack</param>
        /// <returns>
        /// Value with (year - 1980) mod 128 in bits 25-31, month in bits 21-24, day in bits 16-20,
        /// hour in bits 11-15, minute in bits 5-10 and seconds divided by two in bits 0-4
        /// </returns>
        public static uint ConvertDateTimeToMsDosTimeFormat(DateTime dateTime)
        {
            uint year = (uint)((dateTime.Year - 1980) % 128);
            uint mon = (uint)dateTime.Month;
            uint day = (uint)dateTime.Day;
            uint hour = (uint)dateTime.Hour;
            uint min = (uint)dateTime.Minute;
            uint sec = (uint)dateTime.Second;

            // Seconds are stored with two-second resolution
            return (year << 25) | (mon << 21) | (day << 16) | (hour << 11) | (min << 5) | (sec >> 1);
        }

        /// <summary>
        /// Unpack a 32-bit MS-DOS date/time value into a DateTime
        /// Adapted from 7-zip Source Code: CPP/Windows/TimeUtils.cpp:DosTimeToFileTime
        /// </summary>
        /// <param name="msDosDateTime">Packed MS-DOS date/time value</param>
        /// <returns>The corresponding DateTime</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A packed field is outside the range DateTime accepts (for example a month or day of 0)
        /// </exception>
        public static DateTime ConvertMsDosTimeFormatToDateTime(uint msDosDateTime)
        {
            int year = (int)(1980 + (msDosDateTime >> 25));
            int month = (int)((msDosDateTime >> 21) & 0xF);
            int day = (int)((msDosDateTime >> 16) & 0x1F);
            int hour = (int)((msDosDateTime >> 11) & 0x1F);
            int minute = (int)((msDosDateTime >> 5) & 0x3F);
            int second = (int)((msDosDateTime & 0x1F) * 2); // stored with two-second resolution

            return new DateTime(year, month, day, hour, minute, second);
        }

        /// <summary>
        /// Determines a text file's encoding by analyzing its byte order mark (BOM).
        /// Defaults to Encoding.Default when no known BOM is found.
        /// http://stackoverflow.com/questions/3825390/effective-way-to-find-any-files-encoding
        /// </summary>
        /// <param name="filename">The text file to analyze.</param>
        /// <returns>The detected encoding.</returns>
        public static Encoding GetEncoding(string filename)
        {
            // Read the BOM, remembering how many bytes the file actually provided so that the
            // zero-filled remainder of the buffer is never mistaken for file content
            byte[] bom = new byte[4];
            int read = 0;
            using (FileStream file = File.OpenRead(filename))
            {
                while (read < bom.Length)
                {
                    int count = file.Read(bom, read, bom.Length - read);
                    if (count == 0)
                    {
                        break;
                    }

                    read += count;
                }
            }

            // Analyze the BOM
            if (read >= 3 && bom[0] == 0x2b && bom[1] == 0x2f && bom[2] == 0x76)
            {
                return Encoding.UTF7;
            }

            if (read >= 3 && bom[0] == 0xef && bom[1] == 0xbb && bom[2] == 0xbf)
            {
                return Encoding.UTF8;
            }

            // UTF-32LE must be tested before UTF-16LE because its BOM starts with the same two bytes
            if (read >= 4 && bom[0] == 0xff && bom[1] == 0xfe && bom[2] == 0 && bom[3] == 0)
            {
                return Encoding.UTF32; //UTF-32LE
            }

            if (read >= 2 && bom[0] == 0xff && bom[1] == 0xfe)
            {
                return Encoding.Unicode; //UTF-16LE
            }

            if (read >= 2 && bom[0] == 0xfe && bom[1] == 0xff)
            {
                return Encoding.BigEndianUnicode; //UTF-16BE
            }

            if (read >= 4 && bom[0] == 0 && bom[1] == 0 && bom[2] == 0xfe && bom[3] == 0xff)
            {
                return new UTF32Encoding(true, true); //UTF-32BE
            }

            return Encoding.Default;
        }

        #endregion
    }
}