diff --git a/SabreTools.Helper/Dats/Partials/DatFile.Parsers.cs b/SabreTools.Helper/Dats/Partials/DatFile.Parsers.cs index 99167a01..cb9613d4 100644 --- a/SabreTools.Helper/Dats/Partials/DatFile.Parsers.cs +++ b/SabreTools.Helper/Dats/Partials/DatFile.Parsers.cs @@ -20,7 +20,7 @@ namespace SabreTools.Helper.Dats { public partial class DatFile { - #region Parsing [MODULAR DONE, FOR NOW] + #region Parsing /// /// Parse a DAT and return all found games and roms within @@ -31,12 +31,15 @@ namespace SabreTools.Helper.Dats /// The DatData object representing found roms to this point /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) /// True if original extension should be kept, false otherwise (default) /// True if tags from the DAT should be used to merge the output, false otherwise (default) - public void Parse(string filename, int sysid, int srcid, bool keep = false, bool clean = false, bool descAsName = false, bool keepext = false, bool useTags = false) + public void Parse(string filename, int sysid, int srcid, bool keep = false, bool clean = false, + bool remUnicode = false, bool descAsName = false, bool keepext = false, bool useTags = false) { - Parse(filename, sysid, srcid, SplitType.None, keep: keep, clean: clean, descAsName: descAsName, keepext: keepext, useTags: useTags); + Parse(filename, sysid, srcid, SplitType.None, keep: keep, clean: clean, + remUnicode: remUnicode, descAsName: descAsName, keepext: keepext, useTags: useTags); } /// @@ -48,6 +51,7 @@ namespace SabreTools.Helper.Dats /// Type of the split that should be performed (split, merged, fully merged) /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should 
remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) /// True if original extension should be kept, false otherwise (default) /// True if tags from the DAT should be used to merge the output, false otherwise (default) @@ -64,6 +68,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool keep = false, bool clean = false, + bool remUnicode = false, bool descAsName = false, bool keepext = false, bool useTags = false) @@ -92,44 +97,44 @@ namespace SabreTools.Helper.Dats switch (FileTools.GetDatFormat(filename)) { case DatFormat.AttractMode: - ParseAttractMode(filename, sysid, srcid, keep, clean, descAsName); + ParseAttractMode(filename, sysid, srcid, keep, clean, remUnicode, descAsName); break; case DatFormat.ClrMamePro: case DatFormat.DOSCenter: - ParseCMP(filename, sysid, srcid, keep, clean, descAsName); + ParseCMP(filename, sysid, srcid, keep, clean, remUnicode, descAsName); break; case DatFormat.CSV: - ParseCSVTSV(filename, sysid, srcid, ',', keep, clean, descAsName); + ParseCSVTSV(filename, sysid, srcid, ',', keep, clean, remUnicode, descAsName); break; case DatFormat.Logiqx: case DatFormat.OfflineList: case DatFormat.SabreDat: case DatFormat.SoftwareList: - ParseGenericXML(filename, sysid, srcid, keep, clean, descAsName); + ParseGenericXML(filename, sysid, srcid, keep, clean, remUnicode, descAsName); break; case DatFormat.RedumpMD5: - ParseRedumpMD5(filename, sysid, srcid, clean); + ParseRedumpMD5(filename, sysid, srcid, clean, remUnicode); break; case DatFormat.RedumpSFV: - ParseRedumpSFV(filename, sysid, srcid, clean); + ParseRedumpSFV(filename, sysid, srcid, clean, remUnicode); break; case DatFormat.RedumpSHA1: - ParseRedumpSHA1(filename, sysid, srcid, clean); + ParseRedumpSHA1(filename, sysid, srcid, clean, remUnicode); break; case DatFormat.RedumpSHA256: - ParseRedumpSHA256(filename, sysid, srcid, clean); + ParseRedumpSHA256(filename, sysid, srcid, clean, 
remUnicode); break; case DatFormat.RedumpSHA384: - ParseRedumpSHA384(filename, sysid, srcid, clean); + ParseRedumpSHA384(filename, sysid, srcid, clean, remUnicode); break; case DatFormat.RedumpSHA512: - ParseRedumpSHA512(filename, sysid, srcid, clean); + ParseRedumpSHA512(filename, sysid, srcid, clean, remUnicode); break; case DatFormat.RomCenter: - ParseRC(filename, sysid, srcid, clean, descAsName); + ParseRC(filename, sysid, srcid, clean, remUnicode, descAsName); break; case DatFormat.TSV: - ParseCSVTSV(filename, sysid, srcid, '\t', keep, clean, descAsName); + ParseCSVTSV(filename, sysid, srcid, '\t', keep, clean, remUnicode, descAsName); break; default: return; @@ -189,6 +194,7 @@ namespace SabreTools.Helper.Dats /// Source ID for the DAT /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) private void ParseAttractMode( // Standard Dat parsing @@ -199,6 +205,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool keep, bool clean, + bool remUnicode, bool descAsName) { // Open a file reader @@ -254,7 +261,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -268,6 +275,7 @@ namespace SabreTools.Helper.Dats /// Source ID for the DAT /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) private void ParseCMP( // Standard Dat parsing @@ -278,6 +286,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool keep, bool clean, + bool 
remUnicode, bool descAsName) { // Open a file reader @@ -386,7 +395,7 @@ namespace SabreTools.Helper.Dats // Now process and add the sample key = ""; - ParseAddHelper(item, clean, out key); + ParseAddHelper(item, clean, remUnicode, out key); continue; } @@ -471,7 +480,7 @@ namespace SabreTools.Helper.Dats // Now process and add the rom key = ""; - ParseAddHelper(item, clean, out key); + ParseAddHelper(item, clean, remUnicode, out key); continue; } @@ -684,7 +693,7 @@ namespace SabreTools.Helper.Dats // Now process and add the rom key = ""; - ParseAddHelper(item, clean, out key); + ParseAddHelper(item, clean, remUnicode, out key); } // If the line is anything but a rom or disk and we're in a block @@ -871,6 +880,7 @@ namespace SabreTools.Helper.Dats /// Delimiter for parsing individual lines /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if SL XML names should be kept, false otherwise (default) private void ParseCSVTSV( // Standard Dat parsing @@ -882,6 +892,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool keep, bool clean, + bool remUnicode, bool descAsName) { // Open a file reader @@ -1117,7 +1128,7 @@ namespace SabreTools.Helper.Dats }, }; - ParseAddHelper(archive, clean, out key); + ParseAddHelper(archive, clean, remUnicode, out key); break; case ItemType.BiosSet: BiosSet biosset = new BiosSet() @@ -1131,7 +1142,7 @@ namespace SabreTools.Helper.Dats }, }; - ParseAddHelper(biosset, clean, out key); + ParseAddHelper(biosset, clean, remUnicode, out key); break; case ItemType.Disk: Disk disk = new Disk() @@ -1152,7 +1163,7 @@ namespace SabreTools.Helper.Dats ItemStatus = status, }; - ParseAddHelper(disk, clean, out key); + ParseAddHelper(disk, clean, remUnicode, out key); break; case ItemType.Release: Release release = new Release() @@ -1166,7 +1177,7 @@ namespace 
SabreTools.Helper.Dats }, }; - ParseAddHelper(release, clean, out key); + ParseAddHelper(release, clean, remUnicode, out key); break; case ItemType.Rom: Rom rom = new Rom() @@ -1189,7 +1200,7 @@ namespace SabreTools.Helper.Dats ItemStatus = status, }; - ParseAddHelper(rom, clean, out key); + ParseAddHelper(rom, clean, remUnicode, out key); break; case ItemType.Sample: Sample sample = new Sample() @@ -1203,7 +1214,7 @@ namespace SabreTools.Helper.Dats }, }; - ParseAddHelper(sample, clean, out key); + ParseAddHelper(sample, clean, remUnicode, out key); break; } } @@ -1217,6 +1228,7 @@ namespace SabreTools.Helper.Dats /// Source ID for the DAT /// True if full pathnames are to be kept, false otherwise (default) /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if SL XML names should be kept, false otherwise (default) /// /// TODO: Software Lists - sharedfeat tag (read-in, write-out) @@ -1230,6 +1242,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool keep, bool clean, + bool remUnicode, bool descAsName) { // Prepare all internal variables @@ -1265,7 +1278,7 @@ namespace SabreTools.Helper.Dats Rom rom = new Rom("null", tempgame); // Now process and add the rom - ParseAddHelper(rom, clean, out key); + ParseAddHelper(rom, clean, remUnicode, out key); } // Regardless, end the current folder @@ -1811,7 +1824,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(olrom, clean, out key); + ParseAddHelper(olrom, clean, remUnicode, out key); break; // For Software List and MAME listxml only @@ -1913,7 +1926,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(relrom, clean, out key); + ParseAddHelper(relrom, clean, remUnicode, out key); subreader.Read(); break; @@ -1956,7 +1969,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(biosrom, clean, out 
key); + ParseAddHelper(biosrom, clean, remUnicode, out key); subreader.Read(); break; @@ -1984,7 +1997,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(archiverom, clean, out key); + ParseAddHelper(archiverom, clean, remUnicode, out key); subreader.Read(); break; @@ -2012,7 +2025,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(samplerom, clean, out key); + ParseAddHelper(samplerom, clean, remUnicode, out key); subreader.Read(); break; @@ -2155,7 +2168,7 @@ namespace SabreTools.Helper.Dats } // Now process and add the rom - ParseAddHelper(inrom, clean, out key); + ParseAddHelper(inrom, clean, remUnicode, out key); subreader.Read(); break; @@ -2330,7 +2343,7 @@ namespace SabreTools.Helper.Dats } // Now process and add the rom - ParseAddHelper(rom, clean, out key); + ParseAddHelper(rom, clean, remUnicode, out key); xtr.Read(); break; @@ -2358,6 +2371,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpMD5( // Standard Dat parsing string filename, @@ -2365,7 +2379,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2392,7 +2407,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2405,6 +2420,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpSFV( // Standard Dat parsing string filename, @@ 
-2412,7 +2428,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2439,7 +2456,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2452,6 +2469,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpSHA1( // Standard Dat parsing string filename, @@ -2459,7 +2477,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2486,7 +2505,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2499,6 +2518,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpSHA256( // Standard Dat parsing string filename, @@ -2506,7 +2526,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2533,7 +2554,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2546,6 +2567,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// 
True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpSHA384( // Standard Dat parsing string filename, @@ -2553,7 +2575,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2580,7 +2603,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2593,6 +2616,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) private void ParseRedumpSHA512( // Standard Dat parsing string filename, @@ -2600,7 +2624,8 @@ namespace SabreTools.Helper.Dats int srcid, // Miscellaneous - bool clean) + bool clean, + bool remUnicode) { // Open a file reader Encoding enc = Style.GetEncoding(filename); @@ -2627,7 +2652,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } sr.Dispose(); @@ -2640,6 +2665,7 @@ namespace SabreTools.Helper.Dats /// System ID for the DAT /// Source ID for the DAT /// True if game names are sanitized, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) private void ParseRC( // Standard Dat parsing @@ -2649,6 +2675,7 @@ namespace SabreTools.Helper.Dats // Miscellaneous bool clean, + bool remUnicode, bool descAsName) { // Open a file reader @@ -2787,7 +2814,7 @@ namespace SabreTools.Helper.Dats }; // Now process and add the rom - 
ParseAddHelper(rom, clean, out string key); + ParseAddHelper(rom, clean, remUnicode, out string key); } } } @@ -2800,8 +2827,9 @@ namespace SabreTools.Helper.Dats /// /// Item data to check against /// True if the names should be cleaned to WoD standards, false otherwise + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// Output param containing the key for the item - private void ParseAddHelper(DatItem item, bool clean, out string key) + private void ParseAddHelper(DatItem item, bool clean, bool remUnicode, out string key) { key = ""; @@ -2822,6 +2850,14 @@ namespace SabreTools.Helper.Dats // If we're in cleaning mode, sanitize the game name item.Machine.Name = (clean ? Style.CleanGameName(item.Machine.Name) : item.Machine.Name); + // If we're stripping unicode characters, do so from all relevant things + if (remUnicode) + { + item.Name = Style.RemoveUnicodeCharacters(item.Name); + item.Machine.Name = Style.RemoveUnicodeCharacters(item.Machine.Name); + item.Machine.Description = Style.RemoveUnicodeCharacters(item.Machine.Description); + } + // If we have a Rom or a Disk, clean the hash data if (item.Type == ItemType.Rom) { diff --git a/SabreTools.Helper/Dats/Partials/DatFile.Writers.cs b/SabreTools.Helper/Dats/Partials/DatFile.Writers.cs index 45357575..3a06a00f 100644 --- a/SabreTools.Helper/Dats/Partials/DatFile.Writers.cs +++ b/SabreTools.Helper/Dats/Partials/DatFile.Writers.cs @@ -34,10 +34,6 @@ namespace SabreTools.Helper.Dats /// True if blank roms should be skipped on output, false otherwise (default) /// True if files should be overwritten (default), false if they should be renamed instead /// True if the DAT was written correctly, false otherwise - /// - /// The following features have been requested for file output: - /// - Have the ability to strip special (non-ASCII) characters from rom information - /// public bool WriteToFile(string outDir, bool norename = true, bool stats = false, bool ignoreblanks = false, 
bool overwrite = true) { // If there's nothing there, abort diff --git a/SabreTools.Helper/README.1ST b/SabreTools.Helper/README.1ST index 1a6c72bc..10dde0bd 100644 --- a/SabreTools.Helper/README.1ST +++ b/SabreTools.Helper/README.1ST @@ -915,10 +915,15 @@ Options: If this flag is enabled, then the romof, cloneof, and sampleof tags will be omitted from the outputted DAT or DATs. - -clean Clean game names according to WoD standards + -clean Clean game names according to WoD standards Game names will be santitized to remove what the original WoD standards deemed as unneeded information, such as parenthized or bracketed strings + -ru, --rem-uni Remove unicode characters from names + By default, the character set from the original file(s) will be used for + item naming. This flag removes every character above U+00FF (outside the + 8-bit Latin-1 range) from the item names, machine names, and machine descriptions. + -rmd5, --rem-md5 Remove MD5 hashes from the output By default, all available hashes will be written out to the DAT. This will remove all MD5 hashes from the output file(s). 
diff --git a/SabreTools.Helper/Tools/Style.cs b/SabreTools.Helper/Tools/Style.cs index bb018775..3e6231bd 100644 --- a/SabreTools.Helper/Tools/Style.cs +++ b/SabreTools.Helper/Tools/Style.cs @@ -454,6 +454,16 @@ namespace SabreTools.Helper.Tools return new string(s.Where(c => !invalidPath.Contains(c)).ToArray()); } + /// <summary> + /// Remove every character above U+00FF (outside the 8-bit Latin-1 range) from a string + /// </summary> + /// <param name="s">Input string to clean; null or empty input is returned unchanged</param> + /// <returns>Input string with only the characters in the U+0000-U+00FF range retained, in their original order</returns> + public static string RemoveUnicodeCharacters(string s) + { + return string.IsNullOrEmpty(s) ? s : new string(s.Where(c => c <= 255).ToArray()); + } + /// /// Split a line as if it were a CMP rom line /// diff --git a/SabreTools/Partials/SabreTools.Help.cs b/SabreTools/Partials/SabreTools.Help.cs index 42f6b87a..6142e60d 100644 --- a/SabreTools/Partials/SabreTools.Help.cs +++ b/SabreTools/Partials/SabreTools.Help.cs @@ -1005,6 +1005,11 @@ namespace SabreTools "Clean game names according to WoD standards", FeatureType.Flag, null)); + update.AddFeature("rem-uni", new Feature( + new List() { "-ru", "--rem-uni" }, + "Remove unicode characters from names", + FeatureType.Flag, + null)); update.AddFeature("rem-md5", new Feature( new List() { "-rmd5", "--rem-md5" }, "Remove MD5 hashes from the output", diff --git a/SabreTools/Partials/SabreTools.Inits.cs b/SabreTools/Partials/SabreTools.Inits.cs index 18678613..a4e74ada 100644 --- a/SabreTools/Partials/SabreTools.Inits.cs +++ b/SabreTools/Partials/SabreTools.Inits.cs @@ -472,6 +472,7 @@ namespace SabreTools /// /* Output DAT info */ /// Optional param for output directory /// True to clean the game names to WoD standard, false otherwise (default) + /// True if we should remove non-ASCII characters from output, false otherwise (default) /// True if descriptions should be used as names, false otherwise (default) /// True to dedupe the roms in the DAT, false otherwise (default) /// StripHash that represents the hash(es) that you want to remove from the output @@ -529,6 +530,7 @@ namespace SabreTools /* Output DAT info */ string 
outDir, bool clean, + bool remUnicode, bool descAsName, bool dedup, Hash stripHash) diff --git a/SabreTools/SabreTools.cs b/SabreTools/SabreTools.cs index e0aced65..5f77a711 100644 --- a/SabreTools/SabreTools.cs +++ b/SabreTools/SabreTools.cs @@ -112,6 +112,7 @@ namespace SabreTools quotes = false, remext = false, removeDateFromAutomaticName = false, + removeUnicode = false, romba = false, showBaddumpColumn = false, showNodumpColumn = false, @@ -539,6 +540,10 @@ namespace SabreTools case "--rem-sha512": stripHash |= Hash.SHA512; break; + case "-ru": + case "--rem-uni": + removeUnicode = true; + break; case "-run": case "--runnable": filter.Runnable = true; @@ -1271,7 +1276,7 @@ namespace SabreTools InitUpdate(inputs, filename, name, description, rootdir, category, version, date, author, email, homepage, url, comment, header, superdat, forcemerge, forcend, forcepack, excludeOf, datFormat, usegame, prefix, postfix, quotes, repext, addext, remext, datPrefix, romba, merge, diffMode, inplace, skip, removeDateFromAutomaticName, filter, oneGameOneRegion, regions, - splitType, trim, single, root, outDir, cleanGameNames, descAsName, dedup, stripHash); + splitType, trim, single, root, outDir, cleanGameNames, removeUnicode, descAsName, dedup, stripHash); } // If we're using the verifier