using System;
#if NET40_OR_GREATER || NETCOREAPP
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.Logging;

[assembly: InternalsVisibleTo("SabreTools.Test")]
namespace SabreTools.Filtering
{
    /// <summary>
    /// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
    /// </summary>
    public class Cleaner
    {
        #region Constants

        /// <summary>
        /// Windows max path length used when trimming item names
        /// </summary>
        private const int MaxPathLength = 260;

        /// <summary>
        /// Matches a leading scene-style date ("XX.XX.XX-") followed by the set name
        /// </summary>
        private const string SceneDatePattern = @"([0-9]{2}\.[0-9]{2}\.[0-9]{2}-)(.*?-.*?)";

        #endregion

        #region Fields

        /// <summary>
        /// Clean all names to WoD standards
        /// </summary>
        public bool Clean { get; set; }

        /// <summary>
        /// Deduplicate items using the given method
        /// </summary>
        public DedupeType DedupeRoms { get; set; }

        /// <summary>
        /// Set Machine Description from Machine Name
        /// </summary>
        public bool DescriptionAsName { get; set; }

        /// <summary>
        /// Keep machines that don't contain any items
        /// </summary>
        public bool KeepEmptyGames { get; set; }

        /// <summary>
        /// Enable "One Rom, One Region (1G1R)" mode
        /// </summary>
        public bool OneGamePerRegion { get; set; }

        /// <summary>
        /// Ordered list of regions for "One Rom, One Region (1G1R)" mode
        /// </summary>
        public List<string>? RegionList { get; set; }

        /// <summary>
        /// Ensure each rom is in their own game
        /// </summary>
        public bool OneRomPerGame { get; set; }

        /// <summary>
        /// Remove all unicode characters
        /// </summary>
        public bool RemoveUnicode { get; set; }

        /// <summary>
        /// Include root directory when determining trim sizes
        /// </summary>
        public string? Root { get; set; }

        /// <summary>
        /// Remove scene dates from the beginning of machine names
        /// </summary>
        public bool SceneDateStrip { get; set; }

        /// <summary>
        /// Change all machine names to "!"
        /// </summary>
        public bool Single { get; set; }

        /// <summary>
        /// Trim total machine and item name to not exceed NTFS limits
        /// </summary>
        public bool Trim { get; set; }

        #endregion

        #region Logging

        /// <summary>
        /// Logging object
        /// </summary>
        private readonly Logger logger = new();

        #endregion

        #region Running

        /// <summary>
        /// Apply cleaning methods to the DatFile
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        /// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
        /// <returns>True if cleaning was successful, false on error</returns>
        public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
        {
            InternalStopwatch watch = new("Applying cleaning steps to DAT");

            try
            {
                // Perform item-level cleaning
                CleanDatItems(datFile);

                // Bucket and dedupe according to the flag
                if (DedupeRoms == DedupeType.Full)
                    datFile.Items.BucketBy(ItemKey.CRC, DedupeRoms);
                else if (DedupeRoms == DedupeType.Game)
                    datFile.Items.BucketBy(ItemKey.Machine, DedupeRoms);

                // Process description to machine name; forward the error policy so that
                // failures are not silently swallowed when the caller asked for them
                if (DescriptionAsName)
                    MachineDescriptionToName(datFile, throwOnError);

                // If we are removing scene dates, do that now
                if (SceneDateStrip)
                    StripSceneDatesFromItems(datFile);

                // Run the one game per region (1G1R) logic, if required
                if (OneGamePerRegion)
                    SetOneGamePerRegion(datFile);

                // Run the one rom per game logic, if required
                if (OneRomPerGame)
                    SetOneRomPerGame(datFile);

                // Remove all marked items
                datFile.Items.ClearMarked();

                // We remove any blanks, if we aren't supposed to have any
                if (!KeepEmptyGames)
                    datFile.Items.ClearEmpty();
            }
            catch (Exception ex) when (!throwOnError)
            {
                logger.Error(ex);
                return false;
            }
            finally
            {
                watch.Stop();
            }

            return true;
        }

        /// <summary>
        /// Clean individual items based on the current filter
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal void CleanDatItems(DatFile datFile)
        {
            // Work on a snapshot of the keys since buckets are assigned back below
            List<string> keys = datFile.Items.Keys.ToList();
            foreach (string key in keys)
            {
                // For every item in the current key
                var items = datFile.Items[key];
                if (items == null)
                    continue;

                foreach (DatItem item in items)
                {
                    // If we have a null item, we can't clean it
                    if (item == null)
                        continue;

                    // Run cleaning per item
                    CleanDatItem(item);
                }

                // Assign back for caution
                datFile.Items[key] = items;
            }
        }

        /// <summary>
        /// Clean a DatItem according to the cleaner
        /// </summary>
        /// <param name="datItem">DatItem to clean</param>
        internal void CleanDatItem(DatItem datItem)
        {
            // The machine is only dereferenced by the steps that are enabled below
            Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;

            // If we're stripping unicode characters, strip machine name and description
            if (RemoveUnicode)
            {
                machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.RemoveUnicodeCharacters(machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey)));
                machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.RemoveUnicodeCharacters(machine.GetFieldValue<string>(Models.Metadata.Machine.DescriptionKey)));
                datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName()));
            }

            // If we're in cleaning mode, sanitize machine name and description
            if (Clean)
            {
                machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.NormalizeCharacters(machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey)));
                machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.NormalizeCharacters(machine.GetFieldValue<string>(Models.Metadata.Machine.DescriptionKey)));
            }

            // If we are in single game mode, rename the machine
            if (Single)
                machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, "!");

            // If we are in NTFS trim mode, trim the item name
            string? itemName = datItem.GetName();
            if (Trim && itemName != null)
            {
                // A missing machine name contributes nothing to the total length
                int machineNameLength = machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey)?.Length ?? 0;
                int usableLength = MaxPathLength - machineNameLength - (Root?.Length ?? 0);
                if (itemName.Length > usableLength)
                {
                    string ext = Path.GetExtension(itemName);

                    // Clamp so that an over-long machine name/root cannot produce a
                    // negative Substring length; in that case only the extension is kept
                    int baseLength = Math.Max(0, usableLength - ext.Length);
                    datItem.SetName(itemName.Substring(0, baseLength) + ext);
                }
            }
        }

        /// <summary>
        /// Use game descriptions as names in the DAT, updating cloneof/romof/sampleof
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        /// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
        internal void MachineDescriptionToName(DatFile datFile, bool throwOnError = false)
        {
            try
            {
                // Snapshot the keys: buckets are removed and re-added in the second pass
                List<string> keys = datFile.Items.Keys.ToList();

                // First we want to get a mapping for all games to description
#if NET40_OR_GREATER || NETCOREAPP
                ConcurrentDictionary<string, string> mapping = new();
#else
                Dictionary<string, string> mapping = [];
#endif

#if NET452_OR_GREATER || NETCOREAPP
                Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
                Parallel.ForEach(keys, key =>
#else
                foreach (var key in keys)
#endif
                {
                    var items = datFile.Items[key];
                    if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                        return;
#else
                        continue;
#endif

                    foreach (DatItem item in items)
                    {
                        Machine machine = item.GetFieldValue<Machine>(DatItem.MachineKey)!;
                        string? name = machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey);
                        string? description = machine.GetFieldValue<string>(Models.Metadata.Machine.DescriptionKey);

                        // A machine without a name or description cannot be mapped
                        if (name == null || description == null)
                            continue;

                        string cleaned = description.Replace('/', '_').Replace("\"", "''").Replace(":", " -");

                        // If the key mapping doesn't exist, add it
#if NET40_OR_GREATER || NETCOREAPP
                        mapping.TryAdd(name, cleaned);
#else
                        mapping[name] = cleaned;
#endif
                    }
#if NET40_OR_GREATER || NETCOREAPP
                });
#else
                }
#endif

                // Now we loop through every item and update accordingly
#if NET452_OR_GREATER || NETCOREAPP
                Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
                Parallel.ForEach(keys, key =>
#else
                foreach (var key in keys)
#endif
                {
                    var items = datFile.Items[key];
                    if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                        return;
#else
                        continue;
#endif

                    ConcurrentList<DatItem> newItems = [];
                    foreach (DatItem item in items)
                    {
                        Machine machine = item.GetFieldValue<Machine>(DatItem.MachineKey)!;

                        // Update machine name, cloneof, romof, and sampleof, in that order
                        RemapMachineField(machine, Models.Metadata.Machine.NameKey, mapping);
                        RemapMachineField(machine, Models.Metadata.Machine.CloneOfKey, mapping);
                        RemapMachineField(machine, Models.Metadata.Machine.RomOfKey, mapping);
                        RemapMachineField(machine, Models.Metadata.Machine.SampleOfKey, mapping);

                        // Add the new item to the output list
                        newItems.Add(item);
                    }

                    // Replace the old list of roms with the new one
                    datFile.Items.Remove(key);
                    datFile.Items.AddRange(key, newItems);
#if NET40_OR_GREATER || NETCOREAPP
                });
#else
                }
#endif
            }
            catch (Exception ex) when (!throwOnError)
            {
                logger.Warning(ex.ToString());
            }
        }

        /// <summary>
        /// Replace a machine field with its mapped value, if the current value has a mapping
        /// </summary>
        /// <param name="machine">Machine whose field should be updated</param>
        /// <param name="fieldKey">Key of the machine field to update</param>
        /// <param name="mapping">Mapping of original machine names to replacement names</param>
        private static void RemapMachineField(Machine machine, string fieldKey, IDictionary<string, string> mapping)
        {
            string? current = machine.GetFieldValue<string>(fieldKey);
            if (string.IsNullOrEmpty(current))
                return;

            // Single lookup instead of ContainsKey followed by the indexer
            if (mapping.TryGetValue(current!, out string? mapped))
                machine.SetFieldValue<string?>(fieldKey, mapped);
        }

        /// <summary>
        /// Filter a DAT using 1G1R logic given an ordered set of regions
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        /// <remarks>
        /// In the most technical sense, the way that the region list is being used does not
        /// confine its values to be just regions. Since it's essentially acting like a
        /// specialized version of the machine name filter, anything that is usually encapsulated
        /// in parenthesis would be matched on, including disc numbers, languages, editions,
        /// and anything else commonly used. Please note that, unlike other existing 1G1R
        /// solutions, this does not have the ability to contain custom mappings of parent
        /// to clone sets based on name, nor does it have the ability to match on the
        /// Release DatItem type.
        /// </remarks>
        internal void SetOneGamePerRegion(DatFile datFile)
        {
            // If we have null region list, make it empty
            RegionList ??= [];

            // For sake of ease, the first thing we want to do is bucket by game
            datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);

            // Then we want to get a mapping of all machines to parents
            Dictionary<string, List<string>> parents = [];
            foreach (string key in datFile.Items.Keys)
            {
                // An empty bucket has no representative item to read machine data from
                var items = datFile.Items[key];
                if (items == null || items.Count == 0)
                    continue;

                Machine machine = items[0].GetFieldValue<Machine>(DatItem.MachineKey)!;
                string? machineName = machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey);
                if (machineName == null)
                    continue;

                string? cloneOf = machine.GetFieldValue<string>(Models.Metadata.Machine.CloneOfKey);
                string? romOf = machine.GetFieldValue<string>(Models.Metadata.Machine.RomOfKey);

                // Match on CloneOf first, then by RomOf, otherwise treat it as a parent
                string parentName;
                if (!string.IsNullOrEmpty(cloneOf))
                    parentName = cloneOf!;
                else if (!string.IsNullOrEmpty(romOf))
                    parentName = romOf!;
                else
                    parentName = machineName;

                string parentKey = parentName.ToLowerInvariant();
                if (!parents.TryGetValue(parentKey, out List<string>? children))
                {
                    children = new List<string>();
                    parents.Add(parentKey, children);
                }

                children.Add(machineName.ToLowerInvariant());
            }

            // Once we have the full list of mappings, filter out games to keep
            foreach (KeyValuePair<string, List<string>> family in parents)
            {
                List<string> candidates = family.Value;

                // Find the first machine that matches the regions in order, if possible
                string? keeper = default;
                foreach (string region in RegionList)
                {
                    // NOTE: the region is inserted into the pattern as-is, so any regex
                    // metacharacters it contains are interpreted as regex syntax
                    string regionPattern = @"\(.*" + region + @".*\)";
                    keeper = candidates.FirstOrDefault(m => Regex.IsMatch(m, regionPattern, RegexOptions.IgnoreCase));
                    if (keeper != default)
                        break;
                }

                // If we didn't get a match, use the parent
                keeper ??= family.Key;

                // Remove the machine being kept from the list
                candidates.Remove(keeper);

                // Remove the rest of the items from this key
                candidates.ForEach(k => datFile.Items.Remove(k));
            }

            // Finally, strip out the parent tags
            Splitter.RemoveTagsFromChild(datFile);
        }

        /// <summary>
        /// Ensure that all roms are in their own game (or at least try to ensure)
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal static void SetOneRomPerGame(DatFile datFile)
        {
            // Because this introduces subfolders, we need to set the SuperDAT type
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.TypeKey, "SuperDAT");

            // For each rom, we want to update the game to be "machine name/item name without extension"
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(datFile.Items.Keys, key =>
#else
            foreach (var key in datFile.Items.Keys)
#endif
            {
                var items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                for (int i = 0; i < items.Count; i++)
                {
                    SetOneRomPerGame(items[i]);
                }
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Set internal names to match One Rom Per Game (ORPG) logic
        /// </summary>
        /// <param name="datItem">DatItem to run logic on</param>
        internal static void SetOneRomPerGame(DatItem datItem)
        {
            string? itemName = datItem.GetName();
            if (itemName == null)
                return;

            // Drop the last dot-separated segment (the extension) unless there is only one
            string[] splitname = itemName.Split('.');
            int keepCount = splitname.Length > 1 ? splitname.Length - 1 : 1;

            // ToArray keeps this compatible with every target framework's string.Join
            string baseName = string.Join(".", splitname.Take(keepCount).ToArray());

            Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;
            machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, machine.GetFieldValue<string>(Models.Metadata.Machine.NameKey) + $"/{baseName}");
            datItem.SetName(Path.GetFileName(itemName));
        }

        /// <summary>
        /// Strip the dates from the beginning of scene-style set names
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal void StripSceneDatesFromItems(DatFile datFile)
        {
            // Output the logging statement
            logger.User("Stripping scene-style dates");

            // Snapshot the keys: buckets are removed and re-added while processing
            List<string> keys = datFile.Items.Keys.ToList();

            // Now process all of the roms
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(keys, key =>
#else
            foreach (var key in keys)
#endif
            {
                var items = datFile.Items[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                for (int j = 0; j < items.Count; j++)
                {
                    DatItem item = items[j];
                    Machine machine = item.GetFieldValue<Machine>(DatItem.MachineKey)!;

                    StripSceneDate(machine, Models.Metadata.Machine.NameKey);
                    StripSceneDate(machine, Models.Metadata.Machine.DescriptionKey);

                    items[j] = item;
                }

                datFile.Items.Remove(key);
                datFile.Items.AddRange(key, items);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Strip a scene-style date from a single machine field, if the field has one
        /// </summary>
        /// <param name="machine">Machine whose field should be updated</param>
        /// <param name="fieldKey">Key of the machine field to update</param>
        private static void StripSceneDate(Machine machine, string fieldKey)
        {
            // Regex methods throw on null input, so a missing field is left untouched
            string? value = machine.GetFieldValue<string>(fieldKey);
            if (value == null)
                return;

            if (Regex.IsMatch(value, SceneDatePattern))
                machine.SetFieldValue<string?>(fieldKey, Regex.Replace(value, SceneDatePattern, "$2"));
        }

        #endregion
    }
}