using System;
#if NET40_OR_GREATER || NETCOREAPP
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.Logging;

[assembly: InternalsVisibleTo("SabreTools.Test")]
namespace SabreTools.Filtering
{
    /// <summary>
    /// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
    /// </summary>
    public class Cleaner
    {
        #region Constants

        /// <summary>
        /// Windows max name length, used as the budget when trimming item names
        /// </summary>
        private const int MaxPathLength = 260;

        #endregion

        #region Fields

        /// <summary>
        /// Clean all names to WoD standards
        /// </summary>
        public bool Clean { get; set; }

        /// <summary>
        /// Deduplicate items using the given method
        /// </summary>
        public DedupeType DedupeRoms { get; set; }

        /// <summary>
        /// Set Machine Description from Machine Name
        /// </summary>
        public bool DescriptionAsName { get; set; }

        /// <summary>
        /// Keep machines that don't contain any items
        /// </summary>
        public bool KeepEmptyGames { get; set; }

        /// <summary>
        /// Enable "One Rom, One Region (1G1R)" mode
        /// </summary>
        public bool OneGamePerRegion { get; set; }

        /// <summary>
        /// Ordered list of regions for "One Rom, One Region (1G1R)" mode
        /// </summary>
        public List<string>? RegionList { get; set; }

        /// <summary>
        /// Ensure each rom is in their own game
        /// </summary>
        public bool OneRomPerGame { get; set; }

        /// <summary>
        /// Remove all unicode characters
        /// </summary>
        public bool RemoveUnicode { get; set; }

        /// <summary>
        /// Include root directory when determining trim sizes
        /// </summary>
        public string? Root { get; set; }

        /// <summary>
        /// Remove scene dates from the beginning of machine names
        /// </summary>
        public bool SceneDateStrip { get; set; }

        /// <summary>
        /// Change all machine names to "!"
        /// </summary>
        public bool Single { get; set; }

        /// <summary>
        /// Trim total machine and item name to not exceed NTFS limits
        /// </summary>
        public bool Trim { get; set; }

        #endregion

        #region Logging

        /// <summary>
        /// Logging object
        /// </summary>
        private readonly Logger logger = new();

        #endregion

        #region Running

        /// <summary>
        /// Apply cleaning methods to the DatFile
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        /// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
        /// <returns>True if cleaning was successful, false on error</returns>
        public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
        {
            InternalStopwatch watch = new("Applying cleaning steps to DAT");

            try
            {
                // Perform item-level cleaning
                CleanDatItems(datFile);

                // Bucket and dedupe according to the flag
                if (DedupeRoms == DedupeType.Full)
                    datFile.Items.BucketBy(ItemKey.CRC, DedupeRoms);
                else if (DedupeRoms == DedupeType.Game)
                    datFile.Items.BucketBy(ItemKey.Machine, DedupeRoms);

                // Process description to machine name
                if (DescriptionAsName)
                {
                    datFile.Items.MachineDescriptionToName(throwOnError);
                    datFile.ItemsDB.MachineDescriptionToName(throwOnError);
                }

                // If we are removing scene dates, do that now
                if (SceneDateStrip)
                    StripSceneDatesFromItems(datFile);

                // Run the one game per region logic, if required
                if (OneGamePerRegion && RegionList != null)
                {
                    SetOneGamePerRegion(datFile);
                    datFile.ItemsDB.SetOneGamePerRegion(RegionList);
                }

                // Run the one rom per game logic, if required
                if (OneRomPerGame)
                    SetOneRomPerGame(datFile);

                // Remove all marked items
                datFile.Items.ClearMarked();
                datFile.ItemsDB.ClearMarked();

                // We remove any blanks, if we aren't supposed to have any
                if (!KeepEmptyGames)
                {
                    datFile.Items.ClearEmpty();
                    datFile.ItemsDB.ClearEmpty();
                }
            }
            catch (Exception ex) when (!throwOnError)
            {
                // The filter lets the exception propagate untouched when the caller asked for it
                logger.Error(ex);
                return false;
            }
            finally
            {
                watch.Stop();
            }

            return true;
        }

        /// <summary>
        /// Clean individual items based on the current filter
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal void CleanDatItems(DatFile datFile)
        {
            // Snapshot the keys so buckets can be assigned back while looping
            List<string> keys = datFile.Items.Keys.ToList();
            foreach (string key in keys)
            {
                // For every item in the current key
                var items = datFile.Items[key];
                if (items == null)
                    continue;

                foreach (DatItem item in items)
                {
                    // If we have a null item, we can't clean it
                    if (item == null)
                        continue;

                    // Run cleaning per item
                    CleanDatItem(item);
                }

                // Assign back for caution
                datFile.Items[key] = items;
            }
        }

        /// <summary>
        /// Clean a DatItem according to the cleaner
        /// </summary>
        /// <param name="datItem">DatItem to clean</param>
        internal void CleanDatItem(DatItem datItem)
        {
            // Every step below reads or writes the machine, so fetch it once.
            // It is only dereferenced when one of the cleaning flags is set.
            Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;

            // If we're stripping unicode characters, strip machine name and description
            if (RemoveUnicode)
            {
                machine.SetFieldValue(Models.Metadata.Machine.NameKey, TextHelper.RemoveUnicodeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
                machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, TextHelper.RemoveUnicodeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
                datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName()));
            }

            // If we're in cleaning mode, sanitize machine name and description
            if (Clean)
            {
                machine.SetFieldValue(Models.Metadata.Machine.NameKey, TextHelper.NormalizeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
                machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, TextHelper.NormalizeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
            }

            // If we are in single game mode, rename the machine
            if (Single)
                machine.SetFieldValue(Models.Metadata.Machine.NameKey, "!");

            // If we are in NTFS trim mode, trim the item name
            if (Trim)
            {
                // Read the name only now, since the steps above may have rewritten it
                string? itemName = datItem.GetName();
                if (itemName != null)
                {
                    // A missing machine name simply takes up no room in the budget
                    int machineLength = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)?.Length ?? 0;
                    int usableLength = MaxPathLength - machineLength - (Root?.Length ?? 0);
                    if (itemName.Length > usableLength)
                    {
                        string ext = Path.GetExtension(itemName);

                        // The machine name and root may already exceed the budget;
                        // clamp so Substring never receives a negative length
                        int stemLength = Math.Max(usableLength - ext.Length, 0);
                        datItem.SetName(itemName.Substring(0, stemLength) + ext);
                    }
                }
            }
        }

        /// <summary>
        /// Filter a DAT using 1G1R logic given an ordered set of regions
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        /// <remarks>
        /// In the most technical sense, the way that the region list is being used does not
        /// confine its values to be just regions. Since it's essentially acting like a
        /// specialized version of the machine name filter, anything that is usually encapsulated
        /// in parenthesis would be matched on, including disc numbers, languages, editions,
        /// and anything else commonly used. Please note that, unlike other existing 1G1R
        /// solutions, this does not have the ability to contain custom mappings of parent
        /// to clone sets based on name, nor does it have the ability to match on the
        /// Release DatItem type.
        /// </remarks>
        internal void SetOneGamePerRegion(DatFile datFile)
        {
            // If we have null region list, make it empty
            RegionList ??= [];

            // For sake of ease, the first thing we want to do is bucket by game
            datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);

            // Then we want to get a mapping of all machines to parents
            Dictionary<string, List<string>> parents = [];
            foreach (string key in datFile.Items.Keys)
            {
                // An empty bucket has no machine to inspect
                var bucket = datFile.Items[key];
                if (bucket == null || bucket.Count == 0)
                    continue;

                DatItem item = bucket[0];
                Machine machine = item.GetFieldValue<Machine>(DatItem.MachineKey)!;
                string machineName = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant();
                string? cloneOf = machine.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey);
                string? romOf = machine.GetStringFieldValue(Models.Metadata.Machine.RomOfKey);

                // Match on CloneOf first, then by RomOf, otherwise treat it as a parent
                string parentName;
                if (!string.IsNullOrEmpty(cloneOf))
                    parentName = cloneOf!.ToLowerInvariant();
                else if (!string.IsNullOrEmpty(romOf))
                    parentName = romOf!.ToLowerInvariant();
                else
                    parentName = machineName;

                if (!parents.TryGetValue(parentName, out List<string>? children))
                {
                    children = [];
                    parents[parentName] = children;
                }

                children.Add(machineName);
            }

            // Build one matcher per region, in priority order. The region text is escaped
            // so that it is matched literally inside a parenthesized group.
            List<Regex> regionMatchers = [];
            foreach (string region in RegionList)
            {
                regionMatchers.Add(new Regex(@"\(.*" + Regex.Escape(region) + @".*\)", RegexOptions.IgnoreCase));
            }

            // Once we have the full list of mappings, filter out games to keep
            foreach (KeyValuePair<string, List<string>> family in parents)
            {
                List<string> members = family.Value;

                // Find the first machine that matches the regions in order, if possible
                string? keeper = null;
                foreach (Regex matcher in regionMatchers)
                {
                    keeper = members.FirstOrDefault(m => matcher.IsMatch(m));
                    if (keeper != null)
                        break;
                }

                // If we didn't get a match, use the parent
                keeper ??= family.Key;

                // Remove the kept machine from the list
                members.Remove(keeper);

                // Remove the rest of the items from this key
                foreach (string member in members)
                {
                    datFile.Items.Remove(member);
                }
            }

            // Finally, strip out the parent tags
            Splitter.RemoveTagsFromChild(datFile);
            datFile.ItemsDB.RemoveTagsFromChild();
        }

        /// <summary>
        /// Ensure that all roms are in their own game (or at least try to ensure)
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal static void SetOneRomPerGame(DatFile datFile)
        {
            // Because this introduces subfolders, we need to set the SuperDAT type
            datFile.Header.SetFieldValue(Models.Metadata.Header.TypeKey, "SuperDAT");
            datFile.Items.SetOneRomPerGame();
            datFile.ItemsDB.SetOneRomPerGame();
        }

        /// <summary>
        /// Set internal names to match One Rom Per Game (ORPG) logic
        /// </summary>
        /// <param name="datItem">DatItem to run logic on</param>
        internal static void SetOneRomPerGame(DatItem datItem)
        {
            string? itemName = datItem.GetName();
            if (itemName == null)
                return;

            // Drop the last dot-separated segment; a name with no dot is kept whole
            string[] splitname = itemName.Split('.');
            int keepCount = splitname.Length > 1 ? splitname.Length - 1 : 1;

#if NET20 || NET35
            string stem = string.Join(".", splitname.Take(keepCount).ToArray());
#else
            string stem = string.Join(".", splitname.Take(keepCount));
#endif

            // Append the stem to the machine name as a subfolder
            Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;
            machine.SetFieldValue(Models.Metadata.Machine.NameKey, machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) + $"/{stem}");

            datItem.SetName(Path.GetFileName(itemName));
        }

        /// <summary>
        /// Strip the dates from the beginning of scene-style set names
        /// </summary>
        /// <param name="datFile">Current DatFile object to run operations on</param>
        internal void StripSceneDatesFromItems(DatFile datFile)
        {
            // Output the logging statement
            logger.User("Stripping scene-style dates");

            datFile.Items.StripSceneDatesFromItems();
            datFile.ItemsDB.StripSceneDatesFromItems();
        }

        #endregion
    }
}