using System;
#if NET40_OR_GREATER || NETCOREAPP
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.Logging;
[assembly: InternalsVisibleTo("SabreTools.Test")]
namespace SabreTools.Filtering
{
/// <summary>
/// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
/// </summary>
public class Cleaner
{
#region Fields

/// <summary>
/// Clean all names to WoD standards
/// </summary>
public bool Clean { get; set; }

/// <summary>
/// Deduplicate items using the given method
/// </summary>
public DedupeType DedupeRoms { get; set; }

/// <summary>
/// Set Machine Description from Machine Name
/// </summary>
public bool DescriptionAsName { get; set; }

/// <summary>
/// Keep machines that don't contain any items
/// </summary>
public bool KeepEmptyGames { get; set; }

/// <summary>
/// Enable "One Game, One Region (1G1R)" mode
/// </summary>
public bool OneGamePerRegion { get; set; }

/// <summary>
/// Ordered list of regions for "One Game, One Region (1G1R)" mode
/// </summary>
/// <remarks>
/// May be null; the 1G1R step replaces a null list with an empty one before using it.
/// </remarks>
public List<string>? RegionList { get; set; }

/// <summary>
/// Ensure each rom is in their own game
/// </summary>
public bool OneRomPerGame { get; set; }

/// <summary>
/// Remove all unicode characters
/// </summary>
public bool RemoveUnicode { get; set; }

/// <summary>
/// Root directory whose length is included when determining trim sizes
/// </summary>
public string? Root { get; set; }

/// <summary>
/// Remove scene dates from the beginning of machine names
/// </summary>
public bool SceneDateStrip { get; set; }

/// <summary>
/// Change all machine names to "!"
/// </summary>
public bool Single { get; set; }

/// <summary>
/// Trim total machine and item name to not exceed NTFS limits
/// </summary>
public bool Trim { get; set; }

#endregion
#region Logging
/// <summary>
/// Logging object
/// </summary>
private readonly Logger logger = new();
#endregion
#region Running
/// <summary>
/// Apply cleaning methods to the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
/// <returns>True if cleaning was successful, false on error</returns>
public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
{
    InternalStopwatch watch = new("Applying cleaning steps to DAT");

    try
    {
        // Perform item-level cleaning
        CleanDatItems(datFile);

        // Bucket and dedupe according to the flag
        if (DedupeRoms == DedupeType.Full)
            datFile.Items.BucketBy(ItemKey.CRC, DedupeRoms);
        else if (DedupeRoms == DedupeType.Game)
            datFile.Items.BucketBy(ItemKey.Machine, DedupeRoms);

        // Process description to machine name; forward the flag so that this
        // step does not swallow an error the caller asked to have rethrown
        if (DescriptionAsName)
            MachineDescriptionToName(datFile, throwOnError);

        // If we are removing scene dates, do that now
        if (SceneDateStrip)
            StripSceneDatesFromItems(datFile);

        // Run the one game per region logic, if required
        if (OneGamePerRegion)
            SetOneGamePerRegion(datFile);

        // Run the one rom per game logic, if required
        if (OneRomPerGame)
            SetOneRomPerGame(datFile);

        // Remove all marked items
        datFile.Items.ClearMarked();

        // We remove any blanks, if we aren't supposed to have any
        if (!KeepEmptyGames)
            datFile.Items.ClearEmpty();
    }
    catch (Exception ex) when (!throwOnError)
    {
        // Only reached when the caller did not ask for the exception to propagate
        logger.Error(ex);
        return false;
    }
    finally
    {
        // The stopwatch is stopped on success, on handled failure, and on rethrow
        watch.Stop();
    }

    return true;
}
/// <summary>
/// Clean individual items based on the current filter
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void CleanDatItems(DatFile datFile)
{
    // Copy the keys up front so that assigning buckets back below
    // happens against a fixed list rather than the live key collection
    List<string> keys = datFile.Items.Keys.ToList();
    foreach (string key in keys)
    {
        // For every item in the current key
        var items = datFile.Items[key];
        if (items == null)
            continue;

        foreach (DatItem item in items)
        {
            // If we have a null item, we can't clean it
            if (item == null)
                continue;

            // Run cleaning per item
            CleanDatItem(item);
        }

        // Assign back for caution
        datFile.Items[key] = items;
    }
}
/// <summary>
/// Clean a DatItem according to the cleaner
/// </summary>
/// <param name="datItem">DatItem to clean</param>
internal void CleanDatItem(DatItem datItem)
{
    // If we're stripping unicode characters, strip machine name and description
    if (RemoveUnicode)
    {
        datItem.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, TextHelper.RemoveUnicodeCharacters(datItem.Machine.GetFieldValue(Models.Metadata.Machine.NameKey)));
        datItem.Machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, TextHelper.RemoveUnicodeCharacters(datItem.Machine.GetFieldValue(Models.Metadata.Machine.DescriptionKey)));
        datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName()));
    }

    // If we're in cleaning mode, sanitize machine name and description
    if (Clean)
    {
        datItem.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, TextHelper.NormalizeCharacters(datItem.Machine.GetFieldValue(Models.Metadata.Machine.NameKey)));
        datItem.Machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, TextHelper.NormalizeCharacters(datItem.Machine.GetFieldValue(Models.Metadata.Machine.DescriptionKey)));
    }

    // If we are in single game mode, rename the machine
    if (Single)
        datItem.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, "!");

    // If we are in NTFS trim mode, trim the item name.
    // The name is read here, after the steps above may have rewritten it.
    string? itemName = datItem.GetName();
    if (Trim && itemName != null)
    {
        // Windows max name length is 260; the machine name and the root both count against it.
        // A missing machine name contributes nothing instead of throwing.
        int machineLength = datItem.Machine.GetFieldValue(Models.Metadata.Machine.NameKey)?.Length ?? 0;
        int usableLength = 260 - machineLength - (Root?.Length ?? 0);
        if (itemName.Length > usableLength)
        {
            string ext = Path.GetExtension(itemName);

            // When the budget is smaller than the extension itself, keep only the
            // extension rather than passing a negative length to Substring
            int stemLength = Math.Max(0, usableLength - ext.Length);
            datItem.SetName(itemName.Substring(0, stemLength) + ext);
        }
    }
}
/// <summary>
/// Use game descriptions as names in the DAT, updating cloneof/romof/sampleof
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
internal void MachineDescriptionToName(DatFile datFile, bool throwOnError = false)
{
    try
    {
        // First we want to get a mapping for all games to description
#if NET40_OR_GREATER || NETCOREAPP
        ConcurrentDictionary<string, string> mapping = new();
#else
        Dictionary<string, string> mapping = [];
#endif

#if NET452_OR_GREATER || NETCOREAPP
        Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
        Parallel.ForEach(datFile.Items.Keys, key =>
#else
        foreach (var key in datFile.Items.Keys)
#endif
        {
            var items = datFile.Items[key];
            if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                return;
#else
                continue;
#endif

            foreach (DatItem item in items)
            {
                // A machine without a name or a description cannot contribute a mapping
                string? machineName = item.Machine.GetFieldValue(Models.Metadata.Machine.NameKey);
                string? description = item.Machine.GetFieldValue(Models.Metadata.Machine.DescriptionKey);
                if (machineName == null || description == null)
                    continue;

                // Replace characters that are not wanted in a machine name
                string mappedName = description.Replace('/', '_').Replace("\"", "''").Replace(":", " -");

                // If the key mapping doesn't exist, add it (the first value seen is kept on every target)
#if NET40_OR_GREATER || NETCOREAPP
                mapping.TryAdd(machineName, mappedName);
#else
                if (!mapping.ContainsKey(machineName))
                    mapping[machineName] = mappedName;
#endif
            }
#if NET40_OR_GREATER || NETCOREAPP
        });
#else
        }
#endif

        // Now we loop through every item and update accordingly.
        // The keys are copied first because each bucket is removed and re-added below.
        var keys = datFile.Items.Keys.ToList();
#if NET452_OR_GREATER || NETCOREAPP
        Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
        Parallel.ForEach(keys, key =>
#else
        foreach (var key in keys)
#endif
        {
            var items = datFile.Items[key];
            if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                return;
#else
                continue;
#endif

            ConcurrentList<DatItem> newItems = [];
            foreach (DatItem item in items)
            {
                // Update machine name
                string? name = item.Machine.GetFieldValue(Models.Metadata.Machine.NameKey);
                if (!string.IsNullOrEmpty(name) && mapping.TryGetValue(name!, out var mappedMachine))
                    item.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, mappedMachine);

                // Update cloneof
                string? cloneOf = item.Machine.GetFieldValue(Models.Metadata.Machine.CloneOfKey);
                if (!string.IsNullOrEmpty(cloneOf) && mapping.TryGetValue(cloneOf!, out var mappedCloneOf))
                    item.Machine.SetFieldValue(Models.Metadata.Machine.CloneOfKey, mappedCloneOf);

                // Update romof
                string? romOf = item.Machine.GetFieldValue(Models.Metadata.Machine.RomOfKey);
                if (!string.IsNullOrEmpty(romOf) && mapping.TryGetValue(romOf!, out var mappedRomOf))
                    item.Machine.SetFieldValue(Models.Metadata.Machine.RomOfKey, mappedRomOf);

                // Update sampleof
                string? sampleOf = item.Machine.GetFieldValue(Models.Metadata.Machine.SampleOfKey);
                if (!string.IsNullOrEmpty(sampleOf) && mapping.TryGetValue(sampleOf!, out var mappedSampleOf))
                    item.Machine.SetFieldValue(Models.Metadata.Machine.SampleOfKey, mappedSampleOf);

                // Add the new item to the output list
                newItems.Add(item);
            }

            // Replace the old list of roms with the new one
            datFile.Items.Remove(key);
            datFile.Items.AddRange(key, newItems);
#if NET40_OR_GREATER || NETCOREAPP
        });
#else
        }
#endif
    }
    catch (Exception ex) when (!throwOnError)
    {
        // Only reached when the caller did not ask for the exception to propagate
        logger.Warning(ex.ToString());
    }
}
/// <summary>
/// Filter a DAT using 1G1R logic given an ordered set of regions
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <remarks>
/// In the most technical sense, the way that the region list is being used does not
/// confine its values to be just regions. Since it's essentially acting like a
/// specialized version of the machine name filter, anything that is usually encapsulated
/// in parenthesis would be matched on, including disc numbers, languages, editions,
/// and anything else commonly used. Please note that, unlike other existing 1G1R
/// solutions, this does not have the ability to contain custom mappings of parent
/// to clone sets based on name, nor does it have the ability to match on the
/// Release DatItem type.
/// </remarks>
internal void SetOneGamePerRegion(DatFile datFile)
{
    // If we have null region list, make it empty
    RegionList ??= [];

    // For sake of ease, the first thing we want to do is bucket by game
    datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);

    // Then we want to get a mapping of all parents to the machines in their family
    Dictionary<string, List<string>> parents = [];
    foreach (string key in datFile.Items.Keys)
    {
        // A missing or empty bucket has no representative item to inspect
        var bucket = datFile.Items[key];
        if (bucket == null || bucket.Count == 0)
            continue;

        DatItem item = bucket[0];

        // A machine without a name cannot be placed in a family
        string? machineName = item.Machine.GetFieldValue(Models.Metadata.Machine.NameKey);
        if (machineName == null)
            continue;

        // Match on CloneOf first, then RomOf, otherwise treat the machine as its own parent
        string? cloneOf = item.Machine.GetFieldValue(Models.Metadata.Machine.CloneOfKey);
        string? romOf = item.Machine.GetFieldValue(Models.Metadata.Machine.RomOfKey);
        string parentName;
        if (!string.IsNullOrEmpty(cloneOf))
            parentName = cloneOf!;
        else if (!string.IsNullOrEmpty(romOf))
            parentName = romOf!;
        else
            parentName = machineName;

        string parentKey = parentName.ToLowerInvariant();
        if (!parents.TryGetValue(parentKey, out var family))
        {
            family = [];
            parents[parentKey] = family;
        }

        family.Add(machineName.ToLowerInvariant());
    }

    // Once we have the full list of mappings, filter out games to keep
    foreach (var pair in parents)
    {
        List<string> family = pair.Value;

        // Find the first machine that matches the regions in order, if possible
        // NOTE(review): the region text is inserted into the pattern unescaped, so regex
        // metacharacters in a region are interpreted as regex — confirm this is intended
        string? machine = null;
        foreach (string region in RegionList)
        {
            machine = family.FirstOrDefault(m => Regex.IsMatch(m, @"\(.*" + region + @".*\)", RegexOptions.IgnoreCase));
            if (machine != null)
                break;
        }

        // If we didn't get a match, use the parent
        machine ??= pair.Key;

        // Remove the kept machine from the list
        family.Remove(machine);

        // Remove the rest of the items from this key
        family.ForEach(k => datFile.Items.Remove(k));
    }

    // Finally, strip out the parent tags
    Splitter.RemoveTagsFromChild(datFile);
}
/// <summary>
/// Ensure that all roms are in their own game (or at least try to ensure)
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal static void SetOneRomPerGame(DatFile datFile)
{
    // The per-item step below puts a "/" into machine names, so mark the header as a SuperDAT
    datFile.Header.Type = "SuperDAT";

    // Visit every bucket and rewrite each of its items in place
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, bucketKey =>
#elif NET40_OR_GREATER
    Parallel.ForEach(datFile.Items.Keys, bucketKey =>
#else
    foreach (var bucketKey in datFile.Items.Keys)
#endif
    {
        var bucket = datFile.Items[bucketKey];

        // Buckets that come back null are simply skipped
        if (bucket != null)
        {
            int index = 0;
            while (index < bucket.Count)
            {
                SetOneRomPerGame(bucket[index]);
                index++;
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif
}
/// <summary>
/// Set internal names to match One Rom Per Game (ORPG) logic
/// </summary>
/// <param name="datItem">DatItem to run logic on</param>
/// <remarks>
/// The machine name becomes the old machine name, a "/", and the item name without
/// its trailing extension; the item name is reduced to its file name. Items without
/// a name are left untouched.
/// </remarks>
internal static void SetOneRomPerGame(DatItem datItem)
{
    string? itemName = datItem.GetName();
    if (itemName == null)
        return;

    // Remove only a real trailing extension. Splitting on '.' would also cut the name
    // at a dot inside a directory segment (for example "v1.0/rom" would become "v1").
    string nameWithoutExtension = Path.ChangeExtension(itemName, null);

    datItem.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, datItem.Machine.GetFieldValue(Models.Metadata.Machine.NameKey) + $"/{nameWithoutExtension}");
    datItem.SetName(Path.GetFileName(itemName));
}
/// <summary>
/// Strip the dates from the beginning of scene-style set names
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void StripSceneDatesFromItems(DatFile datFile)
{
    // Output the logging statement
    logger.User("Stripping scene-style dates");

    // Set the regex pattern to use: a "NN.NN.NN-" date followed by text containing a hyphen
    string pattern = @"([0-9]{2}\.[0-9]{2}\.[0-9]{2}-)(.*?-.*?)";

    // Now process all of the roms.
    // The keys are copied first because each bucket is removed and re-added below.
    var keys = datFile.Items.Keys.ToList();
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(keys, Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
    Parallel.ForEach(keys, key =>
#else
    foreach (var key in keys)
#endif
    {
        var items = datFile.Items[key];
        if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
            return;
#else
            continue;
#endif

        for (int j = 0; j < items.Count; j++)
        {
            DatItem item = items[j];

            // A missing name or description has nothing to strip and must not reach the regex
            string? machineName = item.Machine.GetFieldValue(Models.Metadata.Machine.NameKey);
            if (machineName != null && Regex.IsMatch(machineName, pattern))
                item.Machine.SetFieldValue(Models.Metadata.Machine.NameKey, Regex.Replace(machineName, pattern, "$2"));

            string? description = item.Machine.GetFieldValue(Models.Metadata.Machine.DescriptionKey);
            if (description != null && Regex.IsMatch(description, pattern))
                item.Machine.SetFieldValue(Models.Metadata.Machine.DescriptionKey, Regex.Replace(description, pattern, "$2"));

            items[j] = item;
        }

        // Replace the bucket with the updated items
        datFile.Items.Remove(key);
        datFile.Items.AddRange(key, items);
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif
}
#endregion
}
}