Files
SabreTools/SabreTools.Filtering/Cleaner.cs

365 lines
16 KiB
C#
Raw Normal View History

using System;
2024-02-28 23:09:31 -05:00
#if NET40_OR_GREATER || NETCOREAPP
using System.Collections.Concurrent;
2024-02-28 23:09:31 -05:00
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text.RegularExpressions;
2024-03-05 03:04:47 -05:00
#if NET40_OR_GREATER || NETCOREAPP
2021-01-29 22:54:16 -08:00
using System.Threading.Tasks;
2024-03-05 03:04:47 -05:00
#endif
2020-12-08 13:23:59 -08:00
using SabreTools.Core;
2023-08-15 01:58:47 -04:00
using SabreTools.Core.Tools;
2020-12-14 11:16:48 -08:00
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.Logging;
2020-08-28 13:33:05 -07:00
[assembly: InternalsVisibleTo("SabreTools.Test")]
2020-12-08 13:48:57 -08:00
namespace SabreTools.Filtering
2020-08-28 13:33:05 -07:00
{
/// <summary>
/// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
/// </summary>
2020-08-28 13:33:05 -07:00
public class Cleaner
{
#region Fields
2020-12-13 13:22:06 -08:00
2020-08-28 13:33:05 -07:00
/// <summary>
/// Clean all names to WoD standards
/// </summary>
public bool Clean { get; set; }
/// <summary>
/// Deduplicate items using the given method
/// </summary>
public DedupeType DedupeRoms { get; set; }
2020-08-28 13:33:05 -07:00
/// <summary>
/// Set Machine Description from Machine Name
/// </summary>
public bool DescriptionAsName { get; set; }
/// <summary>
/// Keep machines that don't contain any items
/// </summary>
public bool KeepEmptyGames { get; set; }
/// <summary>
/// Enable "One Rom, One Region (1G1R)" mode
/// </summary>
public bool OneGamePerRegion { get; set; }
/// <summary>
/// Ordered list of regions for "One Rom, One Region (1G1R)" mode
/// </summary>
2024-02-28 19:19:50 -05:00
public List<string>? RegionList { get; set; }
2020-08-28 13:33:05 -07:00
/// <summary>
/// Ensure each rom is in their own game
/// </summary>
public bool OneRomPerGame { get; set; }
/// <summary>
/// Remove all unicode characters
/// </summary>
public bool RemoveUnicode { get; set; }
/// <summary>
/// Include root directory when determing trim sizes
/// </summary>
2024-02-28 19:19:50 -05:00
public string? Root { get; set; }
2020-08-28 13:33:05 -07:00
/// <summary>
/// Remove scene dates from the beginning of machine names
/// </summary>
public bool SceneDateStrip { get; set; }
/// <summary>
/// Change all machine names to "!"
/// </summary>
public bool Single { get; set; }
/// <summary>
/// Trim total machine and item name to not exceed NTFS limits
/// </summary>
public bool Trim { get; set; }
2024-02-28 19:19:50 -05:00
#endregion
#region Logging
/// <summary>
/// Logging object
/// </summary>
private readonly Logger logger = new();
#endregion
#region Running
/// <summary>
/// Apply cleaning methods to the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
/// <returns>True if cleaning was successful, false on error</returns>
public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
{
InternalStopwatch watch = new("Applying cleaning steps to DAT");
2021-02-02 14:09:49 -08:00
try
{
// Perform item-level cleaning
CleanDatItems(datFile);
// Bucket and dedupe according to the flag
if (DedupeRoms == DedupeType.Full)
datFile.Items.BucketBy(ItemKey.CRC, DedupeRoms);
else if (DedupeRoms == DedupeType.Game)
datFile.Items.BucketBy(ItemKey.Machine, DedupeRoms);
// Process description to machine name
if (DescriptionAsName == true)
{
datFile.Items.MachineDescriptionToName(throwOnError);
datFile.ItemsDB.MachineDescriptionToName(throwOnError);
}
// If we are removing scene dates, do that now
if (SceneDateStrip == true)
StripSceneDatesFromItems(datFile);
// Run the one rom per game logic, if required
if (OneGamePerRegion == true)
SetOneGamePerRegion(datFile);
// Run the one rom per game logic, if required
if (OneRomPerGame == true)
SetOneRomPerGame(datFile);
// Remove all marked items
datFile.Items.ClearMarked();
datFile.ItemsDB.ClearMarked();
// We remove any blanks, if we aren't supposed to have any
if (KeepEmptyGames == false)
{
datFile.Items.ClearEmpty();
datFile.ItemsDB.ClearEmpty();
}
}
catch (Exception ex) when (!throwOnError)
{
logger.Error(ex);
return false;
}
2021-02-02 14:09:49 -08:00
finally
{
watch.Stop();
}
return true;
}
/// <summary>
/// Clean individual items based on the current filter
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void CleanDatItems(DatFile datFile)
{
List<string> keys = datFile.Items.Keys.ToList();
foreach (string key in keys)
{
// For every item in the current key
2024-02-28 19:19:50 -05:00
var items = datFile.Items[key];
if (items == null)
continue;
foreach (DatItem item in items)
{
// If we have a null item, we can't clean it it
if (item == null)
continue;
// Run cleaning per item
CleanDatItem(item);
}
// Assign back for caution
datFile.Items[key] = items;
}
}
/// <summary>
/// Clean a DatItem according to the cleaner
/// </summary>
/// <param name="datItem">DatItem to clean</param>
internal void CleanDatItem(DatItem datItem)
{
// If we're stripping unicode characters, strip machine name and description
2020-12-18 23:31:38 -08:00
if (RemoveUnicode)
{
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.RemoveUnicodeCharacters(datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.RemoveUnicodeCharacters(datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
2023-08-15 01:58:47 -04:00
datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName()));
}
// If we're in cleaning mode, sanitize machine name and description
2020-12-18 23:31:38 -08:00
if (Clean)
{
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.NormalizeCharacters(datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.NormalizeCharacters(datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
}
// If we are in single game mode, rename the machine
2020-12-18 23:31:38 -08:00
if (Single)
2024-03-10 16:49:07 -04:00
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, "!");
// If we are in NTFS trim mode, trim the item name
2020-12-18 23:31:38 -08:00
if (Trim && datItem.GetName() != null)
{
// Windows max name length is 260
int usableLength = 260 - datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.Length - (Root?.Length ?? 0);
2024-02-28 19:19:50 -05:00
if (datItem.GetName()!.Length > usableLength)
{
2024-02-28 19:19:50 -05:00
string ext = Path.GetExtension(datItem.GetName()!);
2024-02-28 23:09:31 -05:00
datItem.SetName(datItem.GetName()!.Substring(0, usableLength - ext.Length) + ext);
}
}
}
/// <summary>
/// Filter a DAT using 1G1R logic given an ordered set of regions
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <remarks>
/// In the most technical sense, the way that the region list is being used does not
/// confine its values to be just regions. Since it's essentially acting like a
/// specialized version of the machine name filter, anything that is usually encapsulated
/// in parenthesis would be matched on, including disc numbers, languages, editions,
/// and anything else commonly used. Please note that, unlike other existing 1G1R
/// solutions, this does not have the ability to contain custom mappings of parent
/// to clone sets based on name, nor does it have the ability to match on the
/// Release DatItem type.
/// </remarks>
internal void SetOneGamePerRegion(DatFile datFile)
{
// If we have null region list, make it empty
2024-02-28 19:19:50 -05:00
RegionList ??= [];
// For sake of ease, the first thing we want to do is bucket by game
datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
// Then we want to get a mapping of all machines to parents
2024-02-28 19:19:50 -05:00
Dictionary<string, List<string>> parents = [];
foreach (string key in datFile.Items.Keys)
{
2024-02-28 19:19:50 -05:00
DatItem item = datFile.Items[key]![0];
// Match on CloneOf first
if (!string.IsNullOrEmpty(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey)))
{
if (!parents.ContainsKey(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey)!.ToLowerInvariant()))
parents.Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey)!.ToLowerInvariant(), []);
parents[item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey)!.ToLowerInvariant()].Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant());
}
// Then by RomOf
else if (!string.IsNullOrEmpty(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.RomOfKey)))
{
if (!parents.ContainsKey(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.RomOfKey)!.ToLowerInvariant()))
parents.Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.RomOfKey)!.ToLowerInvariant(), []);
parents[item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.RomOfKey)!.ToLowerInvariant()].Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant());
}
// Otherwise, treat it as a parent
else
{
if (!parents.ContainsKey(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant()))
parents.Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant(), []);
parents[item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant()].Add(item.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey)!.ToLowerInvariant());
}
}
// Once we have the full list of mappings, filter out games to keep
foreach (string key in parents.Keys)
{
// Find the first machine that matches the regions in order, if possible
2024-02-28 19:19:50 -05:00
string? machine = default;
foreach (string region in RegionList)
{
machine = parents[key].FirstOrDefault(m => Regex.IsMatch(m, @"\(.*" + region + @".*\)", RegexOptions.IgnoreCase));
if (machine != default)
break;
}
// If we didn't get a match, use the parent
if (machine == default)
machine = key;
// Remove the key from the list
parents[key].Remove(machine);
// Remove the rest of the items from this key
parents[key].ForEach(k => datFile.Items.Remove(k));
}
// Finally, strip out the parent tags
Splitter.RemoveTagsFromChild(datFile);
Splitter.RemoveTagsFromChildDB(datFile);
}
/// <summary>
/// Ensure that all roms are in their own game (or at least try to ensure)
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal static void SetOneRomPerGame(DatFile datFile)
{
// Because this introduces subfolders, we need to set the SuperDAT type
2024-03-10 04:10:37 -04:00
datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.TypeKey, "SuperDAT");
2024-03-19 16:18:35 -04:00
datFile.Items.SetOneRomPerGame();
datFile.ItemsDB.SetOneRomPerGame();
}
2021-02-01 12:35:59 -08:00
/// <summary>
/// Set internal names to match One Rom Per Game (ORPG) logic
/// </summary>
/// <param name="datItem">DatItem to run logic on</param>
internal static void SetOneRomPerGame(DatItem datItem)
2021-02-01 12:35:59 -08:00
{
if (datItem.GetName() == null)
return;
2024-02-28 19:19:50 -05:00
string[] splitname = datItem.GetName()!.Split('.');
2024-02-28 23:09:31 -05:00
#if NET20 || NET35
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey) + $"/{string.Join(".", splitname.Take(splitname.Length > 1 ? splitname.Length - 1 : 1).ToArray())}");
2024-02-28 23:09:31 -05:00
#else
datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey) + $"/{string.Join(".", splitname.Take(splitname.Length > 1 ? splitname.Length - 1 : 1))}");
2024-02-28 23:09:31 -05:00
#endif
2021-02-01 12:35:59 -08:00
datItem.SetName(Path.GetFileName(datItem.GetName()));
}
/// <summary>
/// Strip the dates from the beginning of scene-style set names
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void StripSceneDatesFromItems(DatFile datFile)
{
// Output the logging statement
logger.User("Stripping scene-style dates");
datFile.Items.StripSceneDatesFromItems();
datFile.ItemsDB.StripSceneDatesFromItems();
}
2020-12-13 22:06:47 -08:00
#endregion
2020-08-28 13:33:05 -07:00
}
}