2021-02-01 11:43:38 -08:00
|
|
|
|
using System;
|
2024-02-28 23:09:31 -05:00
|
|
|
|
#if NET40_OR_GREATER || NETCOREAPP
|
2021-02-01 11:43:38 -08:00
|
|
|
|
using System.Collections.Concurrent;
|
2024-02-28 23:09:31 -05:00
|
|
|
|
#endif
|
2021-02-01 11:43:38 -08:00
|
|
|
|
using System.Collections.Generic;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using System.IO;
|
|
|
|
|
|
using System.Linq;
|
2021-02-01 11:43:38 -08:00
|
|
|
|
using System.Runtime.CompilerServices;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using System.Text.RegularExpressions;
|
2024-03-05 03:04:47 -05:00
|
|
|
|
#if NET40_OR_GREATER || NETCOREAPP
|
2021-01-29 22:54:16 -08:00
|
|
|
|
using System.Threading.Tasks;
|
2024-03-05 03:04:47 -05:00
|
|
|
|
#endif
|
2020-12-08 13:23:59 -08:00
|
|
|
|
using SabreTools.Core;
|
2023-08-15 01:58:47 -04:00
|
|
|
|
using SabreTools.Core.Tools;
|
2020-12-14 11:16:48 -08:00
|
|
|
|
using SabreTools.DatFiles;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using SabreTools.DatItems;
|
2020-12-16 10:49:38 -08:00
|
|
|
|
using SabreTools.Logging;
|
2020-08-28 13:33:05 -07:00
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
[assembly: InternalsVisibleTo("SabreTools.Test")]
|
2020-12-08 13:48:57 -08:00
|
|
|
|
namespace SabreTools.Filtering
|
2020-08-28 13:33:05 -07:00
|
|
|
|
{
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public class Cleaner
|
|
|
|
|
|
{
|
2021-02-01 12:11:32 -08:00
|
|
|
|
#region Fields
|
2020-12-13 13:22:06 -08:00
|
|
|
|
|
2020-08-28 13:33:05 -07:00
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Clean all names to WoD standards
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool Clean { get; set; }
|
|
|
|
|
|
|
2020-08-30 23:11:05 -07:00
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Deduplicate items using the given method
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public DedupeType DedupeRoms { get; set; }
|
|
|
|
|
|
|
2020-08-28 13:33:05 -07:00
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Set Machine Description from Machine Name
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool DescriptionAsName { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Keep machines that don't contain any items
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool KeepEmptyGames { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Enable "One Game, One Region (1G1R)" mode
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool OneGamePerRegion { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Ordered list of regions for "One Game, One Region (1G1R)" mode
|
|
|
|
|
|
/// </summary>
|
2024-02-28 19:19:50 -05:00
|
|
|
|
public List<string>? RegionList { get; set; }
|
2020-08-28 13:33:05 -07:00
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Ensure each rom is in their own game
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool OneRomPerGame { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Remove all unicode characters
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool RemoveUnicode { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Root directory to include when determining trim sizes
|
|
|
|
|
|
/// </summary>
|
2024-02-28 19:19:50 -05:00
|
|
|
|
public string? Root { get; set; }
|
2020-08-28 13:33:05 -07:00
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Remove scene dates from the beginning of machine names
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool SceneDateStrip { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Change all machine names to "!"
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool Single { get; set; }
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Trim total machine and item name to not exceed NTFS limits
|
|
|
|
|
|
/// </summary>
|
|
|
|
|
|
public bool Trim { get; set; }
|
2024-02-28 19:19:50 -05:00
|
|
|
|
|
2020-12-16 10:49:38 -08:00
|
|
|
|
#endregion
|
|
|
|
|
|
|
|
|
|
|
|
#region Logging
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Logging object
|
|
|
|
|
|
/// </summary>
|
2023-04-19 16:39:58 -04:00
|
|
|
|
private readonly Logger logger = new();
|
2020-12-16 10:49:38 -08:00
|
|
|
|
|
2020-12-16 11:03:59 -08:00
|
|
|
|
#endregion
|
|
|
|
|
|
|
2021-02-01 12:11:32 -08:00
|
|
|
|
#region Running
|
2020-12-13 14:01:16 -08:00
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Run every enabled cleaning step against the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <param name="throwOnError">True to let any exception escape to the caller, false to log it and return false instead</param>
/// <returns>True if every step completed, false if an exception was caught and logged</returns>
public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
{
    var watch = new InternalStopwatch("Applying cleaning steps to DAT");

    try
    {
        // Item-level cleaning always runs first
        CleanDatItems(datFile);

        // Only the Full and Game dedupe modes trigger bucketing; each uses its own bucket key
        bool fullDedupe = DedupeRoms == DedupeType.Full;
        if (fullDedupe || DedupeRoms == DedupeType.Game)
        {
            datFile.Items.BucketBy(fullDedupe ? ItemKey.CRC : ItemKey.Machine, DedupeRoms);
        }

        // Copy machine descriptions over machine names, if requested
        if (DescriptionAsName)
        {
            datFile.Items.MachineDescriptionToName(throwOnError);
            datFile.ItemsDB.MachineDescriptionToName(throwOnError);
        }

        // Strip scene-style dates, if requested
        if (SceneDateStrip)
        {
            StripSceneDatesFromItems(datFile);
        }

        // Run the one game per region (1G1R) logic; this needs a region list to work from
        if (OneGamePerRegion && RegionList is { } regions)
        {
            datFile.Items.SetOneGamePerRegion(regions);
            datFile.ItemsDB.SetOneGamePerRegion(regions);
        }

        // Run the one rom per game logic, if requested
        if (OneRomPerGame)
        {
            SetOneRomPerGame(datFile);
        }

        // Marked items are always cleared from both item stores
        datFile.Items.ClearMarked();
        datFile.ItemsDB.ClearMarked();

        // Empty entries are cleared unless we were asked to keep them
        if (!KeepEmptyGames)
        {
            datFile.Items.ClearEmpty();
            datFile.ItemsDB.ClearEmpty();
        }

        return true;
    }
    catch (Exception error) when (!throwOnError)
    {
        // The filter only matches when the caller did not ask for exceptions to propagate
        logger.Error(error);
        return false;
    }
    finally
    {
        // Stop the stopwatch on every exit path
        watch.Stop();
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Run the per-item cleaning over every item stored in the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void CleanDatItems(DatFile datFile)
{
    // Copy the keys into a list up front, since each bucket is assigned back while looping
    foreach (string bucketKey in datFile.Items.Keys.ToList())
    {
        var bucket = datFile.Items[bucketKey];
        if (bucket != null)
        {
            foreach (DatItem entry in bucket)
            {
                // A null item can't be cleaned, so only process real ones
                if (entry != null)
                {
                    CleanDatItem(entry);
                }
            }

            // Assign back for caution
            datFile.Items[bucketKey] = bucket;
        }
    }
}
|
|
|
|
|
|
|
2020-12-13 14:01:16 -08:00
|
|
|
|
/// <summary>
/// Clean a DatItem according to the cleaner
/// </summary>
/// <param name="datItem">DatItem to clean</param>
/// <remarks>
/// The steps run in a fixed order, each one only if its flag is set: unicode removal,
/// character normalization, single-machine renaming, and finally name trimming.
/// Trimming therefore measures the machine name as left by the earlier steps.
/// </remarks>
internal void CleanDatItem(DatItem datItem)
{
    // If we're stripping unicode characters, strip machine name, machine description, and item name
    if (RemoveUnicode)
    {
        Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;
        machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.RemoveUnicodeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
        machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.RemoveUnicodeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
        datItem.SetName(TextHelper.RemoveUnicodeCharacters(datItem.GetName()));
    }

    // If we're in cleaning mode, sanitize machine name and description
    if (Clean)
    {
        Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;
        machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, TextHelper.NormalizeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)));
        machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, TextHelper.NormalizeCharacters(machine.GetStringFieldValue(Models.Metadata.Machine.DescriptionKey)));
    }

    // If we are in single game mode, rename the machine
    if (Single)
    {
        datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, "!");
    }

    // If we are in NTFS trim mode, trim the item name
    if (Trim && datItem.GetName() is string itemName)
    {
        // A missing machine name takes up no room in the path, so count it as empty
        string machineName = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? string.Empty;

        // Windows max name length is 260
        int usableLength = 260 - machineName.Length - (Root?.Length ?? 0);
        if (itemName.Length > usableLength)
        {
            string ext = Path.GetExtension(itemName);

            // The machine name, root, and extension can already use up the whole budget;
            // clamp at zero so Substring is never handed a negative length
            int keptLength = Math.Max(usableLength - ext.Length, 0);
            datItem.SetName(itemName.Substring(0, keptLength) + ext);
        }
    }
}
|
|
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Apply the one rom per game logic to both item stores of the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal static void SetOneRomPerGame(DatFile datFile)
{
    // Splitting roms into their own games introduces subfolders, which requires the SuperDAT type
    const string superDatType = "SuperDAT";
    datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.TypeKey, superDatType);

    // Both item stores get the same treatment
    datFile.Items.SetOneRomPerGame();
    datFile.ItemsDB.SetOneRomPerGame();
}
|
|
|
|
|
|
|
2021-02-01 12:35:59 -08:00
|
|
|
|
/// <summary>
/// Rename the machine and the item so they follow One Rom Per Game (ORPG) logic
/// </summary>
/// <param name="datItem">DatItem to run logic on</param>
/// <remarks>
/// The item name minus its last dot-separated segment is appended to the machine name
/// after a "/", and the item name is then reduced to its file name. Items without a
/// name are left untouched.
/// </remarks>
internal static void SetOneRomPerGame(DatItem datItem)
{
    string? itemName = datItem.GetName();
    if (itemName == null)
    {
        return;
    }

    // Drop the last dot-separated segment, but always keep at least one segment
    string[] segments = itemName.Split('.');
    int keptSegments = segments.Length > 1 ? segments.Length - 1 : 1;
    string stem = string.Join(".", segments, 0, keptSegments);

    // Extend the machine name with the stem as a subfolder
    Machine machine = datItem.GetFieldValue<Machine>(DatItem.MachineKey)!;
    string? machineName = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey);
    machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, $"{machineName}/{stem}");

    // Only the file name portion stays on the item itself
    datItem.SetName(Path.GetFileName(itemName));
}
|
|
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Strip scene-style dates from both item stores of the DatFile
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
internal void StripSceneDatesFromItems(DatFile datFile)
{
    // Let the user know which step is running
    const string statusMessage = "Stripping scene-style dates";
    logger.User(statusMessage);

    // The stripping itself is delegated to each item store
    datFile.Items.StripSceneDatesFromItems();
    datFile.ItemsDB.StripSceneDatesFromItems();
}
|
|
|
|
|
|
|
2020-12-13 22:06:47 -08:00
|
|
|
|
#endregion
|
2020-08-28 13:33:05 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|