2021-02-01 11:43:38 -08:00
|
|
|
|
using System;
|
|
|
|
|
|
using System.Collections.Concurrent;
|
|
|
|
|
|
using System.Collections.Generic;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using System.IO;
|
|
|
|
|
|
using System.Linq;
|
2021-02-01 11:43:38 -08:00
|
|
|
|
using System.Runtime.CompilerServices;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using System.Text.RegularExpressions;
|
2021-01-29 22:54:16 -08:00
|
|
|
|
using System.Threading.Tasks;
|
2020-08-28 13:33:05 -07:00
|
|
|
|
|
2020-12-08 13:23:59 -08:00
|
|
|
|
using SabreTools.Core;
|
2020-12-14 11:16:48 -08:00
|
|
|
|
using SabreTools.DatFiles;
|
2020-12-13 14:01:16 -08:00
|
|
|
|
using SabreTools.DatItems;
|
2020-12-16 10:49:38 -08:00
|
|
|
|
using SabreTools.Logging;
|
2020-08-28 13:33:05 -07:00
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
[assembly: InternalsVisibleTo("SabreTools.Test")]
|
2020-12-08 13:48:57 -08:00
|
|
|
|
namespace SabreTools.Filtering
|
2020-08-28 13:33:05 -07:00
|
|
|
|
{
|
|
|
|
|
|
/// <summary>
|
|
|
|
|
|
/// Represents the cleaning operations that need to be performed on a set of items, usually a DAT
|
|
|
|
|
|
/// </summary>
|
2021-02-01 11:43:38 -08:00
|
|
|
|
|
2020-08-28 13:33:05 -07:00
|
|
|
|
public class Cleaner
|
|
|
|
|
|
{
|
2021-02-01 12:11:32 -08:00
|
|
|
|
#region Fields
|
2020-12-13 13:22:06 -08:00
|
|
|
|
|
2020-08-28 13:33:05 -07:00
|
|
|
|
/// <summary>
/// Clean all names to WoD standards
/// </summary>
/// <remarks>
/// When true, CleanDatItem runs the machine name and description through CleanGameName.
/// </remarks>
public bool Clean { get; set; }

/// <summary>
/// Deduplicate items using the given method
/// </summary>
/// <remarks>
/// ApplyCleaning re-buckets by CRC for DedupeType.Full and by machine for DedupeType.Game;
/// any other value skips the dedupe bucketing step.
/// </remarks>
public DedupeType DedupeRoms { get; set; }

/// <summary>
/// Set Machine Name from Machine Description
/// </summary>
/// <remarks>
/// When true, ApplyCleaning calls MachineDescriptionToName, which replaces machine names
/// (and matching cloneof/romof/sampleof values) with the sanitized description.
/// </remarks>
public bool DescriptionAsName { get; set; }

/// <summary>
/// Keep machines that don't contain any items
/// </summary>
/// <remarks>
/// When false, ApplyCleaning finishes by calling ClearEmpty on the item dictionary.
/// </remarks>
public bool KeepEmptyGames { get; set; }

/// <summary>
/// Enable "One Game, One Region (1G1R)" mode
/// </summary>
public bool OneGamePerRegion { get; set; }

/// <summary>
/// Ordered list of regions for "One Game, One Region (1G1R)" mode
/// </summary>
/// <remarks>
/// Regions are tried in list order and the first one that matches a machine wins.
/// A null list is replaced with an empty list by SetOneGamePerRegion.
/// </remarks>
public List<string> RegionList { get; set; }

/// <summary>
/// Ensure each rom is in its own game
/// </summary>
public bool OneRomPerGame { get; set; }

/// <summary>
/// Remove all unicode characters
/// </summary>
/// <remarks>
/// When true, CleanDatItem drops every character above code point 255 from the
/// machine name, the machine description and the item name.
/// </remarks>
public bool RemoveUnicode { get; set; }

/// <summary>
/// Root directory to include when determining trim sizes
/// </summary>
/// <remarks>
/// Only the length of this string is used: it is subtracted from the 260 character
/// budget in CleanDatItem when Trim is enabled. Null counts as zero length.
/// </remarks>
public string Root { get; set; }

/// <summary>
/// Remove scene dates from the beginning of machine names
/// </summary>
public bool SceneDateStrip { get; set; }

/// <summary>
/// Change all machine names to "!"
/// </summary>
public bool Single { get; set; }

/// <summary>
/// Trim total machine and item name to not exceed NTFS limits
/// </summary>
public bool Trim { get; set; }
|
2020-12-13 13:22:06 -08:00
|
|
|
|
|
2020-12-16 10:49:38 -08:00
|
|
|
|
#endregion
|
|
|
|
|
|
|
|
|
|
|
|
#region Logging
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Logging object used by this class to report caught exceptions (as errors or warnings)
/// and user-facing progress messages
/// </summary>
private readonly Logger logger = new();
|
2020-12-16 10:49:38 -08:00
|
|
|
|
|
2020-12-16 11:03:59 -08:00
|
|
|
|
#endregion
|
|
|
|
|
|
|
2021-02-01 12:11:32 -08:00
|
|
|
|
#region Running
|
2020-12-13 14:01:16 -08:00
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Run every enabled cleaning step against the DatFile, in a fixed order
/// </summary>
/// <param name="datFile">DatFile whose items are cleaned in place</param>
/// <param name="throwOnError">True to let exceptions propagate to the caller, false to log them and return false</param>
/// <returns>True if every step completed, false if an exception was caught and logged</returns>
public bool ApplyCleaning(DatFile datFile, bool throwOnError = false)
{
    var watch = new InternalStopwatch("Applying cleaning steps to DAT");

    try
    {
        // Item-level cleaning always runs first
        CleanDatItems(datFile);

        // Only the Full and Game dedupe modes trigger a re-bucket
        if (DedupeRoms == DedupeType.Full)
        {
            datFile.Items.BucketBy(ItemKey.CRC, DedupeRoms);
        }
        else if (DedupeRoms == DedupeType.Game)
        {
            datFile.Items.BucketBy(ItemKey.Machine, DedupeRoms);
        }

        // Use machine descriptions as machine names
        if (DescriptionAsName)
        {
            MachineDescriptionToName(datFile);
        }

        // Strip scene-style dates from machine names and descriptions
        if (SceneDateStrip)
        {
            StripSceneDatesFromItems(datFile);
        }

        // Run the one game per region (1G1R) logic
        if (OneGamePerRegion)
        {
            SetOneGamePerRegion(datFile);
        }

        // Run the one rom per game logic
        if (OneRomPerGame)
        {
            SetOneRomPerGame(datFile);
        }

        // Drop everything that was marked for removal
        datFile.Items.ClearMarked();

        // Drop empty entries unless the caller asked to keep them
        if (!KeepEmptyGames)
        {
            datFile.Items.ClearEmpty();
        }

        return true;
    }
    catch (Exception ex) when (!throwOnError)
    {
        logger.Error(ex);
        return false;
    }
    finally
    {
        // The stopwatch is stopped on success, on a logged failure and on a rethrow
        watch.Stop();
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Apply the per-item cleaning rules to every item in the DatFile
/// </summary>
/// <param name="datFile">DatFile whose items are cleaned in place</param>
internal void CleanDatItems(DatFile datFile)
{
    // Work from a copy of the key list because each bucket is written back during the loop
    foreach (string bucketKey in datFile.Items.Keys.ToList())
    {
        ConcurrentList<DatItem> bucket = datFile.Items[bucketKey];
        foreach (DatItem entry in bucket)
        {
            // Null entries cannot be cleaned, so they are left as they are
            if (entry != null)
            {
                CleanDatItem(entry);
            }
        }

        // Store the bucket back under its key
        datFile.Items[bucketKey] = bucket;
    }
}
|
|
|
|
|
|
|
2020-12-13 14:01:16 -08:00
|
|
|
|
/// <summary>
/// Clean a single DatItem according to the flags set on this cleaner
/// </summary>
/// <param name="datItem">DatItem to clean in place</param>
/// <remarks>
/// Steps run in this order: unicode stripping, name sanitizing, single-machine
/// renaming, then length trimming. Trimming keeps the file extension and never
/// throws when the machine name and root already use up the whole length budget.
/// </remarks>
internal void CleanDatItem(DatItem datItem)
{
    // If we're stripping unicode characters, strip machine name, description and item name
    if (RemoveUnicode)
    {
        datItem.Machine.Name = RemoveUnicodeCharacters(datItem.Machine.Name);
        datItem.Machine.Description = RemoveUnicodeCharacters(datItem.Machine.Description);
        datItem.SetName(RemoveUnicodeCharacters(datItem.GetName()));
    }

    // If we're in cleaning mode, sanitize machine name and description
    if (Clean)
    {
        datItem.Machine.Name = CleanGameName(datItem.Machine.Name);
        datItem.Machine.Description = CleanGameName(datItem.Machine.Description);
    }

    // If we are in single game mode, rename the machine
    if (Single)
    {
        datItem.Machine.Name = "!";
    }

    // If we are in NTFS trim mode, trim the item name
    if (!Trim)
    {
        return;
    }

    string itemName = datItem.GetName();
    if (itemName == null)
    {
        return;
    }

    // Windows max name length is 260; machine name and root both count against it.
    // A missing machine name or root simply contributes nothing.
    int usableLength = 260 - (datItem.Machine.Name?.Length ?? 0) - (Root?.Length ?? 0);
    if (itemName.Length > usableLength)
    {
        string ext = Path.GetExtension(itemName);

        // Clamp to zero so an exhausted budget yields just the extension instead of
        // an out-of-range slice
        int keep = Math.Max(usableLength - ext.Length, 0);
        datItem.SetName(itemName[..keep] + ext);
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Sanitize a game (or rom) name so it follows the WoD naming standard
/// </summary>
/// <param name="game">Name to sanitize, may be null</param>
/// <returns>The sanitized name, or null if the input was null</returns>
internal static string CleanGameName(string game)
{
    if (game == null)
    {
        return null;
    }

    // Character-level passes first: accents, Cyrillic, then special characters and patterns
    string cleaned = SearchPattern(RussianToLatin(NormalizeChars(game)));

    // Keep an optional leading bracketed tag plus the text up to the next opening bracket
    Match match = Regex.Match(cleaned, @"(([[(].*[\)\]] )?([^([]+))");
    return match.Groups[1].Value.Trim();
}
|
|
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Use game descriptions as names in the DAT, updating cloneof/romof/sampleof
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <param name="throwOnError">True if the error that is thrown should be thrown back to the caller, false otherwise</param>
/// <remarks>
/// Items without a machine name or description are left untouched instead of aborting
/// the whole pass. In descriptions, '/' becomes '_', '"' becomes two single quotes and
/// ':' becomes " -" before the description is used as a name.
/// </remarks>
internal void MachineDescriptionToName(DatFile datFile, bool throwOnError = false)
{
    // Looks up the replacement for a name; false when the name is blank or unmapped
    static bool TryMap(ConcurrentDictionary<string, string> map, string current, out string mapped)
    {
        mapped = null;
        return !string.IsNullOrWhiteSpace(current) && map.TryGetValue(current, out mapped);
    }

    try
    {
        // First we want to get a mapping for all games to description
        ConcurrentDictionary<string, string> mapping = new();
        Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
        {
            ConcurrentList<DatItem> items = datFile.Items[key];
            foreach (DatItem item in items)
            {
                string name = item?.Machine?.Name;
                string description = item?.Machine?.Description;

                // A null name cannot be a dictionary key and a null description has nothing to map to
                if (name == null || description == null)
                {
                    continue;
                }

                // The first description seen for a machine name wins
                mapping.TryAdd(name, description.Replace('/', '_').Replace("\"", "''").Replace(":", " -"));
            }
        });

        // Now we loop through every item and update accordingly
        Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
        {
            ConcurrentList<DatItem> items = datFile.Items[key];
            ConcurrentList<DatItem> newItems = new();
            foreach (DatItem item in items)
            {
                if (item?.Machine != null)
                {
                    // Update machine name
                    if (TryMap(mapping, item.Machine.Name, out string newName))
                    {
                        item.Machine.Name = newName;
                    }

                    // Update cloneof
                    if (TryMap(mapping, item.Machine.CloneOf, out string newCloneOf))
                    {
                        item.Machine.CloneOf = newCloneOf;
                    }

                    // Update romof
                    if (TryMap(mapping, item.Machine.RomOf, out string newRomOf))
                    {
                        item.Machine.RomOf = newRomOf;
                    }

                    // Update sampleof
                    if (TryMap(mapping, item.Machine.SampleOf, out string newSampleOf))
                    {
                        item.Machine.SampleOf = newSampleOf;
                    }
                }

                // Every item is carried over, whether or not it was updated
                newItems.Add(item);
            }

            // Replace the old list of roms with the new one
            datFile.Items.Remove(key);
            datFile.Items.AddRange(key, newItems);
        });
    }
    catch (Exception ex) when (!throwOnError)
    {
        logger.Warning(ex.ToString());
    }
}
|
|
|
|
|
|
|
2020-12-13 14:01:16 -08:00
|
|
|
|
/// <summary>
/// Swap accented Latin characters for plain ASCII equivalents
/// </summary>
/// <param name="input">Text to convert, may be null</param>
/// <returns>The converted text, or null if the input was null</returns>
internal static string NormalizeChars(string input)
{
    if (input == null)
    {
        return null;
    }

    // Each pair is (accented character, ASCII replacement); umlauts expand to two letters
    (string Accented, string Plain)[] replacements =
    {
        ("Á", "A"), ("á", "a"),
        ("À", "A"), ("à", "a"),
        ("Â", "A"), ("â", "a"),
        ("Ä", "Ae"), ("ä", "ae"),
        ("Ã", "A"), ("ã", "a"),
        ("Å", "A"), ("å", "a"),
        ("Æ", "Ae"), ("æ", "ae"),
        ("Ç", "C"), ("ç", "c"),
        ("Ð", "D"), ("ð", "d"),
        ("É", "E"), ("é", "e"),
        ("È", "E"), ("è", "e"),
        ("Ê", "E"), ("ê", "e"),
        ("Ë", "E"), ("ë", "e"),
        ("ƒ", "f"),
        ("Í", "I"), ("í", "i"),
        ("Ì", "I"), ("ì", "i"),
        ("Î", "I"), ("î", "i"),
        ("Ï", "I"), ("ï", "i"),
        ("Ñ", "N"), ("ñ", "n"),
        ("Ó", "O"), ("ó", "o"),
        ("Ò", "O"), ("ò", "o"),
        ("Ô", "O"), ("ô", "o"),
        ("Ö", "Oe"), ("ö", "oe"),
        ("Õ", "O"), ("õ", "o"),
        ("Ø", "O"), ("ø", "o"),
        ("Š", "S"), ("š", "s"),
        ("ß", "ss"),
        ("Þ", "B"), ("þ", "b"),
        ("Ú", "U"), ("ú", "u"),
        ("Ù", "U"), ("ù", "u"),
        ("Û", "U"), ("û", "u"),
        ("Ü", "Ue"), ("ü", "ue"),
        ("ÿ", "y"),
        ("Ý", "Y"), ("ý", "y"),
        ("Ž", "Z"), ("ž", "z"),
    };

    string result = input;
    foreach ((string accented, string plain) in replacements)
    {
        result = result.Replace(accented, plain);
    }

    return result;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Drop every character whose code unit is above 255 from a string
/// </summary>
/// <param name="s">Text to filter, may be null</param>
/// <returns>The filtered text, or null if the input was null</returns>
internal static string RemoveUnicodeCharacters(string s)
{
    if (s == null)
    {
        return null;
    }

    // Keep only the characters in the 0-255 range, preserving their order
    char[] kept = Array.FindAll(s.ToCharArray(), ch => ch <= 255);
    return new string(kept);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Convert Cyrillic lettering to Latin lettering
/// </summary>
/// <param name="input">String to be parsed, may be null</param>
/// <returns>String with characters replaced, or null if the input was null</returns>
/// <remarks>
/// Letter case is preserved: every uppercase Cyrillic letter maps to a capitalized
/// Latin replacement. The hard and soft signs are dropped.
/// </remarks>
internal static string RussianToLatin(string input)
{
    if (input == null)
    {
        return null;
    }

    // Each pair is (Cyrillic letter, Latin replacement)
    (string Cyrillic, string Latin)[] charmap =
    {
        ("А", "A"), ("Б", "B"), ("В", "V"), ("Г", "G"), ("Д", "D"),
        ("Е", "E"), ("Ё", "Yo"), ("Ж", "Zh"), ("З", "Z"), ("И", "I"),
        ("Й", "J"), ("К", "K"), ("Л", "L"), ("М", "M"), ("Н", "N"),
        ("О", "O"), ("П", "P"), ("Р", "R"), ("С", "S"), ("Т", "T"),
        ("У", "U"), ("Ф", "F"), ("Х", "Kh"), ("Ц", "Ts"), ("Ч", "Ch"),
        ("Ш", "Sh"), ("Щ", "Sch"), ("Ъ", string.Empty), ("Ы", "Y"), ("Ь", string.Empty),
        ("Э", "E"), ("Ю", "Yu"), ("Я", "Ya"), ("а", "a"), ("б", "b"),
        ("в", "v"), ("г", "g"), ("д", "d"), ("е", "e"), ("ё", "yo"),
        ("ж", "zh"), ("з", "z"), ("и", "i"), ("й", "j"), ("к", "k"),
        ("л", "l"), ("м", "m"), ("н", "n"), ("о", "o"), ("п", "p"),
        ("р", "r"), ("с", "s"), ("т", "t"), ("у", "u"), ("ф", "f"),
        ("х", "kh"), ("ц", "ts"), ("ч", "ch"), ("ш", "sh"), ("щ", "sch"),
        ("ъ", string.Empty), ("ы", "y"), ("ь", string.Empty), ("э", "e"), ("ю", "yu"),
        ("я", "ya"),
    };

    foreach ((string cyrillic, string latin) in charmap)
    {
        input = input.Replace(cyrillic, latin);
    }

    return input;
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Run a fixed, ordered list of regex replacements over a name
/// </summary>
/// <param name="input">Text to process, may be null</param>
/// <returns>The processed text, or null if the input was null</returns>
internal static string SearchPattern(string input)
{
    if (input == null)
    {
        return null;
    }

    // Order matters: later rules tidy up the dashes and whitespace left by earlier ones
    (string Pattern, string Replacement)[] rules =
    {
        (@"~", " - "),
        (@"_", " "),
        (@":", " "),
        (@">", ")"),
        (@"<", "("),
        (@"\|", "-"),
        ("\"", "'"),
        (@"\*", "."),
        (@"\\", "-"),
        (@"/", "-"),
        (@"\?", " "),
        (@"\(([^)(]*)\(([^)]*)\)([^)(]*)\)", " "),
        (@"\(([^)]+)\)", " "),
        (@"\[([^]]+)\]", " "),
        (@"\{([^}]+)\}", " "),
        (@"(ZZZJUNK|ZZZ-UNK-|ZZZ-UNK |zzz unknow |zzz unk |Copy of |[.][a-z]{3}[.][a-z]{3}[.]|[.][a-z]{3}[.])", " "),
        (@" (r|rev|v|ver)\s*[\d\.]+[^\s]*", " "),
        (@"(( )|(\A))(\d{6}|\d{8})(( )|(\Z))", " "),
        (@"(( )|(\A))(\d{1,2})-(\d{1,2})-(\d{4}|\d{2})", " "),
        (@"(( )|(\A))(\d{4}|\d{2})-(\d{1,2})-(\d{1,2})", " "),
        (@"[-]+", "-"),
        (@"\A\s*\)", " "),
        (@"\A\s*(,|-)", " "),
        (@"\s+", " "),
        (@"\s+,", ","),
        (@"\s*(,|-)\s*\Z", " "),
    };

    // Feed the output of each rule into the next one
    return rules.Aggregate(input, (current, rule) => Regex.Replace(current, rule.Pattern, rule.Replacement));
}
|
|
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Filter a DAT using 1G1R logic given an ordered set of regions
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <remarks>
/// In the most technical sense, the way that the region list is being used does not
/// confine its values to be just regions. Since it's essentially acting like a
/// specialized version of the machine name filter, anything that is usually encapsulated
/// in parenthesis would be matched on, including disc numbers, languages, editions,
/// and anything else commonly used. Please note that, unlike other existing 1G1R
/// solutions, this does not have the ability to contain custom mappings of parent
/// to clone sets based on name, nor does it have the ability to match on the
/// Release DatItem type.
///
/// Empty buckets, and buckets whose first item has no machine name, are skipped
/// rather than raising an exception.
/// </remarks>
internal void SetOneGamePerRegion(DatFile datFile)
{
    // If we have null region list, make it empty
    RegionList ??= new List<string>();

    // For sake of ease, the first thing we want to do is bucket by game
    datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);

    // Then we want to get a mapping of all parents to the machines in their family
    Dictionary<string, List<string>> parents = new();
    foreach (string key in datFile.Items.Keys)
    {
        // The first item stands in for the whole bucket; nothing to do without one
        ConcurrentList<DatItem> bucket = datFile.Items[key];
        if (bucket == null || bucket.Count == 0)
        {
            continue;
        }

        DatItem item = bucket[0];
        if (item?.Machine?.Name == null)
        {
            continue;
        }

        string machineName = item.Machine.Name.ToLowerInvariant();

        // Match on CloneOf first, then RomOf, otherwise treat the machine as its own parent
        string parentName;
        if (!string.IsNullOrEmpty(item.Machine.CloneOf))
        {
            parentName = item.Machine.CloneOf.ToLowerInvariant();
        }
        else if (!string.IsNullOrEmpty(item.Machine.RomOf))
        {
            parentName = item.Machine.RomOf.ToLowerInvariant();
        }
        else
        {
            parentName = machineName;
        }

        if (!parents.TryGetValue(parentName, out List<string> family))
        {
            family = new List<string>();
            parents.Add(parentName, family);
        }

        family.Add(machineName);
    }

    // Once we have the full list of mappings, filter out games to keep
    foreach (KeyValuePair<string, List<string>> entry in parents)
    {
        List<string> family = entry.Value;

        // Find the first machine that matches the regions in order, if possible
        string machine = null;
        foreach (string region in RegionList)
        {
            // NOTE(review): the region text is inserted into the pattern verbatim, so regex
            // metacharacters in a region are interpreted as regex — confirm this is intended
            string pattern = @"\(.*" + region + @".*\)";
            machine = family.FirstOrDefault(m => Regex.IsMatch(m, pattern, RegexOptions.IgnoreCase));
            if (machine != null)
            {
                break;
            }
        }

        // If we didn't get a match, use the parent
        // NOTE(review): when the parent itself is not in the DAT, this removes every
        // machine in the family — confirm this is intended
        machine ??= entry.Key;

        // Keep the chosen machine and remove the rest of the family from the DAT
        family.Remove(machine);
        family.ForEach(k => datFile.Items.Remove(k));
    }

    // Finally, strip out the parent tags
    Splitter.RemoveTagsFromChild(datFile);
}
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Give every rom its own game by applying the per-item ORPG logic to the whole DatFile
/// </summary>
/// <param name="datFile">DatFile whose items are updated in place</param>
internal static void SetOneRomPerGame(DatFile datFile)
{
    // Machine names gain a subfolder component, so the header is marked as a SuperDAT
    datFile.Header.Type = "SuperDAT";

    // Rename each rom's game to "<game name>/<rom name>", one bucket per parallel task
    Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, bucketKey =>
    {
        ConcurrentList<DatItem> bucket = datFile.Items[bucketKey];
        foreach (DatItem entry in bucket)
        {
            SetOneRomPerGame(entry);
        }
    });
}
|
|
|
|
|
|
|
2021-02-01 12:35:59 -08:00
|
|
|
|
/// <summary>
/// Apply One Rom Per Game (ORPG) naming to a single item
/// </summary>
/// <param name="datItem">DatItem to update in place; items without a name are left alone</param>
internal static void SetOneRomPerGame(DatItem datItem)
{
    string itemName = datItem.GetName();
    if (itemName == null)
    {
        return;
    }

    // The machine suffix is the item name up to, but not including, its last '.';
    // a name with no '.' is used whole
    int lastDot = itemName.LastIndexOf('.');
    string stem = lastDot < 0 ? itemName : itemName.Substring(0, lastDot);
    datItem.Machine.Name += $"/{stem}";

    // The item keeps only the file-name part of its original name
    datItem.SetName(Path.GetFileName(itemName));
}
|
|
|
|
|
|
|
2021-02-01 11:43:38 -08:00
|
|
|
|
/// <summary>
/// Strip the dates from the beginning of scene-style set names
/// </summary>
/// <param name="datFile">Current DatFile object to run operations on</param>
/// <remarks>
/// A scene-style date is "NN.NN.NN-" followed by text containing at least one more dash.
/// Both the machine name and the machine description are processed; null values, null
/// items and items without a machine are left untouched.
/// </remarks>
internal void StripSceneDatesFromItems(DatFile datFile)
{
    // Output the logging statement
    logger.User("Stripping scene-style dates");

    // Group 1 is the date prefix that gets dropped, group 2 is the text that is kept
    const string pattern = @"([0-9]{2}\.[0-9]{2}\.[0-9]{2}-)(.*?-.*?)";

    // Now process all of the roms
    Parallel.ForEach(datFile.Items.Keys, Globals.ParallelOptions, key =>
    {
        ConcurrentList<DatItem> items = datFile.Items[key];
        for (int j = 0; j < items.Count; j++)
        {
            DatItem item = items[j];
            if (item?.Machine == null)
            {
                continue;
            }

            // Regex.Replace returns the input unchanged when nothing matches,
            // so no separate IsMatch check is needed
            if (item.Machine.Name != null)
            {
                item.Machine.Name = Regex.Replace(item.Machine.Name, pattern, "$2");
            }

            if (item.Machine.Description != null)
            {
                item.Machine.Description = Regex.Replace(item.Machine.Description, pattern, "$2");
            }
        }

        // Re-register the bucket under its key
        datFile.Items.Remove(key);
        datFile.Items.AddRange(key, items);
    });
}
|
|
|
|
|
|
|
2020-12-13 22:06:47 -08:00
|
|
|
|
#endregion
|
2020-08-28 13:33:05 -07:00
|
|
|
|
}
|
|
|
|
|
|
}
|