mirror of
https://github.com/claunia/SabreTools.git
synced 2025-12-16 19:14:27 +00:00
Overhaul bucketing and add deduplication to IDDB
This commit is contained in:
@@ -250,47 +250,156 @@ namespace SabreTools.DatFiles
|
|||||||
/// Update the bucketing dictionary
|
/// Update the bucketing dictionary
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||||
|
/// <param name="dedupeType">Dedupe type that should be used</param>
|
||||||
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
|
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
|
||||||
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||||
/// <returns></returns>
|
/// <returns></returns>
|
||||||
public void UpdateBucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
|
public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
|
||||||
{
|
{
|
||||||
// If the bucketing value is the same or null
|
// If the sorted type isn't the same, we want to sort the dictionary accordingly
|
||||||
if (bucketBy == _bucketedBy || bucketBy == ItemKey.NULL)
|
if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
|
||||||
return;
|
PerformBucketing(bucketBy, lower, norename);
|
||||||
|
|
||||||
// Reset the bucketing values
|
// If the merge type isn't the same, we want to merge the dictionary accordingly
|
||||||
_bucketedBy = bucketBy;
|
if (dedupeType != DedupeType.None)
|
||||||
_buckets.Clear();
|
|
||||||
|
|
||||||
// Get the current list of item indicies
|
|
||||||
long[] itemIndicies = [.. _items.Keys];
|
|
||||||
|
|
||||||
#if NET452_OR_GREATER || NETCOREAPP
|
|
||||||
Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
|
|
||||||
#elif NET40_OR_GREATER
|
|
||||||
Parallel.For(0, itemIndicies.Length, i =>
|
|
||||||
#else
|
|
||||||
for (int i = 0; i < itemIndicies.Length; i++)
|
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
string? bucketKey = GetBucketKey(i, bucketBy);
|
PerformDeduplication(bucketBy, dedupeType);
|
||||||
EnsureBucketingKey(bucketKey);
|
|
||||||
_buckets[bucketKey].Add(i);
|
|
||||||
#if NET40_OR_GREATER || NETCOREAPP
|
|
||||||
});
|
|
||||||
#else
|
|
||||||
}
|
}
|
||||||
#endif
|
// If the merge type is the same, we want to sort the dictionary to be consistent
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PerformSorting(norename);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sort the buckets that have been created for consistency
|
/// <summary>
|
||||||
PerformSorting(norename);
|
/// Merge an arbitrary set of item pairs based on the supplied information
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="itemMappings">List of pairs representing the items to be merged</param>
|
||||||
|
private List<(long, DatItem)> Deduplicate(List<(long, DatItem)> itemMappings)
|
||||||
|
{
|
||||||
|
// Check for null or blank roms first
|
||||||
|
if (itemMappings == null || !itemMappings.Any())
|
||||||
|
return [];
|
||||||
|
|
||||||
|
// Create output list
|
||||||
|
List<(long, DatItem)> output = [];
|
||||||
|
|
||||||
|
// Then deduplicate them by checking to see if data matches previous saved roms
|
||||||
|
int nodumpCount = 0;
|
||||||
|
for (int f = 0; f < itemMappings.Count; f++)
|
||||||
|
{
|
||||||
|
long itemIndex = itemMappings[f].Item1;
|
||||||
|
DatItem datItem = itemMappings[f].Item2;
|
||||||
|
|
||||||
|
// If we somehow have a null item, skip
|
||||||
|
if (datItem == null)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
|
||||||
|
if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
// If it's a nodump, add and skip
|
||||||
|
if (datItem is Rom rom && rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
|
||||||
|
{
|
||||||
|
output.Add((itemIndex, datItem));
|
||||||
|
nodumpCount++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else if (datItem is Disk disk && disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
|
||||||
|
{
|
||||||
|
output.Add((itemIndex, datItem));
|
||||||
|
nodumpCount++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// If it's the first non-nodump rom in the list, don't touch it
|
||||||
|
else if (output.Count == 0 || output.Count == nodumpCount)
|
||||||
|
{
|
||||||
|
output.Add((itemIndex, datItem));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if the rom is a duplicate
|
||||||
|
DupeType dupetype = 0x00;
|
||||||
|
long savedIndex = -1;
|
||||||
|
DatItem saveditem = new Blank();
|
||||||
|
int pos = -1;
|
||||||
|
for (int i = 0; i < output.Count; i++)
|
||||||
|
{
|
||||||
|
long lastIndex = output[i].Item1;
|
||||||
|
DatItem lastrom = output[i].Item2;
|
||||||
|
|
||||||
|
// Get the duplicate status
|
||||||
|
dupetype = datItem.GetDuplicateStatus(lastrom);
|
||||||
|
|
||||||
|
// If it's a duplicate, skip adding it to the output but add any missing information
|
||||||
|
if (dupetype != 0x00)
|
||||||
|
{
|
||||||
|
savedIndex = lastIndex;
|
||||||
|
saveditem = lastrom;
|
||||||
|
pos = i;
|
||||||
|
|
||||||
|
// Disks, Media, and Roms have more information to fill
|
||||||
|
if (datItem is Disk disk && saveditem is Disk savedDisk)
|
||||||
|
savedDisk.FillMissingInformation(disk);
|
||||||
|
else if (datItem is DatItems.Formats.File fileItem && saveditem is DatItems.Formats.File savedFile)
|
||||||
|
savedFile.FillMissingInformation(fileItem);
|
||||||
|
else if (datItem is Media media && saveditem is Media savedMedia)
|
||||||
|
savedMedia.FillMissingInformation(media);
|
||||||
|
else if (datItem is Rom romItem && saveditem is Rom savedRom)
|
||||||
|
savedRom.FillMissingInformation(romItem);
|
||||||
|
|
||||||
|
saveditem.SetFieldValue<DupeType>(DatItem.DupeTypeKey, dupetype);
|
||||||
|
|
||||||
|
// Get the machines associated with the items
|
||||||
|
var savedMachine = _machines[_itemToMachineMapping[savedIndex]];
|
||||||
|
var itemMachine = _machines[_itemToMachineMapping[itemIndex]];
|
||||||
|
|
||||||
|
// If the current system has a lower ID than the previous, set the system accordingly
|
||||||
|
if (datItem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index < saveditem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index)
|
||||||
|
{
|
||||||
|
datItem.SetFieldValue<Source?>(DatItem.SourceKey, datItem.GetFieldValue<Source?>(DatItem.SourceKey)!.Clone() as Source);
|
||||||
|
_machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
|
||||||
|
saveditem.SetName(datItem.GetName());
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the current machine is a child of the new machine, use the new machine instead
|
||||||
|
if (savedMachine.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey)
|
||||||
|
|| savedMachine.GetStringFieldValue(Models.Metadata.Machine.RomOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey))
|
||||||
|
{
|
||||||
|
_machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
|
||||||
|
saveditem.SetName(datItem.GetName());
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If no duplicate is found, add it to the list
|
||||||
|
if (dupetype == 0x00)
|
||||||
|
{
|
||||||
|
output.Add((itemIndex, datItem));
|
||||||
|
}
|
||||||
|
// Otherwise, if a new rom information is found, add that
|
||||||
|
else
|
||||||
|
{
|
||||||
|
output.RemoveAt(pos);
|
||||||
|
output.Insert(pos, (savedIndex, saveditem));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Get the bucketing key for a given item index
|
/// Get the bucketing key for a given item index
|
||||||
|
/// <param name="itemIndex">Index of the current item</param>
|
||||||
|
/// <param name="bucketBy">ItemKey value representing what key to get</param>
|
||||||
|
/// <param name="lower">True if the key should be lowercased, false otherwise</param>
|
||||||
|
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||||
/// </summary>
|
/// </summary>
|
||||||
private string GetBucketKey(long itemIndex, ItemKey bucketBy)
|
private string GetBucketKey(long itemIndex, ItemKey bucketBy, bool lower, bool norename)
|
||||||
{
|
{
|
||||||
if (!_items.ContainsKey(itemIndex))
|
if (!_items.ContainsKey(itemIndex))
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
@@ -310,11 +419,19 @@ namespace SabreTools.DatFiles
|
|||||||
if (machine == null)
|
if (machine == null)
|
||||||
return string.Empty;
|
return string.Empty;
|
||||||
|
|
||||||
return bucketBy switch
|
string sourceKeyPadded = datItem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index.ToString().PadLeft(10, '0') + '-';
|
||||||
|
string machineName = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? "Default";
|
||||||
|
|
||||||
|
string bucketKey = bucketBy switch
|
||||||
{
|
{
|
||||||
ItemKey.Machine => machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? string.Empty,
|
ItemKey.Machine => (norename ? string.Empty : sourceKeyPadded) + machineName,
|
||||||
_ => GetBucketHashValue(datItem, bucketBy),
|
_ => GetBucketHashValue(datItem, bucketBy),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if (lower)
|
||||||
|
bucketKey = bucketKey.ToLowerInvariant();
|
||||||
|
|
||||||
|
return bucketKey;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
@@ -385,6 +502,84 @@ namespace SabreTools.DatFiles
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Perform bucketing based on the item key provided
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||||
|
/// <param name="lower">True if the key should be lowercased, false otherwise</param>
|
||||||
|
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||||
|
private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
|
||||||
|
{
|
||||||
|
// Reset the bucketing values
|
||||||
|
_bucketedBy = bucketBy;
|
||||||
|
_buckets.Clear();
|
||||||
|
|
||||||
|
// Get the current list of item indicies
|
||||||
|
long[] itemIndicies = [.. _items.Keys];
|
||||||
|
|
||||||
|
#if NET452_OR_GREATER || NETCOREAPP
|
||||||
|
Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
|
||||||
|
#elif NET40_OR_GREATER
|
||||||
|
Parallel.For(0, itemIndicies.Length, i =>
|
||||||
|
#else
|
||||||
|
for (int i = 0; i < itemIndicies.Length; i++)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
string? bucketKey = GetBucketKey(i, bucketBy, lower, norename);
|
||||||
|
EnsureBucketingKey(bucketKey);
|
||||||
|
_buckets[bucketKey].Add(i);
|
||||||
|
#if NET40_OR_GREATER || NETCOREAPP
|
||||||
|
});
|
||||||
|
#else
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Perform deduplication based on the deduplication type provided
|
||||||
|
/// </summary>
|
||||||
|
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||||
|
/// <param name="dedupeType">Dedupe type that should be used</param>
|
||||||
|
private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
|
||||||
|
{
|
||||||
|
// Get the current list of bucket keys
|
||||||
|
string[] bucketKeys = [.. _buckets.Keys];
|
||||||
|
|
||||||
|
#if NET452_OR_GREATER || NETCOREAPP
|
||||||
|
Parallel.For(0, bucketKeys.Length, Globals.ParallelOptions, i =>
|
||||||
|
#elif NET40_OR_GREATER
|
||||||
|
Parallel.For(0, bucketKeys.Length, i =>
|
||||||
|
#else
|
||||||
|
for (int i = 0; i < bucketKeys.Length; i++)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
var itemIndices = _buckets[bucketKeys[i]];
|
||||||
|
if (itemIndices == null || !itemIndices.Any())
|
||||||
|
#if NET40_OR_GREATER || NETCOREAPP
|
||||||
|
return;
|
||||||
|
#else
|
||||||
|
continue;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
var datItems = itemIndices
|
||||||
|
.Where(i => _items.ContainsKey(i))
|
||||||
|
.Select(i => (i, _items[i]))
|
||||||
|
.ToList();
|
||||||
|
|
||||||
|
Sort(ref datItems, false);
|
||||||
|
|
||||||
|
// If we're merging the roms, do so
|
||||||
|
if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine))
|
||||||
|
datItems = Deduplicate(datItems);
|
||||||
|
|
||||||
|
_buckets[bucketKeys[i]] = datItems.Select(m => m.Item1).ToConcurrentList();
|
||||||
|
#if NET40_OR_GREATER || NETCOREAPP
|
||||||
|
});
|
||||||
|
#else
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Sort existing buckets for consistency
|
/// Sort existing buckets for consistency
|
||||||
/// </summary>
|
/// </summary>
|
||||||
@@ -429,9 +624,9 @@ namespace SabreTools.DatFiles
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// Sort a list of File objects by SourceID, Game, and Name (in order)
|
/// Sort a list of item pairs by SourceID, Game, and Name (in order)
|
||||||
/// </summary>
|
/// </summary>
|
||||||
/// <param name="itemMappings">List of File objects representing the roms to be sorted</param>
|
/// <param name="itemMappings">List of pairs representing the items to be sorted</param>
|
||||||
/// <param name="norename">True if files are not renamed, false otherwise</param>
|
/// <param name="norename">True if files are not renamed, false otherwise</param>
|
||||||
/// <returns>True if it sorted correctly, false otherwise</returns>
|
/// <returns>True if it sorted correctly, false otherwise</returns>
|
||||||
private bool Sort(ref List<(long, DatItem)> itemMappings, bool norename)
|
private bool Sort(ref List<(long, DatItem)> itemMappings, bool norename)
|
||||||
@@ -481,8 +676,6 @@ namespace SabreTools.DatFiles
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Write a method that deduplicates items based on any of the fields selected
|
|
||||||
|
|
||||||
#endregion
|
#endregion
|
||||||
|
|
||||||
#region Statistics
|
#region Statistics
|
||||||
|
|||||||
Reference in New Issue
Block a user