diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs
index 5ea8a075..38cb87c5 100644
--- a/SabreTools.DatFiles/ItemDictionaryDB.cs
+++ b/SabreTools.DatFiles/ItemDictionaryDB.cs
@@ -250,47 +250,156 @@ namespace SabreTools.DatFiles
/// Update the bucketing dictionary
/// </summary>
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
+ /// <param name="dedupeType">Dedupe type that should be used</param>
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
- public void UpdateBucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
+ public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
{
- // If the bucketing value is the same or null
- if (bucketBy == _bucketedBy || bucketBy == ItemKey.NULL)
- return;
+ // If the bucketing type isn't the same, we want to bucket the dictionary accordingly
+ if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
+ PerformBucketing(bucketBy, lower, norename);
- // Reset the bucketing values
- _bucketedBy = bucketBy;
- _buckets.Clear();
-
- // Get the current list of item indicies
- long[] itemIndicies = [.. _items.Keys];
-
-#if NET452_OR_GREATER || NETCOREAPP
- Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
-#elif NET40_OR_GREATER
- Parallel.For(0, itemIndicies.Length, i =>
-#else
- for (int i = 0; i < itemIndicies.Length; i++)
-#endif
+ // If a dedupe type is specified, we want to merge the dictionary accordingly
+ if (dedupeType != DedupeType.None)
{
- string? bucketKey = GetBucketKey(i, bucketBy);
- EnsureBucketingKey(bucketKey);
- _buckets[bucketKey].Add(i);
-#if NET40_OR_GREATER || NETCOREAPP
- });
-#else
+ PerformDeduplication(bucketBy, dedupeType);
}
-#endif
+ // If no dedupe type is specified, we just want to sort the dictionary to be consistent
+ else
+ {
+ PerformSorting(norename);
+ }
+ }
- // Sort the buckets that have been created for consistency
- PerformSorting(norename);
+ /// <summary>
+ /// Merge an arbitrary set of item pairs based on the supplied information
+ /// </summary>
+ /// <param name="itemMappings">List of pairs representing the items to be merged</param>
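+ /// <returns>Deduplicated list of item pairs</returns>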
+ private List<(long, DatItem)> Deduplicate(List<(long, DatItem)> itemMappings)
+ {
+ // Check for null or blank roms first
+ if (itemMappings == null || !itemMappings.Any())
+ return [];
+
+ // Create output list
+ List<(long, DatItem)> output = [];
+
+ // Then deduplicate them by checking to see if data matches previous saved roms
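+ // Nodump items are carried through untouched; they are counted so the first-item check below can ignore them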
+ int nodumpCount = 0;
+ for (int f = 0; f < itemMappings.Count; f++)
+ {
+ long itemIndex = itemMappings[f].Item1;
+ DatItem datItem = itemMappings[f].Item2;
+
+ // If we somehow have a null item, skip
+ if (datItem == null)
+ continue;
+
+ // If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
+ if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
+ continue;
+
+ // If it's a nodump, add and skip
+ if (datItem is Rom rom && rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
+ {
+ output.Add((itemIndex, datItem));
+ nodumpCount++;
+ continue;
+ }
+ else if (datItem is Disk disk && disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
+ {
+ output.Add((itemIndex, datItem));
+ nodumpCount++;
+ continue;
+ }
+ // If it's the first non-nodump rom in the list, don't touch it
+ else if (output.Count == 0 || output.Count == nodumpCount)
+ {
+ output.Add((itemIndex, datItem));
+ continue;
+ }
+
+ // Check if the rom is a duplicate
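+ // These locals track the matched duplicate, if any: its index, its item, and its position in the output list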
+ DupeType dupetype = 0x00;
+ long savedIndex = -1;
+ DatItem saveditem = new Blank();
+ int pos = -1;
+ for (int i = 0; i < output.Count; i++)
+ {
+ long lastIndex = output[i].Item1;
+ DatItem lastrom = output[i].Item2;
+
+ // Get the duplicate status
+ dupetype = datItem.GetDuplicateStatus(lastrom);
+
+ // If it's a duplicate, skip adding it to the output but add any missing information
+ if (dupetype != 0x00)
+ {
+ savedIndex = lastIndex;
+ saveditem = lastrom;
+ pos = i;
+
+ // Disks, Media, and Roms have more information to fill
+ if (datItem is Disk diskItem && saveditem is Disk savedDisk)
+ savedDisk.FillMissingInformation(diskItem);
+ else if (datItem is DatItems.Formats.File fileItem && saveditem is DatItems.Formats.File savedFile)
+ savedFile.FillMissingInformation(fileItem);
+ else if (datItem is Media media && saveditem is Media savedMedia)
+ savedMedia.FillMissingInformation(media);
+ else if (datItem is Rom romItem && saveditem is Rom savedRom)
+ savedRom.FillMissingInformation(romItem);
+
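+ // Flag the saved item with the detected duplicate type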
+ saveditem.SetFieldValue(DatItem.DupeTypeKey, dupetype);
+
+ // Get the machines associated with the items
+ var savedMachine = _machines[_itemToMachineMapping[savedIndex]];
+ var itemMachine = _machines[_itemToMachineMapping[itemIndex]];
+
+ // If the current system has a lower ID than the previous, set the system accordingly
+ if (datItem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index < saveditem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index)
+ {
+ saveditem.SetFieldValue<Source?>(DatItem.SourceKey, datItem.GetFieldValue<Source?>(DatItem.SourceKey)!.Clone() as Source);
+ _machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
+ saveditem.SetName(datItem.GetName());
+ }
+
+ // If the saved machine is a child of the current item's machine, use the current machine instead
+ if (savedMachine.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey)
+ || savedMachine.GetStringFieldValue(Models.Metadata.Machine.RomOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey))
+ {
+ _machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
+ saveditem.SetName(datItem.GetName());
+ }
+
+ break;
+ }
+ }
+
+ // If no duplicate is found, add it to the list
+ if (dupetype == 0x00)
+ {
+ output.Add((itemIndex, datItem));
+ }
+ // Otherwise, replace the saved entry with the updated item information
+ else
+ {
+ output.RemoveAt(pos);
+ output.Insert(pos, (savedIndex, saveditem));
+ }
+ }
+
+ return output;
}
/// <summary>
/// Get the bucketing key for a given item index
/// </summary>
+ /// <param name="itemIndex">Index of the current item</param>
+ /// <param name="bucketBy">ItemKey value representing what key to get</param>
+ /// <param name="lower">True if the key should be lowercased, false otherwise</param>
+ /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
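+ /// <returns>String representing the bucketing key, or an empty string if the item could not be found</returns>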
- private string GetBucketKey(long itemIndex, ItemKey bucketBy)
+ private string GetBucketKey(long itemIndex, ItemKey bucketBy, bool lower, bool norename)
{
if (!_items.ContainsKey(itemIndex))
return string.Empty;
@@ -310,11 +419,19 @@ namespace SabreTools.DatFiles
if (machine == null)
return string.Empty;
- return bucketBy switch
+ string sourceKeyPadded = datItem.GetFieldValue<Source?>(DatItem.SourceKey)?.Index.ToString().PadLeft(10, '0') + '-';
+ string machineName = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? "Default";
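+ // Unless norename is set, the machine name is prefixed with the zero-padded source index so items from different sources bucket separately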
+
+ string bucketKey = bucketBy switch
{
- ItemKey.Machine => machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? string.Empty,
+ ItemKey.Machine => (norename ? string.Empty : sourceKeyPadded) + machineName,
_ => GetBucketHashValue(datItem, bucketBy),
};
+
+ if (lower)
+ bucketKey = bucketKey.ToLowerInvariant();
+
+ return bucketKey;
}
/// <summary>
@@ -385,6 +502,84 @@ namespace SabreTools.DatFiles
#endif
}
+ /// <summary>
+ /// Perform bucketing based on the item key provided
+ /// </summary>
+ /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
+ /// <param name="lower">True if the key should be lowercased, false otherwise</param>
+ /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
+ private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
+ {
+ // Reset the bucketing values
+ _bucketedBy = bucketBy;
+ _buckets.Clear();
+
+ // Get the current list of item indices
+ long[] itemIndices = [.. _items.Keys];
+
+#if NET452_OR_GREATER || NETCOREAPP
+ Parallel.For(0, itemIndices.Length, Globals.ParallelOptions, i =>
+#elif NET40_OR_GREATER
+ Parallel.For(0, itemIndices.Length, i =>
+#else
+ for (int i = 0; i < itemIndices.Length; i++)
+#endif
+ {
+ string? bucketKey = GetBucketKey(itemIndices[i], bucketBy, lower, norename);
+ EnsureBucketingKey(bucketKey);
+ _buckets[bucketKey].Add(itemIndices[i]);
+#if NET40_OR_GREATER || NETCOREAPP
+ });
+#else
+ }
+#endif
+ }
+
+ /// <summary>
+ /// Perform deduplication based on the deduplication type provided
+ /// </summary>
+ /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
+ /// <param name="dedupeType">Dedupe type that should be used</param>
+ private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
+ {
+ // Get the current list of bucket keys
+ string[] bucketKeys = [.. _buckets.Keys];
+
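+ // Process each bucket in parallel where the target framework supports it; otherwise fall back to a serial loop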
+#if NET452_OR_GREATER || NETCOREAPP
+ Parallel.For(0, bucketKeys.Length, Globals.ParallelOptions, i =>
+#elif NET40_OR_GREATER
+ Parallel.For(0, bucketKeys.Length, i =>
+#else
+ for (int i = 0; i < bucketKeys.Length; i++)
+#endif
+ {
+ var itemIndices = _buckets[bucketKeys[i]];
+ if (itemIndices == null || !itemIndices.Any())
+#if NET40_OR_GREATER || NETCOREAPP
+ return;
+#else
+ continue;
+#endif
+
+ var datItems = itemIndices
+ .Where(index => _items.ContainsKey(index))
+ .Select(index => (index, _items[index]))
+ .ToList();
+
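+ // Sort the bucket contents by source, game, and name so deduplication sees a deterministic order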
+ Sort(ref datItems, false);
+
+ // If we're merging the roms, do so
+ if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine))
+ datItems = Deduplicate(datItems);
+
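+ // Write the remaining item indices back to the bucket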
+ _buckets[bucketKeys[i]] = datItems.Select(m => m.Item1).ToConcurrentList();
+#if NET40_OR_GREATER || NETCOREAPP
+ });
+#else
+ }
+#endif
+ }
+
/// <summary>
/// Sort existing buckets for consistency
/// </summary>
@@ -429,9 +624,9 @@ namespace SabreTools.DatFiles
}
/// <summary>
- /// Sort a list of File objects by SourceID, Game, and Name (in order)
+ /// Sort a list of item pairs by SourceID, Game, and Name (in order)
/// </summary>
- /// <param name="itemMappings">List of File objects representing the roms to be sorted</param>
+ /// <param name="itemMappings">List of pairs representing the items to be sorted</param>
/// <param name="norename">True if files are not renamed, false otherwise</param>
/// <returns>True if it sorted correctly, false otherwise</returns>
private bool Sort(ref List<(long, DatItem)> itemMappings, bool norename)
@@ -481,8 +676,6 @@ namespace SabreTools.DatFiles
return true;
}
- // TODO: Write a method that deduplicates items based on any of the fields selected
-
#endregion
#region Statistics