Split deduplication from bucketing, add short-circuit

This commit is contained in:
Matt Nadareski
2025-01-14 20:21:54 -05:00
parent 0e67113200
commit 6e365c3f03
16 changed files with 196 additions and 181 deletions

View File

@@ -106,9 +106,15 @@ namespace SabreTools.DatTools
// Bucket and dedupe according to the flag
if (DedupeRoms == DedupeType.Full)
-datFile.BucketBy(ItemKey.CRC, DedupeRoms);
+{
+datFile.BucketBy(ItemKey.CRC);
+datFile.Deduplicate(DedupeRoms);
+}
 else if (DedupeRoms == DedupeType.Game)
-datFile.BucketBy(ItemKey.Machine, DedupeRoms);
+{
+datFile.BucketBy(ItemKey.Machine);
+datFile.Deduplicate(DedupeRoms);
+}
// Process description to machine name
if (DescriptionAsName == true)

View File

@@ -95,7 +95,7 @@ namespace SabreTools.DatTools
try
{
// Bucket by game first
-datFile.BucketBy(ItemKey.Machine, DedupeType.None);
+datFile.BucketBy(ItemKey.Machine);
// Create mappings based on the extra items
var combinedMaps = CombineExtras();
@@ -158,7 +158,7 @@ namespace SabreTools.DatTools
try
{
// Bucket by game first
-datFile.BucketBy(ItemKey.Machine, DedupeType.None);
+datFile.BucketBy(ItemKey.Machine);
// Create mappings based on the extra items
var combinedMaps = CombineExtras();

View File

@@ -106,7 +106,7 @@ namespace SabreTools.DatTools
return success;
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
-datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
+datFile.BucketBy(ItemKey.SHA1);
// Then we want to loop through each of the hashes and see if we can rebuild
foreach (string hash in datFile.Items.SortedKeys)
@@ -450,7 +450,7 @@ namespace SabreTools.DatTools
if (outputFormat == OutputFormat.Folder && datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>() == PackingFlag.Partial)
{
shouldCheck = true;
-datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false);
+datFile.BucketBy(ItemKey.Machine, lower: false);
}
// Now loop through the list and rebuild accordingly

View File

@@ -493,7 +493,7 @@ namespace SabreTools.DatTools
InternalStopwatch watch = new($"Splitting DAT by level");
// First, bucket by games so that we can do the right thing
-datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true);
+datFile.BucketBy(ItemKey.Machine, lower: false, norename: true);
// Create a temporary DAT to add things to
DatFile tempDat = DatFileTool.CreateDatFile(datFile.Header);
@@ -777,7 +777,7 @@ namespace SabreTools.DatTools
InternalStopwatch watch = new($"Splitting DAT by total size");
// Sort the DatFile by machine name
-datFile.BucketBy(ItemKey.Machine, DedupeType.None);
+datFile.BucketBy(ItemKey.Machine);
// Get the keys in a known order for easier sorting
var keys = datFile.Items.SortedKeys;

View File

@@ -4,7 +4,6 @@ using System.Linq;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
using SabreTools.FileTypes;
using SabreTools.FileTypes.Archives;
using SabreTools.Hashing;
@@ -55,7 +54,7 @@ namespace SabreTools.DatTools
return success;
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
-datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
+datFile.BucketBy(ItemKey.SHA1);
// Then we want to loop through each of the hashes and see if we can rebuild
foreach (string hash in datFile.Items.SortedKeys)
@@ -139,7 +138,7 @@ namespace SabreTools.DatTools
return success;
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
-datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
+datFile.BucketBy(ItemKey.SHA1);
// Then we want to loop through each of the hashes and see if we can rebuild
List<string> keys = [.. datFile.ItemsDB.SortedKeys];
@@ -210,9 +209,15 @@ namespace SabreTools.DatTools
// Force bucketing according to the flags
datFile.Items.SetBucketedBy(ItemKey.NULL);
if (hashOnly)
-datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
+{
+datFile.BucketBy(ItemKey.CRC);
+datFile.Deduplicate(DedupeType.Full);
+}
 else
-datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
+{
+datFile.BucketBy(ItemKey.Machine);
+datFile.Deduplicate(DedupeType.Full);
+}
// Then mark items for removal
foreach (string key in datFile.Items.SortedKeys)
@@ -254,9 +259,15 @@ namespace SabreTools.DatTools
// Force bucketing according to the flags
if (hashOnly)
-datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
+{
+datFile.BucketBy(ItemKey.CRC);
+datFile.Deduplicate(DedupeType.Full);
+}
 else
-datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
+{
+datFile.BucketBy(ItemKey.Machine);
+datFile.Deduplicate(DedupeType.Full);
+}
// Then mark items for removal
List<string> keys = [.. datFile.ItemsDB.SortedKeys];

View File

@@ -77,7 +77,7 @@ namespace SabreTools.DatTools
EnsureHeaderFields(datFile);
// Bucket roms by game name, if not already
-datFile.BucketBy(ItemKey.Machine, DedupeType.None);
+datFile.BucketBy(ItemKey.Machine);
// Output the number of items we're going to be writing
_staticLogger.User($"A total of {datFile.DatStatistics.TotalCount - datFile.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'");
@@ -138,7 +138,7 @@ namespace SabreTools.DatTools
if (diskCount + mediaCount + romCount == 0)
datFile.RecalculateStats();
-datFile.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
+datFile.BucketBy(ItemKey.Machine, norename: true);
datFile.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey);
datFile.DatStatistics.MachineCount = datFile.Items.SortedKeys.Length;