mirror of
https://github.com/claunia/SabreTools.git
synced 2025-12-16 19:14:27 +00:00
Split deduplication from bucketing, add short-circuit
This commit is contained in:
@@ -31,7 +31,7 @@ namespace SabreTools.DatFiles.Test
|
||||
DatFile datFile = new Logiqx(datFile: null, deprecated: false);
|
||||
datFile.AddItem(datItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.ExecuteFilters(filterRunner);
|
||||
|
||||
var actualDatItems = datFile.GetItemsForBucket("machine");
|
||||
@@ -61,7 +61,7 @@ namespace SabreTools.DatFiles.Test
|
||||
long machineIndex = datFile.AddMachineDB(machine);
|
||||
_ = datFile.AddItemDB(datItem, machineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.ExecuteFilters(filterRunner);
|
||||
|
||||
var actualDatItems = datFile.GetItemsForBucketDB("machine");
|
||||
@@ -142,7 +142,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(rom, statsOnly: false);
|
||||
datFile.AddItem(disk, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.SetOneRomPerGame();
|
||||
|
||||
var actualDatItems = datFile.GetItemsForBucket("machine");
|
||||
@@ -179,7 +179,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(rom, machineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(disk, machineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.SetOneRomPerGame();
|
||||
|
||||
var actualDatItems = datFile.GetItemsForBucketDB("machine");
|
||||
|
||||
@@ -48,7 +48,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromChildren(subfolder: true, skipDedup: false);
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucket("parent").Count);
|
||||
@@ -93,7 +93,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromChildren(subfolder: true, skipDedup: true);
|
||||
|
||||
Assert.Equal(3, datFile.GetItemsForBucket("parent").Count);
|
||||
@@ -141,7 +141,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromChildren(subfolder: true, skipDedup: false);
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucketDB("parent").Count);
|
||||
@@ -189,7 +189,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromChildren(subfolder: true, skipDedup: true);
|
||||
|
||||
Assert.Equal(3, datFile.GetItemsForBucketDB("parent").Count);
|
||||
@@ -238,7 +238,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromCloneOfParent();
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucket("child").Count);
|
||||
@@ -286,7 +286,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromCloneOfParent();
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucketDB("child").Count);
|
||||
@@ -349,7 +349,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(deviceRef, statsOnly: false);
|
||||
datFile.AddItem(slotOption, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromDevices(deviceOnly, useSlotOptions);
|
||||
|
||||
Assert.Equal(expected, datFile.GetItemsForBucket("machine").Count);
|
||||
@@ -402,7 +402,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(deviceRef, itemMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(slotOption, itemMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromDevices(deviceOnly, useSlotOptions);
|
||||
|
||||
Assert.Equal(expected, datFile.GetItemsForBucketDB("machine").Count);
|
||||
@@ -451,7 +451,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromRomOfParent();
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucket("child").Count);
|
||||
@@ -499,7 +499,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.AddItemsFromRomOfParent();
|
||||
|
||||
Assert.Equal(2, datFile.GetItemsForBucketDB("child").Count);
|
||||
@@ -534,7 +534,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(biosItem, statsOnly: false);
|
||||
datFile.AddItem(deviceItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveBiosAndDeviceSets();
|
||||
|
||||
Assert.Empty(datFile.GetItemsForBucket("bios"));
|
||||
@@ -564,7 +564,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(biosItem, biosMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(deviceItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveBiosAndDeviceSets();
|
||||
|
||||
Assert.Empty(datFile.GetMachinesDB());
|
||||
@@ -613,7 +613,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveItemsFromCloneOfChild();
|
||||
|
||||
Assert.Single(datFile.GetItemsForBucket("parent"));
|
||||
@@ -666,7 +666,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveItemsFromCloneOfChild();
|
||||
|
||||
Assert.Single(datFile.GetItemsForBucketDB("parent"));
|
||||
@@ -720,7 +720,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItem(matchChildItem, statsOnly: false);
|
||||
datFile.AddItem(noMatchChildItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveItemsFromRomOfChild();
|
||||
|
||||
Assert.Single(datFile.GetItemsForBucket("parent"));
|
||||
@@ -772,7 +772,7 @@ namespace SabreTools.DatFiles.Test
|
||||
_ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
_ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveItemsFromRomOfChild();
|
||||
|
||||
Assert.Single(datFile.GetItemsForBucketDB("parent"));
|
||||
@@ -804,7 +804,7 @@ namespace SabreTools.DatFiles.Test
|
||||
DatFile datFile = new Logiqx(datFile: null, deprecated: false);
|
||||
datFile.AddItem(datItem, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveMachineRelationshipTags();
|
||||
|
||||
DatItem actualItem = Assert.Single(datFile.GetItemsForBucket("machine"));
|
||||
@@ -833,7 +833,7 @@ namespace SabreTools.DatFiles.Test
|
||||
long sourceIndex = datFile.AddSourceDB(source);
|
||||
_ = datFile.AddItemDB(datItem, machineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.RemoveMachineRelationshipTags();
|
||||
|
||||
Machine actual = Assert.Single(datFile.GetMachinesDB()).Value;
|
||||
|
||||
@@ -47,7 +47,7 @@ namespace SabreTools.DatFiles.Test
|
||||
datFile.AddItemDB(rom, machineIndex, sourceIndex, statsOnly: false);
|
||||
|
||||
DatFile created = new Formats.Logiqx(datFile, deprecated: false);
|
||||
created.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
created.BucketBy(ItemKey.Machine);
|
||||
|
||||
Assert.NotNull(created.Header);
|
||||
Assert.Equal("name", created.Header.GetStringFieldValue(Models.Metadata.Header.NameKey));
|
||||
|
||||
@@ -322,7 +322,7 @@ namespace SabreTools.DatFiles
|
||||
private void SetOneGamePerRegionImpl(List<string> regionList)
|
||||
{
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Then we want to get a mapping of all machines to parents
|
||||
Dictionary<string, List<string>> parents = [];
|
||||
|
||||
@@ -19,7 +19,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating device non-merged sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
while (AddItemsFromDevices(false, false)) ;
|
||||
@@ -38,7 +38,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating fully merged sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
AddItemsFromChildren(true, false);
|
||||
@@ -59,7 +59,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating fully non-merged sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
while (AddItemsFromDevices(true, true)) ;
|
||||
@@ -82,7 +82,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating merged sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
AddItemsFromChildren(true, true);
|
||||
@@ -103,7 +103,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating non-merged sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
AddItemsFromCloneOfParent();
|
||||
@@ -124,7 +124,7 @@ namespace SabreTools.DatFiles
|
||||
_logger.User("Creating split sets from the DAT");
|
||||
|
||||
// For sake of ease, the first thing we want to do is bucket by game
|
||||
BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
// Now we want to loop through all of the games and set the correct information
|
||||
RemoveItemsFromCloneOfChild();
|
||||
|
||||
@@ -254,13 +254,22 @@ namespace SabreTools.DatFiles
|
||||
/// Take the arbitrarily bucketed Files Dictionary and convert to one bucketed by a user-defined method
|
||||
/// </summary>
|
||||
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||
/// <param name="dedupeType">Dedupe type that should be used</param>
|
||||
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
|
||||
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||
public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
|
||||
public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
|
||||
{
|
||||
Items.BucketBy(bucketBy, dedupeType, lower, norename);
|
||||
ItemsDB.BucketBy(bucketBy, dedupeType, lower, norename);
|
||||
Items.BucketBy(bucketBy, lower, norename);
|
||||
ItemsDB.BucketBy(bucketBy, lower, norename);
|
||||
}
|
||||
|
||||
/// <summary>
/// Deduplicate the contents of both item stores held by this DatFile
/// </summary>
/// <param name="dedupeType">Dedupe type that is handed unchanged to each store</param>
/// <remarks>
/// This method contains no deduplication logic of its own; it only forwards the request.
/// </remarks>
public void Deduplicate(DedupeType dedupeType)
{
    // Forward to the Items dictionary first ...
    Items.Deduplicate(dedupeType);

    // ... and then to the ItemsDB dictionary, using the same dedupe type
    ItemsDB.Deduplicate(dedupeType);
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
@@ -461,8 +461,9 @@ namespace SabreTools.DatFiles
|
||||
if (itemFieldNames.Count > 0)
|
||||
{
|
||||
// For comparison's sake, we want to use CRC as the base bucketing
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.CRC, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.CRC);
|
||||
|
||||
// Then we do a hashwise comparison against the base DAT
|
||||
#if NET452_OR_GREATER || NETCOREAPP
|
||||
@@ -509,8 +510,9 @@ namespace SabreTools.DatFiles
|
||||
if (machineFieldNames.Count > 0)
|
||||
{
|
||||
// For comparison's sake, we want to use Machine Name as the base bucketing
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Then we do a namewise comparison against the base DAT
|
||||
#if NET452_OR_GREATER || NETCOREAPP
|
||||
@@ -579,8 +581,9 @@ namespace SabreTools.DatFiles
|
||||
if (itemFieldNames.Count > 0)
|
||||
{
|
||||
// For comparison's sake, we want to use CRC as the base bucketing
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.CRC, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.CRC);
|
||||
|
||||
// Then we do a hashwise comparison against the base DAT
|
||||
#if NET452_OR_GREATER || NETCOREAPP
|
||||
@@ -620,8 +623,9 @@ namespace SabreTools.DatFiles
|
||||
if (machineFieldNames.Count > 0)
|
||||
{
|
||||
// For comparison's sake, we want to use Machine Name as the base bucketing
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
intDat.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Then we do a namewise comparison against the base DAT
|
||||
#if NET452_OR_GREATER || NETCOREAPP
|
||||
@@ -669,19 +673,18 @@ namespace SabreTools.DatFiles
|
||||
/// <param name="useGames">True to diff using games, false to use hashes</param>
|
||||
public static void DiffAgainst(DatFile datFile, DatFile intDat, bool useGames)
|
||||
{
|
||||
// For comparison's sake, we want to use a base ordering
|
||||
if (useGames)
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
else
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.None);
|
||||
|
||||
InternalStopwatch watch = new($"Comparing '{intDat.Header.GetStringFieldValue(DatHeader.FileNameKey)}' to base DAT");
|
||||
|
||||
// For comparison's sake, we want to use the base bucketing
|
||||
if (useGames)
|
||||
intDat.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
{
|
||||
intDat.BucketBy(ItemKey.Machine);
|
||||
}
|
||||
else
|
||||
intDat.BucketBy(ItemKey.CRC, DedupeType.Full);
|
||||
{
|
||||
intDat.BucketBy(ItemKey.CRC);
|
||||
intDat.Deduplicate(DedupeType.Full);
|
||||
}
|
||||
|
||||
// Then we compare against the base DAT
|
||||
#if NET452_OR_GREATER || NETCOREAPP
|
||||
@@ -781,7 +784,7 @@ namespace SabreTools.DatFiles
|
||||
List<DatFile> outDats = [];
|
||||
|
||||
// Ensure the current DatFile is sorted optimally
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
|
||||
// Loop through each of the inputs and get or create a new DatData object
|
||||
InternalStopwatch watch = new("Initializing and filling all output DATs");
|
||||
|
||||
@@ -417,10 +417,9 @@ namespace SabreTools.DatFiles
|
||||
/// Take the arbitrarily bucketed Files Dictionary and convert to one bucketed by a user-defined method
|
||||
/// </summary>
|
||||
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||
/// <param name="dedupeType">Dedupe type that should be used</param>
|
||||
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
|
||||
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||
public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
|
||||
public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
|
||||
{
|
||||
// If we have a situation where there's no dictionary or no keys at all, we skip
|
||||
if (_items == null || _items.Count == 0)
|
||||
@@ -433,18 +432,50 @@ namespace SabreTools.DatFiles
|
||||
PerformBucketing(bucketBy, lower, norename);
|
||||
}
|
||||
|
||||
// If the merge type isn't the same, we want to merge the dictionary accordingly
|
||||
if (_mergedBy != dedupeType)
|
||||
{
|
||||
_logger.User($"Deduping roms by {dedupeType}");
|
||||
PerformDeduplication(bucketBy, dedupeType);
|
||||
}
|
||||
// If the merge type is the same, we want to sort the dictionary to be consistent
|
||||
else
|
||||
{
|
||||
// Sort the dictionary to be consistent
|
||||
_logger.User($"Sorting roms by {bucketBy}");
|
||||
PerformSorting();
|
||||
}
|
||||
|
||||
/// <summary>
/// Sort every bucket and, when the requested dedupe type applies, merge the items inside it
/// </summary>
/// <param name="dedupeType">Dedupe type that should be used</param>
/// <remarks>
/// The requested type is recorded in the merged-by field before anything else happens, so it
/// is updated even when <see cref="DedupeType.None"/> is passed and no bucket is touched.
/// </remarks>
public void Deduplicate(DedupeType dedupeType)
{
    // Remember which dedupe type was requested last
    _mergedBy = dedupeType;

    // Nothing further to do when deduplication was not asked for
    if (dedupeType == DedupeType.None)
        return;

    // Rebuilds one bucket: fetch, sort, optionally merge, then replace the stored items
    void RebuildBucket(string bucketKey)
    {
        List<DatItem> bucketItems = GetItemsForBucket(bucketKey);
        Sort(ref bucketItems, false);

        // Full always merges; Game merges only while the dictionary is bucketed by machine
        bool gameLevelMerge = dedupeType == DedupeType.Game && _bucketedBy == ItemKey.Machine;
        if (dedupeType == DedupeType.Full || gameLevelMerge)
            bucketItems = DatFileTool.Merge(bucketItems);

        // Swap the old bucket contents for the processed list
        RemoveBucket(bucketKey);
        foreach (DatItem entry in bucketItems)
        {
            AddItem(bucketKey, entry);
        }
    }

    // The loop form depends on what the target framework offers
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(SortedKeys, Core.Globals.ParallelOptions, bucketKey => RebuildBucket(bucketKey));
#elif NET40_OR_GREATER
    Parallel.ForEach(SortedKeys, bucketKey => RebuildBucket(bucketKey));
#else
    foreach (var bucketKey in SortedKeys)
    {
        RebuildBucket(bucketKey);
    }
#endif
}
|
||||
|
||||
/// <summary>
|
||||
@@ -638,44 +669,6 @@ namespace SabreTools.DatFiles
|
||||
#endif
|
||||
}
|
||||
|
||||
/// <summary>
/// Sort every bucket and merge its items when the dedupe type calls for it
/// </summary>
/// <param name="bucketBy">ItemKey the dictionary is bucketed by; Game-level merging only applies for <see cref="ItemKey.Machine"/></param>
/// <param name="dedupeType">Dedupe type that should be used</param>
private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
{
    // Record the dedupe type that is now in effect
    _mergedBy = dedupeType;

    // Both inputs are parameters, so the merge decision is the same for every bucket
    bool mergeBuckets = dedupeType == DedupeType.Full
        || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine);

    // Rebuilds one bucket: fetch, sort, optionally merge, then replace the stored items
    void RebuildBucket(string bucketKey)
    {
        List<DatItem> bucketItems = GetItemsForBucket(bucketKey);
        Sort(ref bucketItems, false);

        if (mergeBuckets)
            bucketItems = DatFileTool.Merge(bucketItems);

        // Swap the old bucket contents for the processed list
        RemoveBucket(bucketKey);
        foreach (DatItem entry in bucketItems)
        {
            AddItem(bucketKey, entry);
        }
    }

    // The loop form depends on what the target framework offers
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(SortedKeys, Core.Globals.ParallelOptions, bucketKey => RebuildBucket(bucketKey));
#elif NET40_OR_GREATER
    Parallel.ForEach(SortedKeys, bucketKey => RebuildBucket(bucketKey));
#else
    foreach (var bucketKey in SortedKeys)
    {
        RebuildBucket(bucketKey);
    }
#endif
}
|
||||
|
||||
/// <summary>
|
||||
/// Perform inplace sorting of the dictionary
|
||||
/// </summary>
|
||||
@@ -772,7 +765,7 @@ namespace SabreTools.DatFiles
|
||||
{
|
||||
// If we're not already sorted, take care of it
|
||||
if (!sorted)
|
||||
BucketBy(GetBestAvailable(), DedupeType.None);
|
||||
BucketBy(GetBestAvailable());
|
||||
|
||||
// Now that we have the sorted type, we get the proper key
|
||||
return GetBucketKey(datItem, _bucketedBy, lower: true, norename: true);
|
||||
|
||||
@@ -693,11 +693,10 @@ namespace SabreTools.DatFiles
|
||||
/// Update the bucketing dictionary
|
||||
/// </summary>
|
||||
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
|
||||
/// <param name="dedupeType">Dedupe type that should be used</param>
|
||||
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
|
||||
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
|
||||
/// <returns></returns>
|
||||
public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
|
||||
public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
|
||||
{
|
||||
// If the sorted type isn't the same, we want to sort the dictionary accordingly
|
||||
if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
|
||||
@@ -706,18 +705,60 @@ namespace SabreTools.DatFiles
|
||||
PerformBucketing(bucketBy, lower, norename);
|
||||
}
|
||||
|
||||
// If the merge type isn't the same, we want to merge the dictionary accordingly
|
||||
if (dedupeType != DedupeType.None)
|
||||
{
|
||||
_logger.User($"Deduping roms by {dedupeType}");
|
||||
PerformDeduplication(bucketBy, dedupeType);
|
||||
}
|
||||
// If the merge type is the same, we want to sort the dictionary to be consistent
|
||||
else
|
||||
{
|
||||
// Sort the dictionary to be consistent
|
||||
_logger.User($"Sorting roms by {bucketBy}");
|
||||
PerformSorting(norename);
|
||||
}
|
||||
|
||||
/// <summary>
/// Perform deduplication based on the deduplication type provided
/// </summary>
/// <param name="dedupeType">Dedupe type that should be used</param>
/// <remarks>
/// Each bucket's item indices are resolved to items, sorted, merged when the dedupe type
/// applies, and the resulting index list is written back over the existing bucket.
/// Passing <see cref="DedupeType.None"/> returns immediately without touching any bucket.
/// </remarks>
public void Deduplicate(DedupeType dedupeType)
{
    // If no deduplication is requested, just return
    if (dedupeType == DedupeType.None)
        return;

    // Snapshot the bucket keys so buckets can be rewritten while we walk them
    string[] bucketKeys = [.. _buckets.Keys];

    // Processes a single bucket. Living in its own function means an early exit only
    // skips this bucket, for the parallel and the sequential loop alike.
    void DeduplicateBucket(string bucketKey)
    {
        if (!_buckets.TryGetValue(bucketKey, out var itemIndices))
            return;

        if (itemIndices == null || itemIndices.Count == 0)
            return;

        // Pair every index that still resolves to an item with that item
        var datItems = itemIndices
            .FindAll(index => _items.ContainsKey(index))
            .Select(index => new KeyValuePair<long, DatItem>(index, _items[index]))
            .ToList();

        Sort(ref datItems, false);

        // Full always merges; Game merges only while bucketed by machine
        if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && _bucketedBy == ItemKey.Machine))
            datItems = Merge(datItems);

        // The key already exists, so the bucket has to be overwritten through the indexer;
        // TryAdd never replaces an existing entry and would drop the result
        _buckets[bucketKey] = [.. datItems.Select(kvp => kvp.Key)];
    }

#if NET452_OR_GREATER || NETCOREAPP
    Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions, i => DeduplicateBucket(bucketKeys[i]));
#elif NET40_OR_GREATER
    Parallel.For(0, bucketKeys.Length, i => DeduplicateBucket(bucketKeys[i]));
#else
    for (int i = 0; i < bucketKeys.Length; i++)
    {
        DeduplicateBucket(bucketKeys[i]);
    }
#endif
}
|
||||
|
||||
/// <summary>
|
||||
@@ -1034,54 +1075,6 @@ namespace SabreTools.DatFiles
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Perform deduplication based on the deduplication type provided
/// </summary>
/// <param name="bucketBy">ItemKey enum representing how the items are bucketed; Game-level merging only applies for <see cref="ItemKey.Machine"/></param>
/// <param name="dedupeType">Dedupe type that should be used</param>
/// <remarks>
/// Each bucket's item indices are resolved to items, sorted, merged when the dedupe type
/// applies, and the resulting index list is written back over the existing bucket.
/// </remarks>
private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
{
    // Snapshot the bucket keys so buckets can be rewritten while we walk them
    string[] bucketKeys = [.. _buckets.Keys];

    // Both inputs are parameters, so the merge decision is the same for every bucket
    bool mergeBuckets = dedupeType == DedupeType.Full
        || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine);

    // Processes a single bucket. Living in its own function means an early exit only
    // skips this bucket, for the parallel and the sequential loop alike.
    void DeduplicateBucket(string bucketKey)
    {
        if (!_buckets.TryGetValue(bucketKey, out var itemIndices))
            return;

        if (itemIndices == null || itemIndices.Count == 0)
            return;

        // Pair every index that still resolves to an item with that item
        var datItems = itemIndices
            .FindAll(index => _items.ContainsKey(index))
            .Select(index => new KeyValuePair<long, DatItem>(index, _items[index]))
            .ToList();

        Sort(ref datItems, false);

        if (mergeBuckets)
            datItems = Merge(datItems);

        // The key already exists, so the bucket has to be overwritten through the indexer;
        // TryAdd never replaces an existing entry and would drop the result
        _buckets[bucketKey] = [.. datItems.Select(kvp => kvp.Key)];
    }

#if NET452_OR_GREATER || NETCOREAPP
    Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions, i => DeduplicateBucket(bucketKeys[i]));
#elif NET40_OR_GREATER
    Parallel.For(0, bucketKeys.Length, i => DeduplicateBucket(bucketKeys[i]));
#else
    for (int i = 0; i < bucketKeys.Length; i++)
    {
        DeduplicateBucket(bucketKeys[i]);
    }
#endif
}
|
||||
|
||||
/// <summary>
|
||||
/// Sort existing buckets for consistency
|
||||
/// </summary>
|
||||
@@ -1197,7 +1190,7 @@ namespace SabreTools.DatFiles
|
||||
{
|
||||
// If we're not already sorted, take care of it
|
||||
if (!sorted)
|
||||
BucketBy(GetBestAvailable(), DedupeType.None);
|
||||
BucketBy(GetBestAvailable());
|
||||
|
||||
// Now that we have the sorted type, we get the proper key
|
||||
return GetBucketKey(datItem.Key, _bucketedBy, lower: true, norename: true);
|
||||
|
||||
@@ -106,9 +106,15 @@ namespace SabreTools.DatTools
|
||||
|
||||
// Bucket and dedupe according to the flag
|
||||
if (DedupeRoms == DedupeType.Full)
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeRoms);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
datFile.Deduplicate(DedupeRoms);
|
||||
}
|
||||
else if (DedupeRoms == DedupeType.Game)
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeRoms);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.Deduplicate(DedupeRoms);
|
||||
}
|
||||
|
||||
// Process description to machine name
|
||||
if (DescriptionAsName == true)
|
||||
|
||||
@@ -95,7 +95,7 @@ namespace SabreTools.DatTools
|
||||
try
|
||||
{
|
||||
// Bucket by game first
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Create mappings based on the extra items
|
||||
var combinedMaps = CombineExtras();
|
||||
@@ -158,7 +158,7 @@ namespace SabreTools.DatTools
|
||||
try
|
||||
{
|
||||
// Bucket by game first
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Create mappings based on the extra items
|
||||
var combinedMaps = CombineExtras();
|
||||
|
||||
@@ -106,7 +106,7 @@ namespace SabreTools.DatTools
|
||||
return success;
|
||||
|
||||
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
|
||||
datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.SHA1);
|
||||
|
||||
// Then we want to loop through each of the hashes and see if we can rebuild
|
||||
foreach (string hash in datFile.Items.SortedKeys)
|
||||
@@ -450,7 +450,7 @@ namespace SabreTools.DatTools
|
||||
if (outputFormat == OutputFormat.Folder && datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>() == PackingFlag.Partial)
|
||||
{
|
||||
shouldCheck = true;
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false);
|
||||
datFile.BucketBy(ItemKey.Machine, lower: false);
|
||||
}
|
||||
|
||||
// Now loop through the list and rebuild accordingly
|
||||
|
||||
@@ -493,7 +493,7 @@ namespace SabreTools.DatTools
|
||||
InternalStopwatch watch = new($"Splitting DAT by level");
|
||||
|
||||
// First, bucket by games so that we can do the right thing
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true);
|
||||
datFile.BucketBy(ItemKey.Machine, lower: false, norename: true);
|
||||
|
||||
// Create a temporary DAT to add things to
|
||||
DatFile tempDat = DatFileTool.CreateDatFile(datFile.Header);
|
||||
@@ -777,7 +777,7 @@ namespace SabreTools.DatTools
|
||||
InternalStopwatch watch = new($"Splitting DAT by total size");
|
||||
|
||||
// Sort the DatFile by machine name
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Get the keys in a known order for easier sorting
|
||||
var keys = datFile.Items.SortedKeys;
|
||||
|
||||
@@ -4,7 +4,6 @@ using System.Linq;
|
||||
using SabreTools.Core.Tools;
|
||||
using SabreTools.DatFiles;
|
||||
using SabreTools.DatItems;
|
||||
using SabreTools.DatItems.Formats;
|
||||
using SabreTools.FileTypes;
|
||||
using SabreTools.FileTypes.Archives;
|
||||
using SabreTools.Hashing;
|
||||
@@ -55,7 +54,7 @@ namespace SabreTools.DatTools
|
||||
return success;
|
||||
|
||||
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
|
||||
datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.SHA1);
|
||||
|
||||
// Then we want to loop through each of the hashes and see if we can rebuild
|
||||
foreach (string hash in datFile.Items.SortedKeys)
|
||||
@@ -139,7 +138,7 @@ namespace SabreTools.DatTools
|
||||
return success;
|
||||
|
||||
// Now that we have a list of depots, we want to bucket the input DAT by SHA-1
|
||||
datFile.BucketBy(ItemKey.SHA1, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.SHA1);
|
||||
|
||||
// Then we want to loop through each of the hashes and see if we can rebuild
|
||||
List<string> keys = [.. datFile.ItemsDB.SortedKeys];
|
||||
@@ -210,9 +209,15 @@ namespace SabreTools.DatTools
|
||||
// Force bucketing according to the flags
|
||||
datFile.Items.SetBucketedBy(ItemKey.NULL);
|
||||
if (hashOnly)
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
}
|
||||
else
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
}
|
||||
|
||||
// Then mark items for removal
|
||||
foreach (string key in datFile.Items.SortedKeys)
|
||||
@@ -254,9 +259,15 @@ namespace SabreTools.DatTools
|
||||
|
||||
// Force bucketing according to the flags
|
||||
if (hashOnly)
|
||||
datFile.BucketBy(ItemKey.CRC, DedupeType.Full);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.CRC);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
}
|
||||
else
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.Full);
|
||||
{
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
datFile.Deduplicate(DedupeType.Full);
|
||||
}
|
||||
|
||||
// Then mark items for removal
|
||||
List<string> keys = [.. datFile.ItemsDB.SortedKeys];
|
||||
|
||||
@@ -77,7 +77,7 @@ namespace SabreTools.DatTools
|
||||
EnsureHeaderFields(datFile);
|
||||
|
||||
// Bucket roms by game name, if not already
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None);
|
||||
datFile.BucketBy(ItemKey.Machine);
|
||||
|
||||
// Output the number of items we're going to be writing
|
||||
_staticLogger.User($"A total of {datFile.DatStatistics.TotalCount - datFile.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'");
|
||||
@@ -138,7 +138,7 @@ namespace SabreTools.DatTools
|
||||
if (diskCount + mediaCount + romCount == 0)
|
||||
datFile.RecalculateStats();
|
||||
|
||||
datFile.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
|
||||
datFile.BucketBy(ItemKey.Machine, norename: true);
|
||||
|
||||
datFile.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey);
|
||||
datFile.DatStatistics.MachineCount = datFile.Items.SortedKeys.Length;
|
||||
|
||||
@@ -57,7 +57,7 @@ namespace SabreTools.Test.DatFiles
|
||||
dict.AddItem(rom3, statsOnly: false);
|
||||
dict.AddItem(rom4, statsOnly: false);
|
||||
|
||||
dict.BucketBy(itemKey, DedupeType.None);
|
||||
dict.BucketBy(itemKey);
|
||||
Assert.Equal(expected, dict.SortedKeys.Length);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user