diff --git a/SabreTools.DatFiles.Test/DatFileTests.Filtering.cs b/SabreTools.DatFiles.Test/DatFileTests.Filtering.cs index 54ee7ba3..4fd3906d 100644 --- a/SabreTools.DatFiles.Test/DatFileTests.Filtering.cs +++ b/SabreTools.DatFiles.Test/DatFileTests.Filtering.cs @@ -31,7 +31,7 @@ namespace SabreTools.DatFiles.Test DatFile datFile = new Logiqx(datFile: null, deprecated: false); datFile.AddItem(datItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.ExecuteFilters(filterRunner); var actualDatItems = datFile.GetItemsForBucket("machine"); @@ -61,7 +61,7 @@ namespace SabreTools.DatFiles.Test long machineIndex = datFile.AddMachineDB(machine); _ = datFile.AddItemDB(datItem, machineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.ExecuteFilters(filterRunner); var actualDatItems = datFile.GetItemsForBucketDB("machine"); @@ -142,7 +142,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(rom, statsOnly: false); datFile.AddItem(disk, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.SetOneRomPerGame(); var actualDatItems = datFile.GetItemsForBucket("machine"); @@ -179,7 +179,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(rom, machineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(disk, machineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.SetOneRomPerGame(); var actualDatItems = datFile.GetItemsForBucketDB("machine"); diff --git a/SabreTools.DatFiles.Test/DatFileTests.Splitting.cs b/SabreTools.DatFiles.Test/DatFileTests.Splitting.cs index f9c496d5..90d259ea 100644 --- a/SabreTools.DatFiles.Test/DatFileTests.Splitting.cs +++ b/SabreTools.DatFiles.Test/DatFileTests.Splitting.cs @@ -48,7 +48,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromChildren(subfolder: true, skipDedup: false); Assert.Equal(2, datFile.GetItemsForBucket("parent").Count); @@ -93,7 +93,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromChildren(subfolder: true, skipDedup: true); Assert.Equal(3, datFile.GetItemsForBucket("parent").Count); @@ -141,7 +141,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromChildren(subfolder: true, skipDedup: false); Assert.Equal(2, datFile.GetItemsForBucketDB("parent").Count); @@ -189,7 +189,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromChildren(subfolder: true, skipDedup: true); Assert.Equal(3, datFile.GetItemsForBucketDB("parent").Count); @@ -238,7 +238,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromCloneOfParent(); Assert.Equal(2, datFile.GetItemsForBucket("child").Count); @@ -286,7 +286,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromCloneOfParent(); Assert.Equal(2, datFile.GetItemsForBucketDB("child").Count); @@ -349,7 +349,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(deviceRef, statsOnly: false); datFile.AddItem(slotOption, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromDevices(deviceOnly, useSlotOptions); Assert.Equal(expected, datFile.GetItemsForBucket("machine").Count); @@ -402,7 +402,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(deviceRef, itemMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(slotOption, itemMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromDevices(deviceOnly, useSlotOptions); Assert.Equal(expected, datFile.GetItemsForBucketDB("machine").Count); @@ -451,7 +451,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromRomOfParent(); Assert.Equal(2, datFile.GetItemsForBucket("child").Count); @@ -499,7 +499,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.AddItemsFromRomOfParent(); Assert.Equal(2, datFile.GetItemsForBucketDB("child").Count); @@ -534,7 +534,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(biosItem, statsOnly: false); datFile.AddItem(deviceItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveBiosAndDeviceSets(); Assert.Empty(datFile.GetItemsForBucket("bios")); @@ -564,7 +564,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(biosItem, biosMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(deviceItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveBiosAndDeviceSets(); Assert.Empty(datFile.GetMachinesDB()); @@ -613,7 +613,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveItemsFromCloneOfChild(); Assert.Single(datFile.GetItemsForBucket("parent")); @@ -666,7 +666,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveItemsFromCloneOfChild(); Assert.Single(datFile.GetItemsForBucketDB("parent")); @@ -720,7 +720,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItem(matchChildItem, statsOnly: false); datFile.AddItem(noMatchChildItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveItemsFromRomOfChild(); Assert.Single(datFile.GetItemsForBucket("parent")); @@ -772,7 +772,7 @@ namespace SabreTools.DatFiles.Test _ = datFile.AddItemDB(matchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); _ = datFile.AddItemDB(noMatchChildItem, deviceMachineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveItemsFromRomOfChild(); Assert.Single(datFile.GetItemsForBucketDB("parent")); @@ -804,7 +804,7 @@ namespace SabreTools.DatFiles.Test DatFile datFile = new Logiqx(datFile: null, deprecated: false); datFile.AddItem(datItem, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveMachineRelationshipTags(); DatItem actualItem = Assert.Single(datFile.GetItemsForBucket("machine")); @@ -833,7 +833,7 @@ namespace SabreTools.DatFiles.Test long sourceIndex = datFile.AddSourceDB(source); _ = datFile.AddItemDB(datItem, machineIndex, sourceIndex, statsOnly: false); - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); datFile.RemoveMachineRelationshipTags(); Machine actual = Assert.Single(datFile.GetMachinesDB()).Value; diff --git a/SabreTools.DatFiles.Test/DatFileTests.cs b/SabreTools.DatFiles.Test/DatFileTests.cs index cc48153b..960ed013 100644 --- a/SabreTools.DatFiles.Test/DatFileTests.cs +++ b/SabreTools.DatFiles.Test/DatFileTests.cs @@ -47,7 +47,7 @@ namespace SabreTools.DatFiles.Test datFile.AddItemDB(rom, machineIndex, sourceIndex, statsOnly: false); DatFile created = new Formats.Logiqx(datFile, deprecated: false); - created.BucketBy(ItemKey.Machine, DedupeType.None); + created.BucketBy(ItemKey.Machine); Assert.NotNull(created.Header); Assert.Equal("name", created.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)); diff --git a/SabreTools.DatFiles/DatFile.Filtering.cs b/SabreTools.DatFiles/DatFile.Filtering.cs index 2211a486..6838f1e4 100644 --- a/SabreTools.DatFiles/DatFile.Filtering.cs +++ b/SabreTools.DatFiles/DatFile.Filtering.cs @@ -322,7 +322,7 @@ namespace SabreTools.DatFiles private void SetOneGamePerRegionImpl(List regionList) { // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Then we want to get a mapping of all machines to parents Dictionary> parents = []; diff --git a/SabreTools.DatFiles/DatFile.Splitting.cs b/SabreTools.DatFiles/DatFile.Splitting.cs index 30259faf..d3228667 100644 --- a/SabreTools.DatFiles/DatFile.Splitting.cs +++ b/SabreTools.DatFiles/DatFile.Splitting.cs @@ -19,7 +19,7 @@ namespace SabreTools.DatFiles _logger.User("Creating device non-merged sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information while (AddItemsFromDevices(false, false)) ; @@ -38,7 +38,7 @@ namespace SabreTools.DatFiles _logger.User("Creating fully merged sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information AddItemsFromChildren(true, false); @@ -59,7 +59,7 @@ namespace SabreTools.DatFiles _logger.User("Creating fully non-merged sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information while (AddItemsFromDevices(true, true)) ; @@ -82,7 +82,7 @@ namespace SabreTools.DatFiles _logger.User("Creating merged sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information AddItemsFromChildren(true, true); @@ -103,7 +103,7 @@ namespace SabreTools.DatFiles _logger.User("Creating non-merged sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information AddItemsFromCloneOfParent(); @@ -124,7 +124,7 @@ namespace SabreTools.DatFiles _logger.User("Creating split sets from the DAT"); // For sake of ease, the first thing we want to do is bucket by game - BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + BucketBy(ItemKey.Machine, norename: true); // Now we want to loop through all of the games and set the correct information RemoveItemsFromCloneOfChild(); diff --git a/SabreTools.DatFiles/DatFile.cs b/SabreTools.DatFiles/DatFile.cs index 8ae389d5..03f41249 100644 --- a/SabreTools.DatFiles/DatFile.cs +++ b/SabreTools.DatFiles/DatFile.cs @@ -254,13 +254,22 @@ namespace SabreTools.DatFiles /// Take the arbitrarily bucketed Files Dictionary and convert to one bucketed by a user-defined method /// /// ItemKey enum representing how to bucket the individual items - /// Dedupe type that should be used /// True if the key should be lowercased (default), false otherwise /// True if games should only be compared on game and file name, false if system and source are counted - public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true) + public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true) { - Items.BucketBy(bucketBy, dedupeType, lower, norename); - ItemsDB.BucketBy(bucketBy, dedupeType, lower, norename); + Items.BucketBy(bucketBy, lower, norename); + ItemsDB.BucketBy(bucketBy, lower, norename); + } + + /// + /// Perform deduplication based on the deduplication type provided + /// + /// Dedupe type that should be used + public void Deduplicate(DedupeType dedupeType) + { + Items.Deduplicate(dedupeType); + ItemsDB.Deduplicate(dedupeType); } /// diff --git a/SabreTools.DatFiles/DatFileTool.cs b/SabreTools.DatFiles/DatFileTool.cs index 5ef7bb62..08f078bd 100644 --- a/SabreTools.DatFiles/DatFileTool.cs +++ b/SabreTools.DatFiles/DatFileTool.cs @@ -461,8 +461,9 @@ namespace SabreTools.DatFiles if (itemFieldNames.Count > 0) { // For comparison's sake, we want to use CRC as the base bucketing - datFile.BucketBy(ItemKey.CRC, DedupeType.Full); - intDat.BucketBy(ItemKey.CRC, DedupeType.None); + datFile.BucketBy(ItemKey.CRC); + datFile.Deduplicate(DedupeType.Full); + intDat.BucketBy(ItemKey.CRC); // Then we do a hashwise comparison against the base DAT #if NET452_OR_GREATER || NETCOREAPP @@ -509,8 +510,9 @@ namespace SabreTools.DatFiles if (machineFieldNames.Count > 0) { // For comparison's sake, we want to use Machine Name as the base bucketing - datFile.BucketBy(ItemKey.Machine, DedupeType.Full); - intDat.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); + datFile.Deduplicate(DedupeType.Full); + intDat.BucketBy(ItemKey.Machine); // Then we do a namewise comparison against the base DAT #if NET452_OR_GREATER || NETCOREAPP @@ -579,8 +581,9 @@ namespace SabreTools.DatFiles if (itemFieldNames.Count > 0) { // For comparison's sake, we want to use CRC as the base bucketing - datFile.BucketBy(ItemKey.CRC, DedupeType.Full); - intDat.BucketBy(ItemKey.CRC, DedupeType.None); + datFile.BucketBy(ItemKey.CRC); + datFile.Deduplicate(DedupeType.Full); + intDat.BucketBy(ItemKey.CRC); // Then we do a hashwise comparison against the base DAT #if NET452_OR_GREATER || NETCOREAPP @@ -620,8 +623,9 @@ namespace SabreTools.DatFiles if (machineFieldNames.Count > 0) { // For comparison's sake, we want to use Machine Name as the base bucketing - datFile.BucketBy(ItemKey.Machine, DedupeType.Full); - intDat.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); + datFile.Deduplicate(DedupeType.Full); + intDat.BucketBy(ItemKey.Machine); // Then we do a namewise comparison against the base DAT #if NET452_OR_GREATER || NETCOREAPP @@ -669,19 +673,18 @@ namespace SabreTools.DatFiles /// True to diff using games, false to use hashes public static void DiffAgainst(DatFile datFile, DatFile intDat, bool useGames) { - // For comparison's sake, we want to use a base ordering - if (useGames) - datFile.BucketBy(ItemKey.Machine, DedupeType.None); - else - datFile.BucketBy(ItemKey.CRC, DedupeType.None); - InternalStopwatch watch = new($"Comparing '{intDat.Header.GetStringFieldValue(DatHeader.FileNameKey)}' to base DAT"); // For comparison's sake, we want to a the base bucketing if (useGames) - intDat.BucketBy(ItemKey.Machine, DedupeType.None); + { + intDat.BucketBy(ItemKey.Machine); + } else - intDat.BucketBy(ItemKey.CRC, DedupeType.Full); + { + intDat.BucketBy(ItemKey.CRC); + intDat.Deduplicate(DedupeType.Full); + } // Then we compare against the base DAT #if NET452_OR_GREATER || NETCOREAPP @@ -781,7 +784,7 @@ namespace SabreTools.DatFiles List outDats = []; // Ensure the current DatFile is sorted optimally - datFile.BucketBy(ItemKey.CRC, DedupeType.None); + datFile.BucketBy(ItemKey.CRC); // Loop through each of the inputs and get or create a new DatData object InternalStopwatch watch = new("Initializing and filling all output DATs"); diff --git a/SabreTools.DatFiles/ItemDictionary.cs b/SabreTools.DatFiles/ItemDictionary.cs index d8589229..1b18d23f 100644 --- a/SabreTools.DatFiles/ItemDictionary.cs +++ b/SabreTools.DatFiles/ItemDictionary.cs @@ -417,10 +417,9 @@ namespace SabreTools.DatFiles /// Take the arbitrarily bucketed Files Dictionary and convert to one bucketed by a user-defined method /// /// ItemKey enum representing how to bucket the individual items - /// Dedupe type that should be used /// True if the key should be lowercased (default), false otherwise /// True if games should only be compared on game and file name, false if system and source are counted - public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true) + public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true) { // If we have a situation where there's no dictionary or no keys at all, we skip if (_items == null || _items.Count == 0) @@ -433,18 +432,50 @@ namespace SabreTools.DatFiles PerformBucketing(bucketBy, lower, norename); } - // If the merge type isn't the same, we want to merge the dictionary accordingly - if (_mergedBy != dedupeType) + // Sort the dictionary to be consistent + _logger.User($"Sorting roms by {bucketBy}"); + PerformSorting(); + } + + /// + /// Perform deduplication based on the deduplication type provided + /// + /// Dedupe type that should be used + public void Deduplicate(DedupeType dedupeType) + { + // Set the sorted type + _mergedBy = dedupeType; + + // If no deduplication is requested, just return + if (dedupeType == DedupeType.None) + return; + +#if NET452_OR_GREATER || NETCOREAPP + Parallel.ForEach(SortedKeys, Core.Globals.ParallelOptions, key => +#elif NET40_OR_GREATER + Parallel.ForEach(SortedKeys, key => +#else + foreach (var key in SortedKeys) +#endif { - _logger.User($"Deduping roms by {dedupeType}"); - PerformDeduplication(bucketBy, dedupeType); - } - // If the merge type is the same, we want to sort the dictionary to be consistent - else - { - _logger.User($"Sorting roms by {bucketBy}"); - PerformSorting(); + // Get the possibly unsorted list + List sortedList = GetItemsForBucket(key); + + // Sort the list of items to be consistent + Sort(ref sortedList, false); + + // If we're merging the roms, do so + if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && _bucketedBy == ItemKey.Machine)) + sortedList = DatFileTool.Merge(sortedList); + + // Add the list back to the dictionary + RemoveBucket(key); + sortedList.ForEach(item => AddItem(key, item)); +#if NET40_OR_GREATER || NETCOREAPP + }); +#else } +#endif } /// @@ -638,44 +669,6 @@ namespace SabreTools.DatFiles #endif } - /// - /// Perform deduplication based on the deduplication type provided - /// - /// ItemKey enum representing how to bucket the individual items - /// Dedupe type that should be used - private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType) - { - // Set the sorted type - _mergedBy = dedupeType; - -#if NET452_OR_GREATER || NETCOREAPP - Parallel.ForEach(SortedKeys, Core.Globals.ParallelOptions, key => -#elif NET40_OR_GREATER - Parallel.ForEach(SortedKeys, key => -#else - foreach (var key in SortedKeys) -#endif - { - // Get the possibly unsorted list - List sortedList = GetItemsForBucket(key); - - // Sort the list of items to be consistent - Sort(ref sortedList, false); - - // If we're merging the roms, do so - if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine)) - sortedList = DatFileTool.Merge(sortedList); - - // Add the list back to the dictionary - RemoveBucket(key); - sortedList.ForEach(item => AddItem(key, item)); -#if NET40_OR_GREATER || NETCOREAPP - }); -#else - } -#endif - } - /// /// Perform inplace sorting of the dictionary /// @@ -772,7 +765,7 @@ namespace SabreTools.DatFiles { // If we're not already sorted, take care of it if (!sorted) - BucketBy(GetBestAvailable(), DedupeType.None); + BucketBy(GetBestAvailable()); // Now that we have the sorted type, we get the proper key return GetBucketKey(datItem, _bucketedBy, lower: true, norename: true); diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs index 38aead22..94312e94 100644 --- a/SabreTools.DatFiles/ItemDictionaryDB.cs +++ b/SabreTools.DatFiles/ItemDictionaryDB.cs @@ -693,11 +693,10 @@ namespace SabreTools.DatFiles /// Update the bucketing dictionary /// /// ItemKey enum representing how to bucket the individual items - /// Dedupe type that should be used /// True if the key should be lowercased (default), false otherwise /// True if games should only be compared on game and file name, false if system and source are counted /// - public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true) + public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true) { // If the sorted type isn't the same, we want to sort the dictionary accordingly if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL) @@ -706,18 +705,60 @@ namespace SabreTools.DatFiles PerformBucketing(bucketBy, lower, norename); } - // If the merge type isn't the same, we want to merge the dictionary accordingly - if (dedupeType != DedupeType.None) + // Sort the dictionary to be consistent + _logger.User($"Sorting roms by {bucketBy}"); + PerformSorting(norename); + } + + /// + /// Perform deduplication based on the deduplication type provided + /// + /// Dedupe type that should be used + public void Deduplicate(DedupeType dedupeType) + { + // If no deduplication is requested, just return + if (dedupeType == DedupeType.None) + return; + + // Get the current list of bucket keys + string[] bucketKeys = [.. _buckets.Keys]; + +#if NET452_OR_GREATER || NETCOREAPP + Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions, i => +#elif NET40_OR_GREATER + Parallel.For(0, bucketKeys.Length, i => +#else + for (int i = 0; i < bucketKeys.Length; i++) +#endif { - _logger.User($"Deduping roms by {dedupeType}"); - PerformDeduplication(bucketBy, dedupeType); - } - // If the merge type is the same, we want to sort the dictionary to be consistent - else - { - _logger.User($"Sorting roms by {bucketBy}"); - PerformSorting(norename); +#if NET40_OR_GREATER || NETCOREAPP + if (!_buckets.TryGetValue(bucketKeys[i], out var itemIndices)) + return; +#else + var itemIndices = _buckets[bucketKeys[i]]; +#endif + + if (itemIndices == null || itemIndices.Count == 0) + return; + + var datItems = itemIndices + .FindAll(i => _items.ContainsKey(i)) + .Select(i => new KeyValuePair(i, _items[i])) + .ToList(); + + Sort(ref datItems, false); + + // If we're merging the roms, do so + if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && _bucketedBy == ItemKey.Machine)) + datItems = Merge(datItems); + +#if NET40_OR_GREATER || NETCOREAPP + _buckets.TryAdd(bucketKeys[i], [.. datItems.Select(kvp => kvp.Key)]); + }); +#else + _buckets[bucketKeys[i]] = [.. datItems.Select(kvp => kvp.Key)]; } +#endif } /// @@ -1034,54 +1075,6 @@ namespace SabreTools.DatFiles } } - /// - /// Perform deduplication based on the deduplication type provided - /// - /// ItemKey enum representing how to bucket the individual items - /// Dedupe type that should be used - private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType) - { - // Get the current list of bucket keys - string[] bucketKeys = [.. _buckets.Keys]; - -#if NET452_OR_GREATER || NETCOREAPP - Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions, i => -#elif NET40_OR_GREATER - Parallel.For(0, bucketKeys.Length, i => -#else - for (int i = 0; i < bucketKeys.Length; i++) -#endif - { -#if NET40_OR_GREATER || NETCOREAPP - if (!_buckets.TryGetValue(bucketKeys[i], out var itemIndices)) - return; -#else - var itemIndices = _buckets[bucketKeys[i]]; -#endif - - if (itemIndices == null || itemIndices.Count == 0) - return; - - var datItems = itemIndices - .FindAll(i => _items.ContainsKey(i)) - .Select(i => new KeyValuePair(i, _items[i])) - .ToList(); - - Sort(ref datItems, false); - - // If we're merging the roms, do so - if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine)) - datItems = Merge(datItems); - -#if NET40_OR_GREATER || NETCOREAPP - _buckets.TryAdd(bucketKeys[i], [.. datItems.Select(kvp => kvp.Key)]); - }); -#else - _buckets[bucketKeys[i]] = [.. datItems.Select(kvp => kvp.Key)]; - } -#endif - } - /// /// Sort existing buckets for consistency /// @@ -1197,7 +1190,7 @@ namespace SabreTools.DatFiles { // If we're not already sorted, take care of it if (!sorted) - BucketBy(GetBestAvailable(), DedupeType.None); + BucketBy(GetBestAvailable()); // Now that we have the sorted type, we get the proper key return GetBucketKey(datItem.Key, _bucketedBy, lower: true, norename: true); diff --git a/SabreTools.DatTools/Cleaner.cs b/SabreTools.DatTools/Cleaner.cs index 16f626aa..a4a06c06 100644 --- a/SabreTools.DatTools/Cleaner.cs +++ b/SabreTools.DatTools/Cleaner.cs @@ -106,9 +106,15 @@ namespace SabreTools.DatTools // Bucket and dedupe according to the flag if (DedupeRoms == DedupeType.Full) - datFile.BucketBy(ItemKey.CRC, DedupeRoms); + { + datFile.BucketBy(ItemKey.CRC); + datFile.Deduplicate(DedupeRoms); + } else if (DedupeRoms == DedupeType.Game) - datFile.BucketBy(ItemKey.Machine, DedupeRoms); + { + datFile.BucketBy(ItemKey.Machine); + datFile.Deduplicate(DedupeRoms); + } // Process description to machine name if (DescriptionAsName == true) diff --git a/SabreTools.DatTools/ExtraIni.cs b/SabreTools.DatTools/ExtraIni.cs index 9c7a4c2e..f2480bd4 100644 --- a/SabreTools.DatTools/ExtraIni.cs +++ b/SabreTools.DatTools/ExtraIni.cs @@ -95,7 +95,7 @@ namespace SabreTools.DatTools try { // Bucket by game first - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); // Create mappings based on the extra items var combinedMaps = CombineExtras(); @@ -158,7 +158,7 @@ namespace SabreTools.DatTools try { // Bucket by game first - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); // Create mappings based on the extra items var combinedMaps = CombineExtras(); diff --git a/SabreTools.DatTools/Rebuilder.cs b/SabreTools.DatTools/Rebuilder.cs index d11cd596..b3a36de0 100644 --- a/SabreTools.DatTools/Rebuilder.cs +++ b/SabreTools.DatTools/Rebuilder.cs @@ -106,7 +106,7 @@ namespace SabreTools.DatTools return success; // Now that we have a list of depots, we want to bucket the input DAT by SHA-1 - datFile.BucketBy(ItemKey.SHA1, DedupeType.None); + datFile.BucketBy(ItemKey.SHA1); // Then we want to loop through each of the hashes and see if we can rebuild foreach (string hash in datFile.Items.SortedKeys) @@ -450,7 +450,7 @@ namespace SabreTools.DatTools if (outputFormat == OutputFormat.Folder && datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue() == PackingFlag.Partial) { shouldCheck = true; - datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false); + datFile.BucketBy(ItemKey.Machine, lower: false); } // Now loop through the list and rebuild accordingly diff --git a/SabreTools.DatTools/Splitter.cs b/SabreTools.DatTools/Splitter.cs index 1a11280a..1b8ae73e 100644 --- a/SabreTools.DatTools/Splitter.cs +++ b/SabreTools.DatTools/Splitter.cs @@ -493,7 +493,7 @@ namespace SabreTools.DatTools InternalStopwatch watch = new($"Splitting DAT by level"); // First, bucket by games so that we can do the right thing - datFile.BucketBy(ItemKey.Machine, DedupeType.None, lower: false, norename: true); + datFile.BucketBy(ItemKey.Machine, lower: false, norename: true); // Create a temporary DAT to add things to DatFile tempDat = DatFileTool.CreateDatFile(datFile.Header); @@ -777,7 +777,7 @@ namespace SabreTools.DatTools InternalStopwatch watch = new($"Splitting DAT by total size"); // Sort the DatFile by machine name - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); // Get the keys in a known order for easier sorting var keys = datFile.Items.SortedKeys; diff --git a/SabreTools.DatTools/Verification.cs b/SabreTools.DatTools/Verification.cs index a983aa85..6c4930ba 100644 --- a/SabreTools.DatTools/Verification.cs +++ b/SabreTools.DatTools/Verification.cs @@ -4,7 +4,6 @@ using System.Linq; using SabreTools.Core.Tools; using SabreTools.DatFiles; using SabreTools.DatItems; -using SabreTools.DatItems.Formats; using SabreTools.FileTypes; using SabreTools.FileTypes.Archives; using SabreTools.Hashing; @@ -55,7 +54,7 @@ namespace SabreTools.DatTools return success; // Now that we have a list of depots, we want to bucket the input DAT by SHA-1 - datFile.BucketBy(ItemKey.SHA1, DedupeType.None); + datFile.BucketBy(ItemKey.SHA1); // Then we want to loop through each of the hashes and see if we can rebuild foreach (string hash in datFile.Items.SortedKeys) @@ -139,7 +138,7 @@ namespace SabreTools.DatTools return success; // Now that we have a list of depots, we want to bucket the input DAT by SHA-1 - datFile.BucketBy(ItemKey.SHA1, DedupeType.None); + datFile.BucketBy(ItemKey.SHA1); // Then we want to loop through each of the hashes and see if we can rebuild List keys = [.. datFile.ItemsDB.SortedKeys]; @@ -210,9 +209,15 @@ namespace SabreTools.DatTools // Force bucketing according to the flags datFile.Items.SetBucketedBy(ItemKey.NULL); if (hashOnly) - datFile.BucketBy(ItemKey.CRC, DedupeType.Full); + { + datFile.BucketBy(ItemKey.CRC); + datFile.Deduplicate(DedupeType.Full); + } else - datFile.BucketBy(ItemKey.Machine, DedupeType.Full); + { + datFile.BucketBy(ItemKey.Machine); + datFile.Deduplicate(DedupeType.Full); + } // Then mark items for removal foreach (string key in datFile.Items.SortedKeys) @@ -254,9 +259,15 @@ namespace SabreTools.DatTools // Force bucketing according to the flags if (hashOnly) - datFile.BucketBy(ItemKey.CRC, DedupeType.Full); + { + datFile.BucketBy(ItemKey.CRC); + datFile.Deduplicate(DedupeType.Full); + } else - datFile.BucketBy(ItemKey.Machine, DedupeType.Full); + { + datFile.BucketBy(ItemKey.Machine); + datFile.Deduplicate(DedupeType.Full); + } // Then mark items for removal List keys = [.. datFile.ItemsDB.SortedKeys]; diff --git a/SabreTools.DatTools/Writer.cs b/SabreTools.DatTools/Writer.cs index 799833ff..0a7b9139 100644 --- a/SabreTools.DatTools/Writer.cs +++ b/SabreTools.DatTools/Writer.cs @@ -77,7 +77,7 @@ namespace SabreTools.DatTools EnsureHeaderFields(datFile); // Bucket roms by game name, if not already - datFile.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.BucketBy(ItemKey.Machine); // Output the number of items we're going to be writing _staticLogger.User($"A total of {datFile.DatStatistics.TotalCount - datFile.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'"); @@ -138,7 +138,7 @@ namespace SabreTools.DatTools if (diskCount + mediaCount + romCount == 0) datFile.RecalculateStats(); - datFile.BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + datFile.BucketBy(ItemKey.Machine, norename: true); datFile.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey); datFile.DatStatistics.MachineCount = datFile.Items.SortedKeys.Length; diff --git a/SabreTools.Test/DatFiles/ItemDictionaryTests.cs b/SabreTools.Test/DatFiles/ItemDictionaryTests.cs index 4c61efaf..9c60210e 100644 --- a/SabreTools.Test/DatFiles/ItemDictionaryTests.cs +++ b/SabreTools.Test/DatFiles/ItemDictionaryTests.cs @@ -57,7 +57,7 @@ namespace SabreTools.Test.DatFiles dict.AddItem(rom3, statsOnly: false); dict.AddItem(rom4, statsOnly: false); - dict.BucketBy(itemKey, DedupeType.None); + dict.BucketBy(itemKey); Assert.Equal(expected, dict.SortedKeys.Length); }