diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs
index 5ea8a075..38cb87c5 100644
--- a/SabreTools.DatFiles/ItemDictionaryDB.cs
+++ b/SabreTools.DatFiles/ItemDictionaryDB.cs
@@ -250,47 +250,156 @@ namespace SabreTools.DatFiles
         /// Update the bucketing dictionary
         ///
         /// ItemKey enum representing how to bucket the individual items
+        /// Dedupe type that should be used
         /// True if the key should be lowercased (default), false otherwise
         /// True if games should only be compared on game and file name, false if system and source are counted
         ///
-        public void UpdateBucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
+        public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
         {
-            // If the bucketing value is the same or null
-            if (bucketBy == _bucketedBy || bucketBy == ItemKey.NULL)
-                return;
+            // If the sorted type isn't the same, we want to sort the dictionary accordingly
+            if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
+                PerformBucketing(bucketBy, lower, norename);
 
-            // Reset the bucketing values
-            _bucketedBy = bucketBy;
-            _buckets.Clear();
-
-            // Get the current list of item indicies
-            long[] itemIndicies = [.. _items.Keys];
-
-#if NET452_OR_GREATER || NETCOREAPP
-            Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
-#elif NET40_OR_GREATER
-            Parallel.For(0, itemIndicies.Length, i =>
-#else
-            for (int i = 0; i < itemIndicies.Length; i++)
-#endif
+            // If the merge type isn't the same, we want to merge the dictionary accordingly
+            if (dedupeType != DedupeType.None)
             {
-                string? bucketKey = GetBucketKey(i, bucketBy);
-                EnsureBucketingKey(bucketKey);
-                _buckets[bucketKey].Add(i);
-#if NET40_OR_GREATER || NETCOREAPP
-            });
-#else
+                PerformDeduplication(bucketBy, dedupeType);
             }
-#endif
+            // If the merge type is the same, we want to sort the dictionary to be consistent
+            else
+            {
+                PerformSorting(norename);
+            }
+        }
 
-            // Sort the buckets that have been created for consistency
-            PerformSorting(norename);
+        ///
+        /// Merge an arbitrary set of item pairs based on the supplied information
+        ///
+        /// List of pairs representing the items to be merged
+        private List<(long, DatItem)> Deduplicate(List<(long, DatItem)> itemMappings)
+        {
+            // Check for null or blank roms first
+            if (itemMappings == null || !itemMappings.Any())
+                return [];
+
+            // Create output list
+            List<(long, DatItem)> output = [];
+
+            // Then deduplicate them by checking to see if data matches previous saved roms
+            int nodumpCount = 0;
+            for (int f = 0; f < itemMappings.Count; f++)
+            {
+                long itemIndex = itemMappings[f].Item1;
+                DatItem datItem = itemMappings[f].Item2;
+
+                // If we somehow have a null item, skip
+                if (datItem == null)
+                    continue;
+
+                // If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
+                if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
+                    continue;
+
+                // If it's a nodump, add and skip
+                if (datItem is Rom rom && rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue() == ItemStatus.Nodump)
+                {
+                    output.Add((itemIndex, datItem));
+                    nodumpCount++;
+                    continue;
+                }
+                else if (datItem is Disk disk && disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue() == ItemStatus.Nodump)
+                {
+                    output.Add((itemIndex, datItem));
+                    nodumpCount++;
+                    continue;
+                }
+                // If it's the first non-nodump rom in the list, don't touch it
+                else if (output.Count == 0 || output.Count == nodumpCount)
+                {
+                    output.Add((itemIndex, datItem));
+                    continue;
+                }
+
+                // Check if the rom is a duplicate
+                DupeType dupetype = 0x00;
+                long savedIndex = -1;
+                DatItem saveditem = new Blank();
+                int pos = -1;
+                for (int i = 0; i < output.Count; i++)
+                {
+                    long lastIndex = output[i].Item1;
+                    DatItem lastrom = output[i].Item2;
+
+                    // Get the duplicate status
+                    dupetype = datItem.GetDuplicateStatus(lastrom);
+
+                    // If it's a duplicate, skip adding it to the output but add any missing information
+                    if (dupetype != 0x00)
+                    {
+                        savedIndex = lastIndex;
+                        saveditem = lastrom;
+                        pos = i;
+
+                        // Disks, Media, and Roms have more information to fill
+                        if (datItem is Disk disk && saveditem is Disk savedDisk)
+                            savedDisk.FillMissingInformation(disk);
+                        else if (datItem is DatItems.Formats.File fileItem && saveditem is DatItems.Formats.File savedFile)
+                            savedFile.FillMissingInformation(fileItem);
+                        else if (datItem is Media media && saveditem is Media savedMedia)
+                            savedMedia.FillMissingInformation(media);
+                        else if (datItem is Rom romItem && saveditem is Rom savedRom)
+                            savedRom.FillMissingInformation(romItem);
+
+                        saveditem.SetFieldValue(DatItem.DupeTypeKey, dupetype);
+
+                        // Get the machines associated with the items
+                        var savedMachine = _machines[_itemToMachineMapping[savedIndex]];
+                        var itemMachine = _machines[_itemToMachineMapping[itemIndex]];
+
+                        // If the current system has a lower ID than the previous, set the system accordingly
+                        if (datItem.GetFieldValue(DatItem.SourceKey)?.Index < saveditem.GetFieldValue(DatItem.SourceKey)?.Index)
+                        {
+                            datItem.SetFieldValue(DatItem.SourceKey, datItem.GetFieldValue(DatItem.SourceKey)!.Clone() as Source);
+                            _machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
+                            saveditem.SetName(datItem.GetName());
+                        }
+
+                        // If the current machine is a child of the new machine, use the new machine instead
+                        if (savedMachine.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey)
+                            || savedMachine.GetStringFieldValue(Models.Metadata.Machine.RomOfKey) == itemMachine.GetStringFieldValue(Models.Metadata.Machine.NameKey))
+                        {
+                            _machines[_itemToMachineMapping[savedIndex]] = (itemMachine.Clone() as Machine)!;
+                            saveditem.SetName(datItem.GetName());
+                        }
+
+                        break;
+                    }
+                }
+
+                // If no duplicate is found, add it to the list
+                if (dupetype == 0x00)
+                {
+                    output.Add((itemIndex, datItem));
+                }
+                // Otherwise, if a new rom information is found, add that
+                else
+                {
+                    output.RemoveAt(pos);
+                    output.Insert(pos, (savedIndex, saveditem));
+                }
+            }
+
+            return output;
         }
 
         ///
         /// Get the bucketing key for a given item index
+        /// Index of the current item
+        /// ItemKey value representing what key to get
+        /// True if the key should be lowercased, false otherwise
+        /// True if games should only be compared on game and file name, false if system and source are counted
         ///
-        private string GetBucketKey(long itemIndex, ItemKey bucketBy)
+        private string GetBucketKey(long itemIndex, ItemKey bucketBy, bool lower, bool norename)
         {
             if (!_items.ContainsKey(itemIndex))
                 return string.Empty;
@@ -310,11 +419,19 @@ namespace SabreTools.DatFiles
             if (machine == null)
                 return string.Empty;
 
-            return bucketBy switch
+            string sourceKeyPadded = datItem.GetFieldValue(DatItem.SourceKey)?.Index.ToString().PadLeft(10, '0') + '-';
+            string machineName = machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? "Default";
+
+            string bucketKey = bucketBy switch
             {
-                ItemKey.Machine => machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? string.Empty,
+                ItemKey.Machine => (norename ? string.Empty : sourceKeyPadded) + machineName,
                 _ => GetBucketHashValue(datItem, bucketBy),
             };
+
+            if (lower)
+                bucketKey = bucketKey.ToLowerInvariant();
+
+            return bucketKey;
         }
 
         ///
@@ -385,6 +502,84 @@ namespace SabreTools.DatFiles
 #endif
         }
 
+        ///
+        /// Perform bucketing based on the item key provided
+        ///
+        /// ItemKey enum representing how to bucket the individual items
+        /// True if the key should be lowercased, false otherwise
+        /// True if games should only be compared on game and file name, false if system and source are counted
+        private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
+        {
+            // Reset the bucketing values
+            _bucketedBy = bucketBy;
+            _buckets.Clear();
+
+            // Get the current list of item indicies
+            long[] itemIndicies = [.. _items.Keys];
+
+#if NET452_OR_GREATER || NETCOREAPP
+            Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
+#elif NET40_OR_GREATER
+            Parallel.For(0, itemIndicies.Length, i =>
+#else
+            for (int i = 0; i < itemIndicies.Length; i++)
+#endif
+            {
+                string? bucketKey = GetBucketKey(i, bucketBy, lower, norename);
+                EnsureBucketingKey(bucketKey);
+                _buckets[bucketKey].Add(i);
+#if NET40_OR_GREATER || NETCOREAPP
+            });
+#else
+            }
+#endif
+        }
+
+        ///
+        /// Perform deduplication based on the deduplication type provided
+        ///
+        /// ItemKey enum representing how to bucket the individual items
+        /// Dedupe type that should be used
+        private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
+        {
+            // Get the current list of bucket keys
+            string[] bucketKeys = [.. _buckets.Keys];
+
+#if NET452_OR_GREATER || NETCOREAPP
+            Parallel.For(0, bucketKeys.Length, Globals.ParallelOptions, i =>
+#elif NET40_OR_GREATER
+            Parallel.For(0, bucketKeys.Length, i =>
+#else
+            for (int i = 0; i < bucketKeys.Length; i++)
+#endif
+            {
+                var itemIndices = _buckets[bucketKeys[i]];
+                if (itemIndices == null || !itemIndices.Any())
+#if NET40_OR_GREATER || NETCOREAPP
+                    return;
+#else
+                    continue;
+#endif
+
+                var datItems = itemIndices
+                    .Where(i => _items.ContainsKey(i))
+                    .Select(i => (i, _items[i]))
+                    .ToList();
+
+                Sort(ref datItems, false);
+
+                // If we're merging the roms, do so
+                if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine))
+                    datItems = Deduplicate(datItems);
+
+                _buckets[bucketKeys[i]] = datItems.Select(m => m.Item1).ToConcurrentList();
+#if NET40_OR_GREATER || NETCOREAPP
+            });
+#else
+            }
+#endif
+        }
+
         ///
         /// Sort existing buckets for consistency
         ///
@@ -429,9 +624,9 @@ namespace SabreTools.DatFiles
         }
 
         ///
-        /// Sort a list of File objects by SourceID, Game, and Name (in order)
+        /// Sort a list of item pairs by SourceID, Game, and Name (in order)
         ///
-        /// List of File objects representing the roms to be sorted
+        /// List of pairs representing the items to be sorted
         /// True if files are not renamed, false otherwise
         /// True if it sorted correctly, false otherwise
         private bool Sort(ref List<(long, DatItem)> itemMappings, bool norename)
@@ -481,8 +676,6 @@ namespace SabreTools.DatFiles
             return true;
         }
 
-        // TODO: Write a method that deduplicates items based on any of the fields selected
-
        #endregion
 
         #region Statistics