From 0ab13a8c382cecf69d6326a65c42bcfe9a185f53 Mon Sep 17 00:00:00 2001
From: Matt Nadareski
Date: Wed, 13 Mar 2024 02:44:04 -0400
Subject: [PATCH] Add bucketing to new database

---
 SabreTools.DatFiles/ItemDictionaryDB.cs | 170 +++++++++++++++++++++++-
 1 file changed, 168 insertions(+), 2 deletions(-)

diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs
index 189c42a8..906f54b2 100644
--- a/SabreTools.DatFiles/ItemDictionaryDB.cs
+++ b/SabreTools.DatFiles/ItemDictionaryDB.cs
@@ -3,9 +3,15 @@ using System.Collections.Concurrent;
 #endif
 using System.Collections.Generic;
 using System.Linq;
+#if NET40_OR_GREATER || NETCOREAPP
+using System.Threading.Tasks;
+#endif
 using System.Xml.Serialization;
 using Newtonsoft.Json;
+using SabreTools.Core;
 using SabreTools.DatItems;
+using SabreTools.DatItems.Formats;
+using SabreTools.Hashing;
 
 namespace SabreTools.DatFiles
 {
@@ -30,6 +36,7 @@ namespace SabreTools.DatFiles
         /// <summary>
         /// Current highest available item index
         /// </summary>
+        [JsonIgnore, XmlIgnore]
         private long _itemIndex = 0;
 
         /// <summary>
@@ -45,6 +52,7 @@ namespace SabreTools.DatFiles
         /// <summary>
         /// Current highest available machine index
         /// </summary>
+        [JsonIgnore, XmlIgnore]
         private long _machineIndex = 0;
 
         /// <summary>
@@ -57,7 +65,20 @@ namespace SabreTools.DatFiles
         private readonly Dictionary<long, long> _itemToMachineMapping = [];
 #endif
 
-        // TODO: Add another dictionary of string => ConcurrentList representing a bucketed key to a set of item IDs
+        /// <summary>
+        /// Internal dictionary representing the current buckets
+        /// </summary>
+        [JsonIgnore, XmlIgnore]
+#if NET40_OR_GREATER || NETCOREAPP
+        private readonly ConcurrentDictionary<string, ConcurrentList<long>> _buckets = new ConcurrentDictionary<string, ConcurrentList<long>>();
+#else
+        private readonly Dictionary<string, ConcurrentList<long>> _buckets = [];
+#endif
+
+        /// <summary>
+        /// Current bucketed by value
+        /// </summary>
+        private ItemKey _bucketedBy = ItemKey.NULL;
 
         #endregion
 
@@ -202,6 +223,151 @@ namespace SabreTools.DatFiles
             return true;
         }
 
-#endregion
+        #endregion
+
+        #region Bucketing
+
+        /// <summary>
+        /// Update the bucketing dictionary
+        /// </summary>
+        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
+        /// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
+        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
+        /// <remarks>
+        /// Does nothing when <paramref name="bucketBy"/> matches the current bucketing value.
+        /// <paramref name="lower"/> and <paramref name="norename"/> are accepted but not yet applied to the generated keys.
+        /// </remarks>
+        public void UpdateBucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
+        {
+            // If the bucketing value is the same
+            if (bucketBy == _bucketedBy)
+                return;
+
+            // Reset the bucketing values
+            _bucketedBy = bucketBy;
+            _buckets.Clear();
+
+            // Get the current list of item indicies
+            long[] itemIndicies = _items.Keys.ToArray();
+
+#if NET452_OR_GREATER || NETCOREAPP
+            Parallel.For(0, itemIndicies.Length, Globals.ParallelOptions, i =>
+#elif NET40_OR_GREATER
+            Parallel.For(0, itemIndicies.Length, i =>
+#else
+            for (int i = 0; i < itemIndicies.Length; i++)
+#endif
+            {
+                // The loop counter is only a position in the snapshot; the item index is the value stored there
+                long itemIndex = itemIndicies[i];
+
+                string bucketKey = GetBucketKey(itemIndex, bucketBy);
+                EnsureBucketingKey(bucketKey);
+                _buckets[bucketKey].Add(itemIndex);
+#if NET40_OR_GREATER || NETCOREAPP
+            });
+#else
+            }
+#endif
+        }
+
+        /// <summary>
+        /// Get the bucketing key for a given item index
+        /// </summary>
+        /// <param name="itemIndex">Index of the item to build a key for</param>
+        /// <param name="bucketBy">ItemKey enum representing how to bucket the item</param>
+        /// <returns>Machine name or hash value for the item; empty string if the item or its machine cannot be resolved</returns>
+        private string GetBucketKey(long itemIndex, ItemKey bucketBy)
+        {
+            // Single lookups avoid the check-then-read gap of ContainsKey followed by the indexer
+            if (!_items.TryGetValue(itemIndex, out var datItem) || datItem == null)
+                return string.Empty;
+
+            if (!_itemToMachineMapping.TryGetValue(itemIndex, out long machineIndex))
+                return string.Empty;
+
+            if (!_machines.TryGetValue(machineIndex, out var machine) || machine == null)
+                return string.Empty;
+
+            return bucketBy switch
+            {
+                ItemKey.Machine => machine.GetStringFieldValue(Models.Metadata.Machine.NameKey) ?? string.Empty,
+                _ => GetBucketHashValue(datItem, bucketBy),
+            };
+        }
+
+        /// <summary>
+        /// Get the hash value for a given item, if possible
+        /// </summary>
+        /// <param name="datItem">Item to read the hash from</param>
+        /// <param name="bucketBy">ItemKey enum selecting which hash to read</param>
+        /// <returns>Stored hash if set, the zero-value constant where the item type has no such field, empty string otherwise</returns>
+        private static string GetBucketHashValue(DatItem datItem, ItemKey bucketBy)
+        {
+            return datItem switch
+            {
+                Disk disk => bucketBy switch
+                {
+                    ItemKey.CRC => Constants.CRCZero,
+                    ItemKey.MD5 => disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key) ?? string.Empty,
+                    ItemKey.SHA1 => disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key) ?? string.Empty,
+                    ItemKey.SHA256 => Constants.SHA256Zero,
+                    ItemKey.SHA384 => Constants.SHA384Zero,
+                    ItemKey.SHA512 => Constants.SHA512Zero,
+                    ItemKey.SpamSum => Constants.SpamSumZero,
+                    _ => string.Empty,
+                },
+                Media media => bucketBy switch
+                {
+                    ItemKey.CRC => Constants.CRCZero,
+                    ItemKey.MD5 => media.GetStringFieldValue(Models.Metadata.Media.MD5Key) ?? string.Empty,
+                    ItemKey.SHA1 => media.GetStringFieldValue(Models.Metadata.Media.SHA1Key) ?? string.Empty,
+                    ItemKey.SHA256 => media.GetStringFieldValue(Models.Metadata.Media.SHA256Key) ?? string.Empty,
+                    ItemKey.SHA384 => Constants.SHA384Zero,
+                    ItemKey.SHA512 => Constants.SHA512Zero,
+                    ItemKey.SpamSum => media.GetStringFieldValue(Models.Metadata.Media.SpamSumKey) ?? string.Empty,
+                    _ => string.Empty,
+                },
+                Rom rom => bucketBy switch
+                {
+                    ItemKey.CRC => rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey) ?? string.Empty,
+                    ItemKey.MD5 => rom.GetStringFieldValue(Models.Metadata.Rom.MD5Key) ?? string.Empty,
+                    ItemKey.SHA1 => rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key) ?? string.Empty,
+                    ItemKey.SHA256 => rom.GetStringFieldValue(Models.Metadata.Rom.SHA256Key) ?? string.Empty,
+                    ItemKey.SHA384 => rom.GetStringFieldValue(Models.Metadata.Rom.SHA384Key) ?? string.Empty,
+                    ItemKey.SHA512 => rom.GetStringFieldValue(Models.Metadata.Rom.SHA512Key) ?? string.Empty,
+                    ItemKey.SpamSum => rom.GetStringFieldValue(Models.Metadata.Rom.SpamSumKey) ?? string.Empty,
+                    _ => string.Empty,
+                },
+                _ => bucketBy switch
+                {
+                    ItemKey.CRC => Constants.CRCZero,
+                    ItemKey.MD5 => Constants.MD5Zero,
+                    ItemKey.SHA1 => Constants.SHA1Zero,
+                    ItemKey.SHA256 => Constants.SHA256Zero,
+                    ItemKey.SHA384 => Constants.SHA384Zero,
+                    ItemKey.SHA512 => Constants.SHA512Zero,
+                    ItemKey.SpamSum => Constants.SpamSumZero,
+                    _ => string.Empty,
+                },
+            };
+        }
+
+        /// <summary>
+        /// Ensure the key exists in the buckets dictionary
+        /// </summary>
+        /// <param name="key">Bucket key that must be present after the call</param>
+        private void EnsureBucketingKey(string key)
+        {
+            // If the key is missing from the dictionary, add it
+            if (!_buckets.ContainsKey(key))
+#if NET40_OR_GREATER || NETCOREAPP
+                _buckets.TryAdd(key, []);
+#else
+                _buckets[key] = [];
+#endif
+        }
+
+        #endregion
     }
 }