using System; #if NET40_OR_GREATER || NETCOREAPP using System.Collections.Concurrent; #endif using System.Collections.Generic; using System.IO; using System.Linq; #if NET40_OR_GREATER || NETCOREAPP using System.Threading; using System.Threading.Tasks; #endif using System.Xml.Serialization; using Newtonsoft.Json; using SabreTools.Core.Tools; using SabreTools.DatItems; using SabreTools.DatItems.Formats; using SabreTools.Hashing; using SabreTools.Matching.Compare; /* * Planning Notes: * * In order for this in-memory "database" design to work, there need to be a few things: * - Feature parity with all existing item dictionary operations * - A way to transition between the two item dictionaries (a flag?) * - Helper methods that target the "database" version instead of assuming the standard dictionary * * Notable changes include: * - Separation of Machine from DatItem, leading to a mapping instead * + Should DatItem include an index reference to the machine? Or should that be all external? * - Adding machines to the dictionary distinctly from the items * - Having a separate "bucketing" system that only reorders indices and not full items; quicker? 
* - Non-key-based add/remove of values; use explicit methods instead of dictionary-style accessors
 */

namespace SabreTools.DatFiles
{
    /// <summary>
    /// Item dictionary with statistics, bucketing, and sorting
    /// </summary>
    /// <remarks>
    /// Items, machines, and sources are stored in separate index-keyed dictionaries.
    /// Items are linked to their machine and source through two mapping dictionaries,
    /// and buckets only hold lists of item indices rather than the items themselves.
    /// </remarks>
    [JsonObject("items"), XmlRoot("items")]
    public class ItemDictionaryDB
    {
        #region Private instance variables

        /// <summary>
        /// Internal dictionary for all items
        /// </summary>
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        private readonly ConcurrentDictionary<long, DatItem> _items = [];
#else
        private readonly Dictionary<long, DatItem> _items = [];
#endif

        /// <summary>
        /// Current highest available item index
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private long _itemIndex = 0;

        /// <summary>
        /// Internal dictionary for all machines
        /// </summary>
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        private readonly ConcurrentDictionary<long, Machine> _machines = [];
#else
        private readonly Dictionary<long, Machine> _machines = [];
#endif

        /// <summary>
        /// Current highest available machine index
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private long _machineIndex = 0;

        /// <summary>
        /// Internal dictionary for all sources
        /// </summary>
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        private readonly ConcurrentDictionary<long, Source> _sources = [];
#else
        private readonly Dictionary<long, Source> _sources = [];
#endif

        /// <summary>
        /// Current highest available source index
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private long _sourceIndex = 0;

        /// <summary>
        /// Internal dictionary for item to machine mappings
        /// </summary>
        /// TODO: Make private when access issues are figured out
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        internal readonly ConcurrentDictionary<long, long> _itemToMachineMapping = [];
#else
        internal readonly Dictionary<long, long> _itemToMachineMapping = [];
#endif

        /// <summary>
        /// Internal dictionary for item to source mappings
        /// </summary>
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        private readonly ConcurrentDictionary<long, long> _itemToSourceMapping = [];
#else
        private readonly Dictionary<long, long> _itemToSourceMapping = [];
#endif

        /// <summary>
        /// Internal dictionary representing the current buckets
        /// </summary>
        /// TODO: Make private when access issues are figured out
        [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP
        internal readonly ConcurrentDictionary<string, List<long>> _buckets = [];
#else
        internal readonly Dictionary<string, List<long>> _buckets = [];
#endif

        /// <summary>
        /// Current bucketed by value
        /// </summary>
        private ItemKey _bucketedBy = ItemKey.NULL;

        #endregion

        #region Fields

        /// <summary>
        /// Get the keys in sorted order from the file dictionary
        /// </summary>
        /// <returns>Array of the bucket keys in natural sort order</returns>
        [JsonIgnore, XmlIgnore]
        public string[] SortedKeys
        {
            get
            {
                List<string> keys = [.. _buckets.Keys];
                keys.Sort(new NaturalComparer());
                return [.. keys];
            }
        }

        /// <summary>
        /// DAT statistics
        /// </summary>
        [JsonIgnore, XmlIgnore]
        public DatStatistics DatStatistics { get; } = new DatStatistics();

        #endregion

        /// <summary>
        /// Generic constructor
        /// </summary>
        public ItemDictionaryDB()
        {
        }

        #region Accessors

        /// <summary>
        /// Add a DatItem to the dictionary after validation
        /// </summary>
        /// <param name="item">Item data to validate</param>
        /// <param name="machineIndex">Index of the machine related to the item</param>
        /// <param name="sourceIndex">Index of the source related to the item</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        /// <returns>The index for the added item, -1 when only statistics were recorded</returns>
        public long AddItem(DatItem item, long machineIndex, long sourceIndex, bool statsOnly)
        {
            // Disk and Rom items get their status/size/hash data normalized in place.
            // Media items have no status key, so there is nothing to adjust for them.
            if (item is Disk disk)
            {
                // A disk without any hashes that is not already a nodump is flagged as one
                bool diskIsNodump = disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump;
                bool diskHasNoHashes = string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key))
                    && string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key));

                if (!diskIsNodump && diskHasNoHashes)
                    disk.SetFieldValue<string?>(Models.Metadata.Disk.StatusKey, ItemStatus.Nodump.AsStringValue());
            }
            else if (item is Rom rom)
            {
                long? size = rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey);
                bool sizeMissingOrZero = size == null || size == 0;

                // No size and no hashes at all: nothing can be inferred, leave the item untouched
                if (size == null && !rom.HasHashes())
                {
                    // No-op, just catch it so it doesn't go further
                }

                // Missing/zero size AND (no CRC or the hashes of a 0-byte file): fill in the zero-byte data
                else if (sizeMissingOrZero
                    && (string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)) || rom.HasZeroHash()))
                {
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SizeKey, Constants.SizeZero.ToString());
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.CRCKey, ZeroHash.CRC32Str);
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.MD2Key, null); // ZeroHash.GetString(HashType.MD2)
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.MD4Key, null); // ZeroHash.GetString(HashType.MD4)
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.MD5Key, ZeroHash.MD5Str);
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SHA1Key, ZeroHash.SHA1Str);
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SHA256Key, null); // ZeroHash.SHA256Str;
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SHA384Key, null); // ZeroHash.SHA384Str;
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SHA512Key, null); // ZeroHash.SHA512Str;
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.SpamSumKey, null); // ZeroHash.SpamSumStr;
                }

                // Missing/zero size that is not the zero-byte case: flag as nodump
                else if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() != ItemStatus.Nodump
                    && sizeMissingOrZero)
                {
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.StatusKey, ItemStatus.Nodump.AsStringValue());
                }

                // Has a size but absolutely no hashes: flag as nodump
                else if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() != ItemStatus.Nodump
                    && size != null && size > 0
                    && !rom.HasHashes())
                {
                    rom.SetFieldValue<string?>(Models.Metadata.Rom.StatusKey, ItemStatus.Nodump.AsStringValue());
                }
            }

            // If only adding statistics, the item itself is not stored
            if (statsOnly)
            {
                DatStatistics.AddItemStatistics(item);
                return -1;
            }

            return AddItem(item, machineIndex, sourceIndex);
        }

        /// <summary>
        /// Add a machine, returning the insert index
        /// </summary>
        /// <param name="machine">Machine to store</param>
        /// <returns>Zero-based index the machine was stored under</returns>
        public long AddMachine(Machine machine)
        {
#if NET40_OR_GREATER || NETCOREAPP
            long index = Interlocked.Increment(ref _machineIndex) - 1;
            _machines.TryAdd(index, machine);
#else
            // Post-increment already yields the pre-increment value, matching the Interlocked branch
            long index = _machineIndex++;
            _machines[index] = machine;
#endif
            return index;
        }

        /// <summary>
        /// Add a source, returning the insert index
        /// </summary>
        /// <param name="source">Source to store</param>
        /// <returns>Zero-based index the source was stored under</returns>
        public long AddSource(Source source)
        {
#if NET40_OR_GREATER || NETCOREAPP
            long index = Interlocked.Increment(ref _sourceIndex) - 1;
            _sources.TryAdd(index, source);
#else
            // Post-increment already yields the pre-increment value, matching the Interlocked branch
            long index = _sourceIndex++;
            _sources[index] = source;
#endif
            return index;
        }

        /// <summary>
        /// Remove any keys that have null or empty values
        /// </summary>
        /// <remarks>
        /// A bucket is removed when it resolves to no items at all or when every
        /// item it resolves to is a <see cref="Blank"/>.
        /// </remarks>
        internal void ClearEmpty()
        {
            foreach (string key in SortedKeys)
            {
                if (key == null)
                    continue;

                // GetItemsForBucket already drops missing and null items
                var items = GetItemsForBucket(key);
                if (items.Values.Any(item => item is not Blank))
                    continue;

#if NET40_OR_GREATER || NETCOREAPP
                _buckets.TryRemove(key, out _);
#else
                _buckets.Remove(key);
#endif
            }
        }

        /// <summary>
        /// Remove all items marked for removal
        /// </summary>
        internal void ClearMarked()
        {
            // Snapshot the keys so removal cannot invalidate the enumeration
            long[] itemIndices = [.. _items.Keys];
            foreach (long itemIndex in itemIndices)
            {
                if (!_items.TryGetValue(itemIndex, out var datItem) || datItem == null)
                    continue;

                if (datItem.GetBoolFieldValue(DatItem.RemoveKey) != true)
                    continue;

                RemoveItem(itemIndex);
            }
        }

        /// <summary>
        /// Get an item based on the index
        /// </summary>
        /// <param name="index">Index of the item</param>
        /// <returns>The stored item, null if the index is unknown</returns>
        public DatItem? GetItem(long index)
            => _items.TryGetValue(index, out var datItem) ? datItem : null;

        /// <summary>
        /// Get all item to machine mappings
        /// </summary>
        public IDictionary<long, long> GetItemMachineMappings() => _itemToMachineMapping;

        /// <summary>
        /// Get all item to source mappings
        /// </summary>
        public IDictionary<long, long> GetItemSourceMappings() => _itemToSourceMapping;

        /// <summary>
        /// Get all items and their indices
        /// </summary>
        public IDictionary<long, DatItem> GetItems() => _items;

        /// <summary>
        /// Get the indices and items associated with a bucket name
        /// </summary>
        /// <param name="bucketName">Name of the bucket to read</param>
        /// <param name="filter">True to leave out items flagged for removal, false to include them</param>
        /// <returns>Index-to-item dictionary, empty if the bucket is null or unknown</returns>
        public Dictionary<long, DatItem> GetItemsForBucket(string? bucketName, bool filter = false)
        {
            if (bucketName == null)
                return [];

            if (!_buckets.TryGetValue(bucketName, out var itemIds) || itemIds == null)
                return [];

            return CollectItems(itemIds, filter);
        }

        /// <summary>
        /// Get the indices and items associated with a machine index
        /// </summary>
        /// <param name="machineIndex">Index of the machine</param>
        /// <param name="filter">True to leave out items flagged for removal, false to include them</param>
        /// <returns>Index-to-item dictionary for every item mapped to the machine</returns>
        public IDictionary<long, DatItem>? GetItemsForMachine(long machineIndex, bool filter = false)
        {
            var itemIds = _itemToMachineMapping
                .Where(mapping => mapping.Value == machineIndex)
                .Select(mapping => mapping.Key);

            return CollectItems(itemIds, filter);
        }

        /// <summary>
        /// Get the indices and items associated with a source index
        /// </summary>
        /// <param name="sourceIndex">Index of the source</param>
        /// <param name="filter">True to leave out items flagged for removal, false to include them</param>
        /// <returns>Index-to-item dictionary for every item mapped to the source</returns>
        public IDictionary<long, DatItem>? GetItemsForSource(long sourceIndex, bool filter = false)
        {
            var itemIds = _itemToSourceMapping
                .Where(mapping => mapping.Value == sourceIndex)
                .Select(mapping => mapping.Key);

            return CollectItems(itemIds, filter);
        }

        /// <summary>
        /// Get a machine based on the index
        /// </summary>
        /// <param name="index">Index of the machine</param>
        /// <returns>The stored machine, null if the index is unknown</returns>
        public Machine? GetMachine(long index)
            => _machines.TryGetValue(index, out var machine) ? machine : null;

        /// <summary>
        /// Get a machine based on the name
        /// </summary>
        /// <param name="name">Machine name to look for</param>
        /// <returns>Index and machine of the first match, (-1, null) if there is none</returns>
        /// <remarks>This assumes that all machines have unique names</remarks>
        public KeyValuePair<long, Machine?> GetMachine(string? name)
        {
            if (!string.IsNullOrEmpty(name))
            {
                foreach (var entry in _machines)
                {
                    if (entry.Value?.GetStringFieldValue(Models.Metadata.Machine.NameKey) == name)
                        return new KeyValuePair<long, Machine?>(entry.Key, entry.Value);
                }
            }

            // A miss must not look like index 0, which is a valid machine index
            return new KeyValuePair<long, Machine?>(-1, null);
        }

        /// <summary>
        /// Get the index and machine associated with an item index
        /// </summary>
        /// <param name="itemIndex">Index of the item</param>
        /// <returns>Index and machine for the item, (-1, null) if either lookup fails</returns>
        public KeyValuePair<long, Machine?> GetMachineForItem(long itemIndex)
        {
            if (_itemToMachineMapping.TryGetValue(itemIndex, out long machineIndex)
                && _machines.TryGetValue(machineIndex, out var machine))
            {
                return new KeyValuePair<long, Machine?>(machineIndex, machine);
            }

            return new KeyValuePair<long, Machine?>(-1, null);
        }

        /// <summary>
        /// Get all machines and their indices
        /// </summary>
        public IDictionary<long, Machine> GetMachines() => _machines;

        /// <summary>
        /// Get a source based on the index
        /// </summary>
        /// <param name="index">Index of the source</param>
        /// <returns>The stored source, null if the index is unknown</returns>
        public Source? GetSource(long index)
            => _sources.TryGetValue(index, out var source) ? source : null;

        /// <summary>
        /// Get the index and source associated with an item index
        /// </summary>
        /// <param name="itemIndex">Index of the item</param>
        /// <returns>Index and source for the item, (-1, null) if either lookup fails</returns>
        public KeyValuePair<long, Source?> GetSourceForItem(long itemIndex)
        {
            if (_itemToSourceMapping.TryGetValue(itemIndex, out long sourceIndex)
                && _sources.TryGetValue(sourceIndex, out var source))
            {
                return new KeyValuePair<long, Source?>(sourceIndex, source);
            }

            return new KeyValuePair<long, Source?>(-1, null);
        }

        /// <summary>
        /// Get all sources and their indices
        /// </summary>
        public IDictionary<long, Source> GetSources() => _sources;

        /// <summary>
        /// Remove an item, returning if it could be removed
        /// </summary>
        /// <param name="itemIndex">Index of the item to remove</param>
        /// <returns>True if the item existed and was removed, false otherwise</returns>
        /// <remarks>
        /// Both the machine and source mappings for the item are dropped as well.
        /// Statistics and buckets are not adjusted by this method.
        /// </remarks>
        public bool RemoveItem(long itemIndex)
        {
#if NET40_OR_GREATER || NETCOREAPP
            if (!_items.TryRemove(itemIndex, out _))
                return false;

            _itemToMachineMapping.TryRemove(itemIndex, out _);
            _itemToSourceMapping.TryRemove(itemIndex, out _);
#else
            if (!_items.Remove(itemIndex))
                return false;

            _itemToMachineMapping.Remove(itemIndex);
            _itemToSourceMapping.Remove(itemIndex);
#endif
            return true;
        }

        /// <summary>
        /// Remove a machine, returning if it could be removed
        /// </summary>
        /// <param name="machineIndex">Index of the machine to remove</param>
        /// <returns>True if the machine existed and was removed, false otherwise</returns>
        /// <remarks>Item-to-machine mappings that pointed at the machine are removed; the items themselves are kept</remarks>
        public bool RemoveMachine(long machineIndex)
        {
#if NET40_OR_GREATER || NETCOREAPP
            if (!_machines.TryRemove(machineIndex, out _))
                return false;
#else
            if (!_machines.Remove(machineIndex))
                return false;
#endif

            // Materialize the affected item IDs first; removing while lazily
            // enumerating the same dictionary is not allowed for Dictionary
            long[] itemIds = [.. _itemToMachineMapping
                .Where(mapping => mapping.Value == machineIndex)
                .Select(mapping => mapping.Key)];

            foreach (long itemId in itemIds)
            {
#if NET40_OR_GREATER || NETCOREAPP
                _itemToMachineMapping.TryRemove(itemId, out _);
#else
                _itemToMachineMapping.Remove(itemId);
#endif
            }

            return true;
        }

        /// <summary>
        /// Remove a machine, returning if it could be removed
        /// </summary>
        /// <param name="machineName">Name of the machine to remove</param>
        /// <returns>True if a machine with that name existed and was removed, false otherwise</returns>
        public bool RemoveMachine(string machineName)
        {
            if (string.IsNullOrEmpty(machineName))
                return false;

            var machine = GetMachine(machineName);
            if (machine.Value == null)
                return false;

            return RemoveMachine(machine.Key);
        }

        /// <summary>
        /// Add an item, returning the insert index
        /// </summary>
        /// <param name="item">Item to store</param>
        /// <param name="machineIndex">Index of the machine related to the item</param>
        /// <param name="sourceIndex">Index of the source related to the item</param>
        /// <returns>Zero-based index the item was stored under</returns>
        internal long AddItem(DatItem item, long machineIndex, long sourceIndex)
        {
#if NET40_OR_GREATER || NETCOREAPP
            // Add the item with a new index
            long index = Interlocked.Increment(ref _itemIndex) - 1;
            _items.TryAdd(index, item);

            // Add the machine mapping
            _itemToMachineMapping.TryAdd(index, machineIndex);

            // Add the source mapping
            _itemToSourceMapping.TryAdd(index, sourceIndex);
#else
            // Add the item with a new index
            long index = _itemIndex++;
            _items[index] = item;

            // Add the machine mapping
            _itemToMachineMapping[index] = machineIndex;

            // Add the source mapping
            _itemToSourceMapping[index] = sourceIndex;
#endif

            // Add the item statistics
            DatStatistics.AddItemStatistics(item);

            // Add the item to the bucket matching the current bucketing
            PerformItemBucketing(index, _bucketedBy, lower: true, norename: true);

            // Return the index the item was actually stored under
            return index;
        }

        /// <summary>
        /// Resolve a set of item indices into an index-to-item dictionary
        /// </summary>
        /// <param name="itemIds">Item indices to resolve</param>
        /// <param name="filter">True to leave out items flagged for removal, false to include them</param>
        /// <returns>Dictionary of all indices that resolved to a non-null item</returns>
        private Dictionary<long, DatItem> CollectItems(IEnumerable<long> itemIds, bool filter)
        {
            var datItems = new Dictionary<long, DatItem>();
            foreach (long itemId in itemIds)
            {
                // Ignore missing IDs
                if (!_items.TryGetValue(itemId, out var datItem) || datItem == null)
                    continue;

                if (!filter || datItem.GetBoolFieldValue(DatItem.RemoveKey) != true)
                    datItems[itemId] = datItem;
            }

            return datItems;
        }

        #endregion

        #region Bucketing

        /// <summary>
        /// Update the bucketing dictionary
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="dedupeType">Dedupe type that should be used</param>
        /// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        public void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
        {
            // Re-bucket only when a different, non-NULL key is requested
            if (_bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
                PerformBucketing(bucketBy, lower, norename);

            // Deduplicate when requested, otherwise just sort the buckets for consistency
            if (dedupeType != DedupeType.None)
                PerformDeduplication(bucketBy, dedupeType);
            else
                PerformSorting(norename);
        }

        /// <summary>
        /// List all duplicates found in a DAT based on a DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>Dictionary of matched DatItem objects keyed by item index</returns>
        /// <remarks>This also sets the remove flag on any duplicates found</remarks>
        internal Dictionary<long, DatItem> GetDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            Dictionary<long, DatItem> output = [];

            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return output;

            // We want to get the proper key for the DatItem
            string key = SortAndGetKey(datItem, sorted);

            // If the key doesn't exist, return the empty list
            var roms = GetItemsForBucket(key);
            if (roms == null || roms.Count == 0)
                return output;

            // Split the bucket into newly-found duplicates and everything else
            Dictionary<long, DatItem> left = [];
            foreach (var rom in roms)
            {
                // Already flagged items are kept as-is and never reported again
                if (rom.Value.GetBoolFieldValue(DatItem.RemoveKey) == true)
                {
                    left[rom.Key] = rom.Value;
                }
                else if (datItem.Value.Equals(rom.Value))
                {
                    rom.Value.SetFieldValue<bool?>(DatItem.RemoveKey, true);
                    output[rom.Key] = rom.Value;
                }
                else
                {
                    left[rom.Key] = rom.Value;
                }
            }

            // Write the reordered bucket back; indexer assignment replaces the existing
            // entry, whereas TryAdd would silently keep the old list
            _buckets[key] = [.. output.Keys, .. left.Keys];

            return output;
        }

        /// <summary>
        /// Check if a DAT contains the given DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>True if it contains the rom, false otherwise</returns>
        internal bool HasDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return false;

            // We want to get the proper key for the DatItem
            string key = SortAndGetKey(datItem, sorted);

            // If the key doesn't exist
            var roms = GetItemsForBucket(key);
            if (roms == null || roms.Count == 0)
                return false;

            // Compare the item itself, not the index/item pair, against each candidate
            return roms.Values.Any(candidate => datItem.Value.Equals(candidate));
        }

        /// <summary>
        /// Merge an arbitrary set of item pairs based on the supplied information
        /// </summary>
        /// <param name="itemMappings">List of pairs representing the items to be merged</param>
        /// <returns>List of merged pairs; items that are not Disk, File, Media, or Rom are not included</returns>
        private List<KeyValuePair<long, DatItem>> Merge(List<KeyValuePair<long, DatItem>> itemMappings)
        {
            // Check for null or blank roms first
            if (itemMappings == null || itemMappings.Count == 0)
                return [];

            // Create output list
            List<KeyValuePair<long, DatItem>> output = [];

            // Then deduplicate them by checking to see if data matches previous saved roms
            int nodumpCount = 0;
            foreach (var kvp in itemMappings)
            {
                long itemIndex = kvp.Key;
                DatItem datItem = kvp.Value;

                // If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
                if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
                    continue;

                // If it's a nodump, add and skip
                bool isNodump =
                    (datItem is Rom nodumpRom
                        && nodumpRom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump)
                    || (datItem is Disk nodumpDisk
                        && nodumpDisk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() == ItemStatus.Nodump);
                if (isNodump)
                {
                    output.Add(kvp);
                    nodumpCount++;
                    continue;
                }

                // If it's the first non-nodump rom in the list, don't touch it
                if (output.Count == nodumpCount)
                {
                    output.Add(kvp);
                    continue;
                }

                // Find the index of the first duplicate, if one exists
                int pos = output.FindIndex(lastItem => datItem.GetDuplicateStatus(lastItem.Value) != 0x00);
                if (pos < 0)
                {
                    output.Add(kvp);
                    continue;
                }

                // Get the duplicate item
                long savedIndex = output[pos].Key;
                DatItem savedItem = output[pos].Value;
                DupeType dupetype = datItem.GetDuplicateStatus(savedItem);

                // Disks, Files, Media, and Roms have more information to fill
                if (datItem is Disk diskItem && savedItem is Disk savedDisk)
                    savedDisk.FillMissingInformation(diskItem);
                else if (datItem is DatItems.Formats.File fileItem && savedItem is DatItems.Formats.File savedFile)
                    savedFile.FillMissingInformation(fileItem);
                else if (datItem is Media mediaItem && savedItem is Media savedMedia)
                    savedMedia.FillMissingInformation(mediaItem);
                else if (datItem is Rom romItem && savedItem is Rom savedRom)
                    savedRom.FillMissingInformation(romItem);

                savedItem.SetFieldValue<DupeType>(DatItem.DupeTypeKey, dupetype);

                // Look up sources and machines through the safe helpers so an item whose
                // mapping was removed does not abort the whole merge with a missing-key error
                var savedSource = GetSourceForItem(savedIndex);
                var itemSource = GetSourceForItem(itemIndex);
                var savedMachine = GetMachineForItem(savedIndex);
                var itemMachine = GetMachineForItem(itemIndex);
                bool haveBothMachines = savedMachine.Value != null && itemMachine.Value != null;

                // If the current source has a lower ID than the saved, use the saved source
                // (the lifted comparison is false when either source is missing)
                if (itemSource.Value?.Index < savedSource.Value?.Index)
                {
                    _itemToSourceMapping[itemIndex] = savedSource.Key;
                    if (haveBothMachines)
                        _machines[savedMachine.Key] = (itemMachine.Value!.Clone() as Machine)!;

                    savedItem.SetName(datItem.GetName());
                }

                // If the saved machine is a child of the current machine, use the current machine instead
                if (haveBothMachines)
                {
                    string? itemMachineName = itemMachine.Value!.GetStringFieldValue(Models.Metadata.Machine.NameKey);
                    if (savedMachine.Value!.GetStringFieldValue(Models.Metadata.Machine.CloneOfKey) == itemMachineName
                        || savedMachine.Value!.GetStringFieldValue(Models.Metadata.Machine.RomOfKey) == itemMachineName)
                    {
                        _machines[savedMachine.Key] = (itemMachine.Value!.Clone() as Machine)!;
                        savedItem.SetName(datItem.GetName());
                    }
                }

                // Replace the original item in the list
                output[pos] = new KeyValuePair<long, DatItem>(savedIndex, savedItem);
            }

            return output;
        }

        /// <summary>
        /// Get the highest-order Field value that represents the statistics
        /// </summary>
        /// <returns>The strongest hash key whose count equals the number of non-nodump Disk, Media, and Rom items; CRC otherwise</returns>
        private ItemKey GetBestAvailable()
        {
            // Number of items that are expected to carry hashes
            long expected = DatStatistics.GetItemCount(ItemType.Disk)
                + DatStatistics.GetItemCount(ItemType.Media)
                + DatStatistics.GetItemCount(ItemType.Rom)
                - DatStatistics.GetStatusCount(ItemStatus.Nodump);

            // Check from the strongest hash down to the weakest
            if (expected == DatStatistics.GetHashCount(HashType.SHA512))
                return ItemKey.SHA512;
            if (expected == DatStatistics.GetHashCount(HashType.SHA384))
                return ItemKey.SHA384;
            if (expected == DatStatistics.GetHashCount(HashType.SHA256))
                return ItemKey.SHA256;
            if (expected == DatStatistics.GetHashCount(HashType.SHA1))
                return ItemKey.SHA1;
            if (expected == DatStatistics.GetHashCount(HashType.MD5))
                return ItemKey.MD5;
            if (expected == DatStatistics.GetHashCount(HashType.MD4))
                return ItemKey.MD4;
            if (expected == DatStatistics.GetHashCount(HashType.MD2))
                return ItemKey.MD2;

            // Otherwise, we bucket by CRC
            return ItemKey.CRC;
        }

        /// <summary>
        /// Get the bucketing key for a given item index
        /// </summary>
        /// <param name="itemIndex">Index of the current item</param>
        /// <param name="bucketBy">ItemKey value representing what key to get</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        /// <returns>Bucket key for the item, the empty string if the item does not exist</returns>
        private string GetBucketKey(long itemIndex, ItemKey bucketBy, bool lower, bool norename)
        {
            if (!_items.TryGetValue(itemIndex, out var datItem) || datItem == null)
                return string.Empty;

            var source = GetSourceForItem(itemIndex);
            var machine = GetMachineForItem(itemIndex);

            // Treat NULL like machine
            if (bucketBy == ItemKey.NULL)
                bucketBy = ItemKey.Machine;

            // Get the bucket key
            return datItem.GetKeyDB(bucketBy, machine.Value, source.Value, lower, norename);
        }

        /// <summary>
        /// Ensure the key exists in the buckets dictionary
        /// </summary>
        /// <param name="key">Bucket key to look up or create</param>
        /// <returns>The list stored for the key after the call</returns>
        private List<long> EnsureBucketingKey(string key)
        {
#if NET40_OR_GREATER || NETCOREAPP
            return _buckets.GetOrAdd(key, _ => []);
#else
            if (!_buckets.TryGetValue(key, out var bucket) || bucket == null)
            {
                bucket = [];
                _buckets[key] = bucket;
            }

            return bucket;
#endif
        }

        /// <summary>
        /// Perform bucketing based on the item key provided
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
        {
            // Reset the bucketing values
            _bucketedBy = bucketBy;
            _buckets.Clear();

            // Get the current list of item indices; these are not necessarily
            // contiguous, so the loop counter must be translated through the array
            long[] itemIndicies = [.. _items.Keys];

#if NET452_OR_GREATER || NETCOREAPP
            Parallel.For(0, itemIndicies.Length, Core.Globals.ParallelOptions,
                i => PerformItemBucketing(itemIndicies[i], bucketBy, lower, norename));
#elif NET40_OR_GREATER
            Parallel.For(0, itemIndicies.Length,
                i => PerformItemBucketing(itemIndicies[i], bucketBy, lower, norename));
#else
            for (int i = 0; i < itemIndicies.Length; i++)
            {
                PerformItemBucketing(itemIndicies[i], bucketBy, lower, norename);
            }
#endif
        }

        /// <summary>
        /// Bucket a single DatItem
        /// </summary>
        /// <param name="itemIndex">Index of the item to bucket</param>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private void PerformItemBucketing(long itemIndex, ItemKey bucketBy, bool lower, bool norename)
        {
            string bucketKey = GetBucketKey(itemIndex, bucketBy, lower, norename);
            List<long> bucket = EnsureBucketingKey(bucketKey);

            // List<T> is not safe for concurrent writers. Lock on the shared list instance:
            // two equal key strings are usually different objects, so locking on the key
            // would not serialize writers to the same bucket.
            lock (bucket)
            {
                bucket.Add(itemIndex);
            }
        }

        /// <summary>
        /// Perform deduplication based on the deduplication type provided
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="dedupeType">Dedupe type that should be used</param>
        private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
        {
            // Get the current list of bucket keys
            string[] bucketKeys = [.. _buckets.Keys];

#if NET452_OR_GREATER || NETCOREAPP
            Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions,
                i => DeduplicateBucket(bucketKeys[i], bucketBy, dedupeType));
#elif NET40_OR_GREATER
            Parallel.For(0, bucketKeys.Length,
                i => DeduplicateBucket(bucketKeys[i], bucketBy, dedupeType));
#else
            for (int i = 0; i < bucketKeys.Length; i++)
            {
                DeduplicateBucket(bucketKeys[i], bucketBy, dedupeType);
            }
#endif
        }

        /// <summary>
        /// Sort and, when applicable, merge the contents of a single bucket
        /// </summary>
        /// <param name="bucketKey">Key of the bucket to process</param>
        /// <param name="bucketBy">ItemKey enum representing how the items are bucketed</param>
        /// <param name="dedupeType">Dedupe type that should be used</param>
        private void DeduplicateBucket(string bucketKey, ItemKey bucketBy, DedupeType dedupeType)
        {
            if (!_buckets.TryGetValue(bucketKey, out var itemIndices))
                return;

            if (itemIndices == null || itemIndices.Count == 0)
                return;

            var datItems = GetExistingItemPairs(itemIndices);
            Sort(ref datItems, false);

            // If we're merging the roms, do so
            if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine))
                datItems = Merge(datItems);

            // Replace the bucket contents; TryAdd would keep the old list because the key exists
            _buckets[bucketKey] = [.. datItems.Select(kvp => kvp.Key)];
        }

        /// <summary>
        /// Sort existing buckets for consistency
        /// </summary>
        /// <param name="norename">True if files are not renamed, false otherwise</param>
        private void PerformSorting(bool norename)
        {
            // Get the current list of bucket keys
            string[] bucketKeys = [.. _buckets.Keys];

#if NET452_OR_GREATER || NETCOREAPP
            Parallel.For(0, bucketKeys.Length, Core.Globals.ParallelOptions,
                i => SortBucket(bucketKeys[i], norename));
#elif NET40_OR_GREATER
            Parallel.For(0, bucketKeys.Length,
                i => SortBucket(bucketKeys[i], norename));
#else
            for (int i = 0; i < bucketKeys.Length; i++)
            {
                SortBucket(bucketKeys[i], norename);
            }
#endif
        }

        /// <summary>
        /// Sort the contents of a single bucket, removing the bucket if it is empty
        /// </summary>
        /// <param name="bucketKey">Key of the bucket to process</param>
        /// <param name="norename">True if files are not renamed, false otherwise</param>
        private void SortBucket(string bucketKey, bool norename)
        {
            _buckets.TryGetValue(bucketKey, out var itemIndices);
            if (itemIndices == null || itemIndices.Count == 0)
            {
#if NET40_OR_GREATER || NETCOREAPP
                _buckets.TryRemove(bucketKey, out _);
#else
                _buckets.Remove(bucketKey);
#endif
                return;
            }

            var datItems = GetExistingItemPairs(itemIndices);
            Sort(ref datItems, norename);

            // Replace the bucket contents; TryAdd would keep the old list because the key exists
            _buckets[bucketKey] = [.. datItems.Select(kvp => kvp.Key)];
        }

        /// <summary>
        /// Pair each item index that still exists with its item
        /// </summary>
        /// <param name="itemIndices">Item indices to resolve</param>
        /// <returns>List of index/item pairs in the same order as the input, skipping unknown indices</returns>
        private List<KeyValuePair<long, DatItem>> GetExistingItemPairs(List<long> itemIndices)
        {
            var pairs = new List<KeyValuePair<long, DatItem>>(itemIndices.Count);
            foreach (long itemId in itemIndices)
            {
                if (_items.TryGetValue(itemId, out var datItem))
                    pairs.Add(new KeyValuePair<long, DatItem>(itemId, datItem));
            }

            return pairs;
        }

        /// <summary>
        /// Sort a list of item pairs by machine name, item type, directory name, and item name (in order)
        /// </summary>
        /// <param name="itemMappings">List of pairs representing the items to be sorted</param>
        /// <param name="norename">True if files are not renamed, false otherwise</param>
        /// <returns>Always true</returns>
        /// <remarks>Any exception thrown while comparing two items is absorbed and the pair is treated as equal</remarks>
        private bool Sort(ref List<KeyValuePair<long, DatItem>> itemMappings, bool norename)
        {
            // One comparer is enough for the whole sort
            var nc = new NaturalComparer();

            itemMappings.Sort((x, y) =>
            {
                try
                {
                    // If machine names don't match
                    string? xMachineName = _machines[_itemToMachineMapping[x.Key]].GetStringFieldValue(Models.Metadata.Machine.NameKey);
                    string? yMachineName = _machines[_itemToMachineMapping[y.Key]].GetStringFieldValue(Models.Metadata.Machine.NameKey);
                    if (xMachineName != yMachineName)
                        return nc.Compare(xMachineName, yMachineName);

                    // If types don't match
                    string? xType = x.Value.GetStringFieldValue(Models.Metadata.DatItem.TypeKey);
                    string? yType = y.Value.GetStringFieldValue(Models.Metadata.DatItem.TypeKey);
                    if (xType != yType)
                        return xType.AsEnumValue<ItemType>() - yType.AsEnumValue<ItemType>();

                    // If directory names don't match
                    string? xDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
                    string? yDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
                    if (xDirectoryName != yDirectoryName)
                        return nc.Compare(xDirectoryName, yDirectoryName);

                    // If item names don't match
                    string? xName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
                    string? yName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
                    if (xName != yName)
                        return nc.Compare(xName, yName);

                    // Otherwise, compare on machine or source, depending on the flag
                    if (norename)
                        return nc.Compare(xMachineName, yMachineName);

                    int? xSourceIndex = GetSourceForItem(x.Key).Value?.Index;
                    int? ySourceIndex = GetSourceForItem(y.Key).Value?.Index;
                    return (xSourceIndex - ySourceIndex) ?? 0;
                }
                catch
                {
                    // Absorb the error
                    return 0;
                }
            });

            return true;
        }

        /// <summary>
        /// Sort the input DAT and get the key to be used by the item
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>Key to try to use</returns>
        private string SortAndGetKey(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            // If we're not already sorted, take care of it
            if (!sorted)
                BucketBy(GetBestAvailable(), DedupeType.None);

            // Now that we have the sorted type, we get the proper key
            var machine = GetMachineForItem(datItem.Key);
            var source = GetSourceForItem(datItem.Key);
            return datItem.Value.GetKeyDB(_bucketedBy, machine.Value, source.Value);
        }

        #endregion

        #region Statistics

        /// <summary>
        /// Recalculate the statistics for the Dat
        /// </summary>
        public void RecalculateStats()
        {
            // Wipe out any stats already there
            DatStatistics.ResetStatistics();

            // Loop through and add every non-null item; an empty dictionary adds nothing
            foreach (var item in _items.Values)
            {
                if (item == null)
                    continue;

                DatStatistics.AddItemStatistics(item);
            }
        }

        #endregion
    }
}