using System;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
using System.Threading;
using System.Threading.Tasks;
#endif
using System.Xml.Serialization;
using Newtonsoft.Json;
using SabreTools.Metadata.DatItems;
using SabreTools.Metadata.DatItems.Formats;
using SabreTools.Hashing;
using SabreTools.Logging;
using SabreTools.Text.Compare;
using SabreTools.Text.Extensions;
using ItemStatus = SabreTools.Data.Models.Metadata.ItemStatus;

/*
 * Planning Notes:
 *
 * In order for this in-memory "database" design to work, there need to be a few things:
 * - Feature parity with all existing item dictionary operations
 * - A way to transition between the two item dictionaries (a flag?)
 * - Helper methods that target the "database" version instead of assuming the standard dictionary
 *
 * Notable changes include:
 * - Separation of Machine from DatItem, leading to a mapping instead
 * - Adding machines to the dictionary distinctly from the items
 * - Having a separate "bucketing" system that only reorders indices and not full items; quicker?
 * - Non-key-based add/remove of values; use explicit methods instead of dictionary-style accessors
 */
namespace SabreTools.Metadata.DatFiles
{
    /// <summary>
    /// Item dictionary with statistics, bucketing, and sorting
    /// </summary>
    [JsonObject("items"), XmlRoot("items")]
    public class ItemDatabase
    {
        #region Private Classes

        /// <summary>
        /// Represents a table with a string key and a list of indexes as a value
        /// </summary>
        private class GroupingTable
        {
            #region Private Fields

#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            private readonly ConcurrentDictionary<string, List<long>> _groupings = [];
#else
            private readonly Dictionary<string, List<long>> _groupings = [];
#endif

            /// <summary>
            /// Lock object guarding mutation of the grouping lists.
            /// A dedicated gate is used instead of locking on the key string:
            /// locking on strings is unsafe (interning can share instances
            /// process-wide, and distinct instances of equal keys do not
            /// exclude each other).
            /// </summary>
            private readonly object _gate = new();

            #endregion

            #region Properties

            /// <summary>
            /// Indicates what the grouping keys represent
            /// </summary>
            public ItemKey GroupedBy { get; set; } = ItemKey.NULL;

            /// <summary>
            /// All grouping keys
            /// </summary>
            public string[] Keys => [.. _groupings.Keys];

            #endregion

            #region Accessors

            /// <summary>
            /// Add an index to a grouping, creating the grouping if needed
            /// </summary>
            public void Add(string key, long index)
            {
                lock (_gate)
                {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                    // If the key is missing from the dictionary, add it
                    var grouping = _groupings.GetOrAdd(key, []);
                    grouping.Add(index);
#else
                    // If the key is missing from the dictionary, add it
                    if (!_groupings.TryGetValue(key, out var grouping))
                    {
                        grouping = [];
                        _groupings[key] = grouping;
                    }

                    grouping.Add(index);
#endif
                }
            }

            /// <summary>
            /// Remove all groupings
            /// </summary>
            public void Clear() => _groupings.Clear();

            /// <summary>
            /// Try to set a grouping by key and list of values
            /// </summary>
            /// <remarks>This overwrites anything existing</remarks>
            public bool TryAdd(string key, List<long> value)
            {
                // Use the indexer on both dictionary types so the documented
                // overwrite semantics hold; ConcurrentDictionary.TryAdd would
                // silently fail when the key already exists.
                _groupings[key] = value;
                return true;
            }

            /// <summary>
            /// Try to get a value by key, returning success
            /// </summary>
            public bool TryGet(string? key, out List<long>? value)
            {
                if (key is null)
                {
                    value = null;
                    return false;
                }

                return _groupings.TryGetValue(key, out value);
            }

            /// <summary>
            /// Try to remove a value by key, returning success
            /// </summary>
            public bool TryRemove(string key, out List<long>? value)
            {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                return _groupings.TryRemove(key, out value);
#else
                // Single lookup instead of ContainsKey + indexer
                if (!_groupings.TryGetValue(key, out value))
                    return false;

                return _groupings.Remove(key);
#endif
            }

            #endregion
        }

        /// <summary>
        /// Represents a table with an incremental index as the key
        /// </summary>
        /// <typeparam name="T">Type of the row values</typeparam>
        private class IndexedTable<T>
        {
            #region Private Fields

            /// <summary>
            /// Internal dictionary for the table
            /// </summary>
            [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            private readonly ConcurrentDictionary<long, T> _table = [];
#else
            private readonly Dictionary<long, T> _table = [];
#endif

            /// <summary>
            /// Current highest available index
            /// </summary>
            [JsonIgnore, XmlIgnore]
            private long _tableIndex = 0;

            #endregion

            #region Properties

            /// <summary>
            /// Indicates if the table is empty
            /// </summary>
#if NET20 || NET35
            public bool IsEmpty => _table.Count == 0;
#else
            public bool IsEmpty => _table.IsEmpty;
#endif

            /// <summary>
            /// Direct access to the internal table
            /// </summary>
            /// <remarks>TODO: Investigate ways of avoiding this being needed</remarks>
            public IDictionary<long, T> Table => _table;

            /// <summary>
            /// All currently used indexes
            /// </summary>
            public long[] Indexes => [.. _table.Keys];

            /// <summary>
            /// All values in the table
            /// </summary>
            public T[] Values => [.. _table.Values];

            #endregion

            #region Accessors

            /// <summary>
            /// Add a value to the table, returning the insert index
            /// </summary>
            public long Add(T value)
            {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                // Increment returns the new value, so subtract one to get the
                // slot that was just reserved (first insert lands on 0)
                long index = Interlocked.Increment(ref _tableIndex) - 1;
                _table.TryAdd(index, value);
                return index;
#else
                // Post-increment already yields the pre-increment value; the
                // previous `_tableIndex++ - 1` started the table at index -1
                long index = _tableIndex++;
                _table[index] = value;
                return index;
#endif
            }

            /// <summary>
            /// Get a value from the table, default on error
            /// </summary>
            public T? Get(long index)
            {
                if (_table.TryGetValue(index, out var value))
                    return value;

                return default;
            }

            /// <summary>
            /// Remove a value from the table, returning success
            /// </summary>
            public bool Remove(long index)
            {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                return _table.TryRemove(index, out var _);
#else
                return _table.Remove(index);
#endif
            }

            /// <summary>
            /// Remove all values that match a function
            /// </summary>
            public void RemoveAll(Func<T, bool> func)
            {
                // Snapshot the key set; the table may be sparse, so loop
                // positions are NOT valid keys (`_table[i]` was a bug)
                long[] indexes = Indexes;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                Parallel.For(0, indexes.Length, i =>
#else
                for (int i = 0; i < indexes.Length; i++)
#endif
                {
                    long index = indexes[i];

                    // TryGetValue guards against keys removed concurrently
                    if (_table.TryGetValue(index, out var value) && func(value))
                        Remove(index);
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                });
#else
                }
#endif
            }

            /// <summary>
            /// Set an indexed value directly
            /// </summary>
            /// <remarks>This does not increment the index so values may be overwritten</remarks>
            public bool Set(long index, T value)
            {
                // Use the indexer so existing values really are overwritten;
                // ConcurrentDictionary.TryAdd would fail on an existing key
                _table[index] = value;
                return true;
            }

            /// <summary>
            /// Try to get a value by index, returning success
            /// </summary>
            public bool TryGet(long index, out T? value) => _table.TryGetValue(index, out value);

            /// <summary>
            /// Try to remove a value by index, returning success
            /// </summary>
            public bool TryRemove(long index, out T? value)
            {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                return _table.TryRemove(index, out value);
#else
                // Single lookup instead of ContainsKey + indexer
                if (!_table.TryGetValue(index, out value))
                    return false;

                return _table.Remove(index);
#endif
            }

            #endregion

            #region Search

            /// <summary>
            /// Indicates if an index is valid
            /// </summary>
            public bool ContainsIndex(long index) => _table.ContainsKey(index);

            /// <summary>
            /// Find an item based on a supplied function, (-1, default) if none matches
            /// </summary>
            public KeyValuePair<long, T?> Find(Func<T, bool> func)
            {
                foreach (long index in Indexes)
                {
                    if (_table.TryGetValue(index, out var value) && func(value))
                        return new KeyValuePair<long, T?>(index, value);
                }

                return new KeyValuePair<long, T?>(-1, default);
            }

            #endregion
        }

        #endregion

        #region Private instance variables

        /// <summary>
        /// Internal table for all items
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private readonly IndexedTable<DatItem> _items = new();

        /// <summary>
        /// Internal table for all machines
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private readonly IndexedTable<Machine> _machines = new();

        /// <summary>
        /// Internal table for all sources
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private readonly IndexedTable<Source> _sources = new();

        /// <summary>
        /// Internal dictionary representing the current buckets
        /// </summary>
        [JsonIgnore, XmlIgnore]
        private readonly GroupingTable _buckets = new();

        /// <summary>
        /// Logging object
        /// </summary>
        private readonly Logger _logger;

        #endregion

        #region Properties

        /// <summary>
        /// Get the keys in sorted order from the file dictionary
        /// </summary>
        /// <returns>List of the keys in sorted order</returns>
        [JsonIgnore, XmlIgnore]
        public string[] SortedKeys
        {
            get
            {
                List<string> keys = [.. _buckets.Keys];
                keys.Sort(new NaturalComparer());
                return [.. keys];
            }
        }

        /// <summary>
        /// DAT statistics
        /// </summary>
        [JsonIgnore, XmlIgnore]
        public DatStatistics DatStatistics { get; } = new DatStatistics();

        #endregion

        #region Constructors

        /// <summary>
        /// Generic constructor
        /// </summary>
        public ItemDatabase()
        {
            _logger = new Logger(this);
        }

        #endregion

        #region Accessors

        /// <summary>
        /// Add a DatItem to the dictionary after validation
        /// </summary>
        /// <param name="item">Item data to validate</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        /// <returns>The index for the added item, -1 on error</returns>
        public long AddItem(DatItem item, bool statsOnly)
        {
            // If we have a Disk, File, Media, or Rom, clean the hash data
            if (item is Disk disk)
            {
                // If the file has absolutely no hashes, skip and log
                if (disk.Status != ItemStatus.Nodump
                    && string.IsNullOrEmpty(disk.MD5)
                    && string.IsNullOrEmpty(disk.SHA1))
                {
                    _logger.Verbose($"Incomplete entry for '{disk.Name}' will be output as nodump");
                    disk.Status = ItemStatus.Nodump;
                }
            }
            else if (item is DatItems.Formats.File file)
            {
                // If the file has absolutely no hashes, skip and log
                if (string.IsNullOrEmpty(file.CRC)
                    && string.IsNullOrEmpty(file.MD5)
                    && string.IsNullOrEmpty(file.SHA1)
                    && string.IsNullOrEmpty(file.SHA256))
                {
                    _logger.Verbose($"Incomplete entry for '{file.GetName()}' will be output as nodump");
                }
            }
            else if (item is Media media)
            {
                // If the file has absolutely no hashes, skip and log
                if (string.IsNullOrEmpty(media.MD5)
                    && string.IsNullOrEmpty(media.SHA1)
                    && string.IsNullOrEmpty(media.SHA256)
                    && string.IsNullOrEmpty(media.SpamSum))
                {
                    _logger.Verbose($"Incomplete entry for '{media.Name}' will be output as nodump");
                }
            }
            else if (item is Rom rom)
            {
                long? size = rom.Size;

                // If we have the case where there is SHA-1 and nothing else, we don't fill in any other part of the data
                if (size is null && !string.IsNullOrEmpty(rom.SHA1))
                {
                    // No-op, just catch it so it doesn't go further
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Entry with only SHA-1 found - '{rom.Name}'");
                }

                // If we have a rom and it's missing size AND the hashes match a 0-byte file, fill in the rest of the info
                else if ((size == 0 || size is null)
                    && (string.IsNullOrEmpty(rom.CRC32) || rom.HasZeroHash()))
                {
                    rom.Size = 0;
                    rom.CRC16 = null; // HashType.CRC16.ZeroString
                    rom.CRC32 = HashType.CRC32.ZeroString;
                    rom.CRC64 = null; // HashType.CRC64.ZeroString
                    rom.MD2 = null; // HashType.MD2.ZeroString
                    rom.MD4 = null; // HashType.MD4.ZeroString
                    rom.MD5 = HashType.MD5.ZeroString;
                    rom.RIPEMD128 = null; // HashType.RIPEMD128.ZeroString
                    rom.RIPEMD160 = null; // HashType.RIPEMD160.ZeroString
                    rom.SHA1 = HashType.SHA1.ZeroString;
                    rom.SHA256 = null; // HashType.SHA256.ZeroString;
                    rom.SHA384 = null; // HashType.SHA384.ZeroString;
                    rom.SHA512 = null; // HashType.SHA512.ZeroString;
                    rom.SpamSum = null; // HashType.SpamSum.ZeroString;
                }

                // If the file has no size and it's not the above case, skip and log
                else if (rom.Status != ItemStatus.Nodump && (size == 0 || size is null))
                {
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.Name}' will be output as nodump");
                    rom.Status = ItemStatus.Nodump;
                }

                // If the file has a size but absolutely no hashes, skip and log
                else if (rom.Status != ItemStatus.Nodump
                    && size is not null && size > 0
                    && !rom.HasHashes())
                {
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.Name}' will be output as nodump");
                    rom.Status = ItemStatus.Nodump;
                }
            }

            // If only adding statistics, we add just item stats
            if (statsOnly)
            {
                DatStatistics.AddItemStatistics(item);
                return -1;
            }
            else
            {
                return AddItemInternal(item);
            }
        }

        /// <summary>
        /// Add a machine, returning the insert index
        /// </summary>
        public long AddMachine(Machine machine) => _machines.Add(machine);

        /// <summary>
        /// Add a source, returning the insert index
        /// </summary>
        public long AddSource(Source source) => _sources.Add(source);

        /// <summary>
        /// Remove all items marked for removal
        /// </summary>
        public void ClearMarked() => _items.RemoveAll(d => d.RemoveFlag);

        /// <summary>
        /// Get a item based on the index
        /// </summary>
        public DatItem? GetItem(long index) => _items.Get(index);

        /// <summary>
        /// Get the indices and items associated with a bucket name
        /// </summary>
        public Dictionary<long, DatItem> GetItemsForBucket(string? bucketName, bool filter = false)
        {
            // Get item indexes for the bucket
            if (!_buckets.TryGet(bucketName, out var itemIds) || itemIds is null)
                return [];

            // Get the items based on index
            var datItems = new Dictionary<long, DatItem>();
            foreach (long itemId in itemIds)
            {
                // Ignore missing IDs
                if (!_items.TryGet(itemId, out var datItem) || datItem is null)
                    continue;

                if (!filter || !datItem.RemoveFlag)
                    datItems[itemId] = datItem;
            }

            return datItems;
        }

        /// <summary>
        /// Get a machine based on the index
        /// </summary>
        public KeyValuePair<long, Machine?> GetMachine(long index)
        {
            if (!_machines.TryGet(index, out var machine))
                return new KeyValuePair<long, Machine?>(-1, null);

            return new KeyValuePair<long, Machine?>(index, machine);
        }

        /// <summary>
        /// Get a machine based on the name
        /// </summary>
        /// <remarks>This assumes that all machines have unique names</remarks>
        public KeyValuePair<long, Machine?> GetMachine(string? name)
        {
            if (string.IsNullOrEmpty(name))
                return new KeyValuePair<long, Machine?>(-1, null);

            var machine = _machines.Find(m => m.Name == name);
            return new KeyValuePair<long, Machine?>(machine.Key, machine.Value);
        }

        /// <summary>
        /// Get all machines and their indices
        /// </summary>
        public Machine[] GetMachines() => _machines.Values;

        /// <summary>
        /// Get a source based on the index
        /// </summary>
        public KeyValuePair<long, Source?> GetSource(long index)
        {
            if (!_sources.TryGet(index, out var source))
                return new KeyValuePair<long, Source?>(-1, null);

            return new KeyValuePair<long, Source?>(index, source);
        }

        /// <summary>
        /// Remove a key from the file dictionary if it exists
        /// </summary>
        /// <param name="key">Key in the dictionary to remove</param>
        public bool RemoveBucket(string key)
        {
            bool removed = _buckets.TryRemove(key, out var list);
            if (list is null)
                return removed;

            list.ForEach(index => RemoveItem(index));
            return removed;
        }

        /// <summary>
        /// Remove an item, returning if it could be removed
        /// </summary>
        public bool RemoveItem(long itemIndex)
        {
            // If the key doesn't exist, return
            if (!_items.TryRemove(itemIndex, out var datItem))
                return false;

            // Remove statistics, if possible
            if (datItem is not null)
                DatStatistics.RemoveItemStatistics(datItem);

            return true;
        }

        /// <summary>
        /// Remove a machine, returning if it could be removed
        /// </summary>
        public bool RemoveMachine(long machineIndex)
        {
            if (!_machines.TryRemove(machineIndex, out _))
                return false;

            _items.RemoveAll(d => d.MachineIndex == machineIndex);
            return true;
        }

        /// <summary>
        /// Remove a machine, returning if it could be removed
        /// </summary>
        public bool RemoveMachine(string machineName)
        {
            if (string.IsNullOrEmpty(machineName))
                return false;

            var machine = _machines.Find(m => string.Equals(m.Name, machineName, StringComparison.OrdinalIgnoreCase));
            return RemoveMachine(machine.Key);
        }

        /// <summary>
        /// Add an item, returning the insert index
        /// </summary>
        internal long AddItemInternal(DatItem item)
        {
            // Add the item with a new index
            long index = _items.Add(item);

            // Add the item statistics
            DatStatistics.AddItemStatistics(item);

            // Add the item to the default bucket
            PerformItemBucketing(new KeyValuePair<long, DatItem>(index, item), _buckets.GroupedBy, lower: true, norename: true);

            // Return the used index
            return index;
        }

        #endregion

        #region Bucketing

        /// <summary>
        /// Update the bucketing dictionary
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
        {
            // If the sorted type isn't the same, we want to sort the dictionary accordingly
            if (_buckets.GroupedBy != bucketBy && bucketBy != ItemKey.NULL)
            {
                _logger.User($"Organizing roms by {bucketBy}");
                PerformBucketing(bucketBy, lower, norename);
            }

            // Sort the dictionary to be consistent
            _logger.User($"Sorting roms by {bucketBy}");
            PerformSorting(norename);
        }

        /// <summary>
        /// Perform deduplication on the current sorted dictionary
        /// </summary>
        public void Deduplicate()
        {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            Parallel.ForEach(SortedKeys, key =>
#else
            foreach (var key in SortedKeys)
#endif
            {
                // Get the possibly unsorted list
                List<KeyValuePair<long, DatItem>> sortedList = [.. GetItemsForBucket(key)];

                // Sort and merge the list
                Sort(ref sortedList, false);
                sortedList = Merge(sortedList);

                // Add the list back to the dictionary
                RemoveBucket(key);
                sortedList.ForEach(item => AddItemInternal(item.Value));
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            });
#else
            }
#endif
        }

        /// <summary>
        /// Return the duplicate status of two items
        /// </summary>
        /// <param name="selfItem">Current DatItem</param>
        /// <param name="selfSource">Source associated with this item</param>
        /// <param name="lastItem">DatItem to check against</param>
        /// <param name="lastSource">Source associated with the last item</param>
        /// <returns>The DupeType corresponding to the relationship between the two</returns>
        public DupeType GetDuplicateStatus(
            KeyValuePair<long, DatItem>? selfItem,
            Source? selfSource,
            KeyValuePair<long, DatItem>? lastItem,
            Source? lastSource)
        {
            DupeType output = 0x00;

            // If either item is null
            if (selfItem is null || lastItem is null)
                return output;

            // If we don't have a duplicate at all, return none
            if (!selfItem.Value.Value.Equals(lastItem.Value.Value))
                return output;

            // Get the machines for comparison
            var selfMachine = GetMachine(selfItem.Value.Value.MachineIndex).Value;
            string? selfMachineName = selfMachine?.Name;
            var lastMachine = GetMachine(lastItem.Value.Value.MachineIndex).Value;
            string? lastMachineName = lastMachine?.Name;

            // If the duplicate is external already
#if NET20 || NET35
            if ((lastItem.Value.Value.DupeType & DupeType.External) != 0)
#else
            if (lastItem.Value.Value.DupeType.HasFlag(DupeType.External))
#endif
                output |= DupeType.External;

            // If the duplicate should be external
            else if (lastSource?.Index != selfSource?.Index)
                output |= DupeType.External;

            // Otherwise, it's considered an internal dupe
            else
                output |= DupeType.Internal;

            // If the item and machine names match
            if (lastMachineName == selfMachineName && lastItem.Value.Value.GetName() == selfItem.Value.Value.GetName())
                output |= DupeType.All;

            // Otherwise, hash match is assumed
            else
                output |= DupeType.Hash;

            return output;
        }

        /// <summary>
        /// List all duplicates found in a DAT based on a DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>List of matched DatItem objects</returns>
        /// <remarks>
        /// This also sets the remove flag on any duplicates found
        /// TODO: Figure out if removal should be a flag or just removed entirely
        /// </remarks>
        internal Dictionary<long, DatItem> GetDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return [];

            // We want to get the proper key for the DatItem, ignoring the index
            _ = SortAndGetKey(datItem, sorted);
            var machine = GetMachine(datItem.Value.MachineIndex);
            var source = GetSource(datItem.Value.SourceIndex);
            string key = datItem.Value.GetKey(_buckets.GroupedBy, machine.Value, source.Value);

            // If the key doesn't exist, return the empty list
            var items = GetItemsForBucket(key);
            if (items.Count == 0)
                return [];

            // Try to find duplicates
            Dictionary<long, DatItem> output = [];
            foreach (var rom in items)
            {
                // Skip items marked for removal
                if (rom.Value.RemoveFlag)
                    continue;

                // Mark duplicates for future removal
                if (datItem.Value.Equals(rom.Value))
                {
                    rom.Value.RemoveFlag = true;
                    output[rom.Key] = rom.Value;
                }
            }

            // Return any matching items
            return output;
        }

        /// <summary>
        /// Check if a DAT contains the given DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>True if it contains the rom, false otherwise</returns>
        internal bool HasDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return false;

            // We want to get the proper key for the DatItem, ignoring the index
            _ = SortAndGetKey(datItem, sorted);
            var machine = GetMachine(datItem.Value.MachineIndex);
            var source = GetSource(datItem.Value.SourceIndex);
            string key = datItem.Value.GetKey(_buckets.GroupedBy, machine.Value, source.Value);

            // If the key doesn't exist
            var roms = GetItemsForBucket(key);
            if (roms is null || roms.Count == 0)
                return false;

            // Try to find duplicates
            return roms.Values.Any(datItem.Value.Equals);
        }

        /// <summary>
        /// Merge an arbitrary set of item pairs based on the supplied information
        /// </summary>
        /// <param name="itemMappings">List of pairs representing the items to be merged</param>
        private List<KeyValuePair<long, DatItem>> Merge(List<KeyValuePair<long, DatItem>> itemMappings)
        {
            // Check for null or blank roms first
            if (itemMappings is null || itemMappings.Count == 0)
                return [];

            // Create output list
            List<KeyValuePair<long, DatItem>> output = [];

            // Then deduplicate them by checking to see if data matches previous saved roms
            int nodumpCount = 0;
            foreach (var kvp in itemMappings)
            {
                long itemIndex = kvp.Key;
                DatItem datItem = kvp.Value;

                // If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
                if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
                    continue;

                // If it's a nodump, add and skip
                if (datItem is Rom rom && rom.Status == ItemStatus.Nodump)
                {
                    output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
                    nodumpCount++;
                    continue;
                }
                else if (datItem is Disk disk && disk.Status == ItemStatus.Nodump)
                {
                    output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
                    nodumpCount++;
                    continue;
                }

                // If it's the first non-nodump rom in the list, don't touch it
                if (output.Count == nodumpCount)
                {
                    output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
                    continue;
                }

                // Find the index of the first duplicate, if one exists
                var datItemSource = GetSource(datItem.SourceIndex);
                int pos = output.FindIndex(lastItem =>
                {
                    var lastItemSource = GetSource(lastItem.Value.SourceIndex);
                    return GetDuplicateStatus(kvp, datItemSource.Value, lastItem, lastItemSource.Value) != 0x00;
                });
                if (pos < 0)
                {
                    output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
                    continue;
                }

                // Get the duplicate item
                long savedIndex = output[pos].Key;
                DatItem savedItem = output[pos].Value;
                var savedItemSource = GetSource(savedItem.SourceIndex);
                DupeType dupetype = GetDuplicateStatus(kvp, datItemSource.Value, output[pos], savedItemSource.Value);

                // Disks, Media, and Roms have more information to fill
                if (datItem is Disk diskItem && savedItem is Disk savedDisk)
                    savedDisk.FillMissingInformation(diskItem);
                else if (datItem is DatItems.Formats.File fileItem && savedItem is DatItems.Formats.File savedFile)
                    savedFile.FillMissingInformation(fileItem);
                else if (datItem is Media mediaItem && savedItem is Media savedMedia)
                    savedMedia.FillMissingInformation(mediaItem);
                else if (datItem is Rom romItem && savedItem is Rom savedRom)
                    savedRom.FillMissingInformation(romItem);

                savedItem.DupeType = dupetype;

                // Get the sources associated with the items
                var savedSource = GetSource(savedItem.SourceIndex);
                var itemSource = GetSource(datItem.SourceIndex);

                // Get the machines associated with the items
                var savedMachine = GetMachine(savedItem.MachineIndex);
                var itemMachine = GetMachine(datItem.MachineIndex);

                // If the current source has a lower ID than the saved, use the saved source
                if (itemSource.Value?.Index < savedSource.Value?.Index)
                {
                    datItem.SourceIndex = savedItem.SourceIndex;
                    _machines.Set(savedMachine.Key, (itemMachine.Value!.Clone() as Machine)!);
                    savedItem.SetName(datItem.GetName());
                }

                // If the saved machine is a child of the current machine, use the current machine instead
                if (savedMachine.Value!.CloneOf == itemMachine.Value!.Name || savedMachine.Value!.RomOf == itemMachine.Value!.Name)
                {
                    _machines.Set(savedMachine.Key, (itemMachine.Value!.Clone() as Machine)!);
                    savedItem.SetName(datItem.GetName());
                }

                // Replace the original item in the list
                output.RemoveAt(pos);
                output.Insert(pos, new KeyValuePair<long, DatItem>(savedIndex, savedItem));
            }

            return output;
        }

        /// <summary>
        /// Get the highest-order Field value that represents the statistics
        /// </summary>
        private ItemKey GetBestAvailable()
        {
            // Get the required counts
            long diskCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Disk);
            long mediaCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Media);
            long romCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Rom);
            long nodumpCount = DatStatistics.GetStatusCount(ItemStatus.Nodump);

            // Number of items that are expected to carry a hash at all
            long expectedHashCount = diskCount + mediaCount + romCount - nodumpCount;

            // If all items are supposed to have a SHA-512, we bucket by that
            if (expectedHashCount == DatStatistics.GetHashCount(HashType.SHA512))
                return ItemKey.SHA512;

            // If all items are supposed to have a SHA-384, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.SHA384))
                return ItemKey.SHA384;

            // If all items are supposed to have a SHA-256, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.SHA256))
                return ItemKey.SHA256;

            // If all items are supposed to have a SHA-1, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.SHA1))
                return ItemKey.SHA1;

            // If all items are supposed to have a RIPEMD160, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.RIPEMD160))
                return ItemKey.RIPEMD160;

            // If all items are supposed to have a RIPEMD128, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.RIPEMD128))
                return ItemKey.RIPEMD128;

            // If all items are supposed to have a MD5, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.MD5))
                return ItemKey.MD5;

            // If all items are supposed to have a MD4, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.MD4))
                return ItemKey.MD4;

            // If all items are supposed to have a MD2, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.MD2))
                return ItemKey.MD2;

            // If all items are supposed to have a CRC64, we bucket by that
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.CRC64))
                return ItemKey.CRC64;

            // If all items are supposed to have a CRC16, we bucket by that
            // TODO: This should really come after normal CRC
            else if (expectedHashCount == DatStatistics.GetHashCount(HashType.CRC16))
                return ItemKey.CRC16;

            // Otherwise, we bucket by CRC32
            else
                return ItemKey.CRC32;
        }

        /// <summary>
        /// Get the bucketing key for a given item
        /// </summary>
        /// <param name="datItem">Current item</param>
        /// <param name="bucketBy">ItemKey value representing what key to get</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private string GetBucketKey(DatItem datItem, ItemKey bucketBy, bool lower, bool norename)
        {
            var source = GetSource(datItem.SourceIndex);
            var machine = GetMachine(datItem.MachineIndex);

            // Treat NULL like machine
            if (bucketBy == ItemKey.NULL)
                bucketBy = ItemKey.Machine;

            // Get the bucket key
            return datItem.GetKey(bucketBy, machine.Value, source.Value, lower, norename);
        }

        /// <summary>
        /// Perform bucketing based on the item key provided
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
        {
            // Reset the bucketing values
            _buckets.GroupedBy = bucketBy;
            _buckets.Clear();

            // Get the current list of item indices; item ids are sparse after
            // removals, so the loop counter is a position into this array,
            // NOT an item id itself
            long[] itemIndexes = _items.Indexes;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            Parallel.For(0, itemIndexes.Length, i =>
#else
            for (int i = 0; i < itemIndexes.Length; i++)
#endif
            {
                long itemIndex = itemIndexes[i];
                var datItem = GetItem(itemIndex);
                if (datItem is null)
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                    return;
#else
                    continue;
#endif

                PerformItemBucketing(new KeyValuePair<long, DatItem>(itemIndex, datItem), bucketBy, lower, norename);
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            });
#else
            }
#endif
        }

        /// <summary>
        /// Bucket a single DatItem
        /// </summary>
        /// <param name="datItem">Item to bucket</param>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private void PerformItemBucketing(KeyValuePair<long, DatItem> datItem, ItemKey bucketBy, bool lower, bool norename)
        {
            string bucketKey = GetBucketKey(datItem.Value, bucketBy, lower, norename);
            _buckets.Add(bucketKey, datItem.Key);
        }

        /// <summary>
        /// Sort existing buckets for consistency
        /// </summary>
        private void PerformSorting(bool norename)
        {
            // Get the current list of bucket keys
            string[] bucketKeys = _buckets.Keys;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            Parallel.For(0, bucketKeys.Length, i =>
#else
            for (int i = 0; i < bucketKeys.Length; i++)
#endif
            {
                _buckets.TryGet(bucketKeys[i], out var itemIndices);
                if (itemIndices is null || itemIndices.Count == 0)
                {
                    _buckets.TryRemove(bucketKeys[i], out _);
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
                    return;
#else
                    continue;
#endif
                }

                // Materialize the surviving (index, item) pairs for sorting;
                // the lambda parameter is named `index` to avoid shadowing
                // the loop variable `i`
                var datItems = itemIndices
                    .FindAll(index => _items.ContainsIndex(index))
                    .ConvertAll(index => new KeyValuePair<long, DatItem>(index, _items.Get(index)!));
                Sort(ref datItems, norename);

                // TryAdd overwrites (see GroupingTable.TryAdd), so the sorted
                // ordering actually replaces the old grouping
                _buckets.TryAdd(bucketKeys[i], datItems.ConvertAll(kvp => kvp.Key));
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            });
#else
            }
#endif
        }

        /// <summary>
        /// Sort a list of item pairs by SourceID, Game, and Name (in order)
        /// </summary>
        /// <param name="itemMappings">List of pairs representing the items to be sorted</param>
        /// <param name="norename">True if files are not renamed, false otherwise</param>
        /// <returns>True if it sorted correctly, false otherwise</returns>
        private bool Sort(ref List<KeyValuePair<long, DatItem>> itemMappings, bool norename)
        {
            // Create the comparer external to the delegate
            var nc = new NaturalComparer();

            itemMappings.Sort(delegate (KeyValuePair<long, DatItem> x, KeyValuePair<long, DatItem> y)
            {
                try
                {
                    // Compare on source if renaming
                    if (!norename)
                    {
                        int xSourceIndex = GetSource(x.Value.SourceIndex).Value?.Index ?? 0;
                        int ySourceIndex = GetSource(y.Value.SourceIndex).Value?.Index ?? 0;
                        if (xSourceIndex != ySourceIndex)
                            return xSourceIndex - ySourceIndex;
                    }

                    // Get the machines
                    Machine? xMachine = GetMachine(x.Value.MachineIndex).Value;
                    Machine? yMachine = GetMachine(y.Value.MachineIndex).Value;

                    // If machine names don't match
                    string? xMachineName = xMachine?.Name;
                    string? yMachineName = yMachine?.Name;
                    if (xMachineName != yMachineName)
                        return nc.Compare(xMachineName, yMachineName);

                    // If types don't match
                    Data.Models.Metadata.ItemType xType = x.Value.ItemType;
                    Data.Models.Metadata.ItemType yType = y.Value.ItemType;
                    if (xType != yType)
                        return xType - yType;

                    // If directory names don't match
                    string? xDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
                    string? yDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
                    if (xDirectoryName != yDirectoryName)
                        return nc.Compare(xDirectoryName, yDirectoryName);

                    // If item names don't match
                    string? xName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
                    string? yName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
                    return nc.Compare(xName, yName);
                }
                catch
                {
                    // Absorb the error
                    return 0;
                }
            });

            return true;
        }

        /// <summary>
        /// Sort the input DAT and get the key to be used by the item
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>Key to try to use</returns>
        private string SortAndGetKey(KeyValuePair<long, DatItem> datItem, bool sorted = false)
        {
            // If we're not already sorted, take care of it
            if (!sorted)
                BucketBy(GetBestAvailable());

            // Now that we have the sorted type, we get the proper key
            return GetBucketKey(datItem.Value, _buckets.GroupedBy, lower: true, norename: true);
        }

        #endregion

        #region Statistics

        /// <summary>
        /// Recalculate the statistics for the Dat
        /// </summary>
        public void RecalculateStats()
        {
            DatStatistics.ResetStatistics();
            Array.ForEach(_items.Values, item => DatStatistics.AddItemStatistics(item));
        }

        #endregion
    }
}