using System.Collections;
#if NET40_OR_GREATER || NETCOREAPP
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using System.Xml.Serialization;
using Newtonsoft.Json;
using SabreTools.Core.Filter;
using SabreTools.Core.Tools;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
using SabreTools.Hashing;
using SabreTools.IO.Logging;
using SabreTools.Matching.Compare;

// TODO: Remove IDictionary implementation
namespace SabreTools.DatFiles
{
    /// <summary>
    /// Item dictionary with statistics, bucketing, and sorting
    /// </summary>
    [JsonObject("items"), XmlRoot("items")]
    public class ItemDictionary : IDictionary<string, List<DatItem>?>
    {
        #region Private instance variables

        /// <summary>
        /// Determine the bucketing key for all items
        /// </summary>
        private ItemKey bucketedBy;

        /// <summary>
        /// Determine merging type for all items
        /// </summary>
        private DedupeType mergedBy;

        /// <summary>
        /// Internal dictionary for the class
        /// </summary>
#if NET40_OR_GREATER || NETCOREAPP
        private readonly ConcurrentDictionary<string, List<DatItem>?> _items = [];
#else
        private readonly Dictionary<string, List<DatItem>?> _items = [];
#endif

        /// <summary>
        /// Logging object
        /// </summary>
        private readonly Logger _logger;

        #endregion

        #region Publicly available fields

        #region Keys

        /// <summary>
        /// Get the keys from the file dictionary
        /// </summary>
        /// <returns>List of the keys</returns>
        [JsonIgnore, XmlIgnore]
        public ICollection<string> Keys
        {
            get { return _items.Keys; }
        }

        /// <summary>
        /// Get the keys in sorted order from the file dictionary
        /// </summary>
        /// <returns>List of the keys in sorted order</returns>
        [JsonIgnore, XmlIgnore]
        public List<string> SortedKeys
        {
            get
            {
                List<string> keys = [.. _items.Keys];
                keys.Sort(new NaturalComparer());
                return keys;
            }
        }

        #endregion

        #region Statistics

        /// <summary>
        /// DAT statistics
        /// </summary>
        [JsonIgnore, XmlIgnore]
        public DatStatistics DatStatistics { get; } = new DatStatistics();

        #endregion

        #endregion

        #region Constructors

        /// <summary>
        /// Generic constructor
        /// </summary>
        public ItemDictionary()
        {
            bucketedBy = ItemKey.NULL;
            mergedBy = DedupeType.None;
            _logger = new Logger(this);
        }

        #endregion

        #region Accessors

        /// <summary>
        /// Passthrough to access the file dictionary
        /// </summary>
        /// <param name="key">Key in the dictionary to reference</param>
        public List<DatItem>? this[string key]
        {
            get
            {
                // Explicit lock for some weird corner cases
                // NOTE(review): locking on a string is fragile (interned strings make this
                // effectively process-wide); consider a private gate object — confirm callers first
                lock (key)
                {
                    // Ensure the key exists
                    EnsureKey(key);

                    // Now return the value
                    return _items[key];
                }
            }
            set
            {
                Remove(key);
                if (value == null)
                    _items[key] = null;
                else
                    Add(key, value);
            }
        }

        /// <summary>
        /// Add a value to the file dictionary
        /// </summary>
        /// <param name="key">Key in the dictionary to add to</param>
        /// <param name="value">Value to add to the dictionary</param>
        public void Add(string key, DatItem value)
        {
            // Explicit lock for some weird corner cases
            lock (key)
            {
                // Ensure the key exists
                EnsureKey(key);

                // If item is null, don't add it
                if (value == null)
                    return;

                // Now add the value
                _items[key]!.Add(value);

                // Now update the statistics
                DatStatistics.AddItemStatistics(value);
            }
        }

        /// <summary>
        /// Add a range of values to the file dictionary
        /// </summary>
        /// <param name="key">Key in the dictionary to add to</param>
        /// <param name="value">Value to add to the dictionary</param>
        public void Add(string key, List<DatItem>? value)
        {
            // Explicit lock for some weird corner cases
            lock (key)
            {
                // If the value is null or empty, just return
                if (value == null || value.Count == 0)
                    return;

                // Ensure the key exists
                EnsureKey(key);

                // Now add the value
                _items[key]!.AddRange(value);

                // Now update the statistics
                foreach (DatItem item in value)
                {
                    DatStatistics.AddItemStatistics(item);
                }
            }
        }

        /// <summary>
        /// Add a DatItem to the dictionary after checking
        /// </summary>
        /// <param name="item">Item data to check against</param>
        /// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
        /// <returns>The key for the item</returns>
        public string AddItem(DatItem item, bool statsOnly)
        {
            string key;

            // If we have a Disk, Media, or Rom, clean the hash data
            if (item is Disk disk)
            {
                // If the file has absolutely no hashes, skip and log
                if (disk.GetStringFieldValue(Models.Metadata.Disk.StatusKey).AsEnumValue<ItemStatus>() != ItemStatus.Nodump
                    && string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.MD5Key))
                    && string.IsNullOrEmpty(disk.GetStringFieldValue(Models.Metadata.Disk.SHA1Key)))
                {
                    _logger.Verbose($"Incomplete entry for '{disk.GetName()}' will be output as nodump");
                    disk.SetFieldValue(Models.Metadata.Disk.StatusKey, ItemStatus.Nodump.AsStringValue());
                }

                item = disk;
            }
            if (item is Media media)
            {
                // If the file has absolutely no hashes, skip and log
                if (string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.MD5Key))
                    && string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA1Key))
                    && string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SHA256Key))
                    && string.IsNullOrEmpty(media.GetStringFieldValue(Models.Metadata.Media.SpamSumKey)))
                {
                    // NOTE(review): unlike Disk/Rom, no nodump status is set here — confirm intended
                    _logger.Verbose($"Incomplete entry for '{media.GetName()}' will be output as nodump");
                }

                item = media;
            }
            else if (item is Rom rom)
            {
                long? size = rom.GetInt64FieldValue(Models.Metadata.Rom.SizeKey);

                // If we have the case where there is SHA-1 and nothing else, we don't fill in any other part of the data
                if (size == null && !rom.HasHashes())
                {
                    // No-op, just catch it so it doesn't go further
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Entry with only SHA-1 found - '{rom.GetName()}'");
                }
                // If we have a rom and it's missing size AND the hashes match a 0-byte file, fill in the rest of the info
                else if ((size == 0 || size == null)
                    && (string.IsNullOrEmpty(rom.GetStringFieldValue(Models.Metadata.Rom.CRCKey)) || rom.HasZeroHash()))
                {
                    rom.SetFieldValue(Models.Metadata.Rom.SizeKey, Constants.SizeZero.ToString());
                    rom.SetFieldValue(Models.Metadata.Rom.CRCKey, ZeroHash.CRC32Str);
                    rom.SetFieldValue(Models.Metadata.Rom.MD2Key, null); // ZeroHash.GetString(HashType.MD2)
                    rom.SetFieldValue(Models.Metadata.Rom.MD4Key, null); // ZeroHash.GetString(HashType.MD4)
                    rom.SetFieldValue(Models.Metadata.Rom.MD5Key, ZeroHash.MD5Str);
                    rom.SetFieldValue(Models.Metadata.Rom.SHA1Key, ZeroHash.SHA1Str);
                    rom.SetFieldValue(Models.Metadata.Rom.SHA256Key, null); // ZeroHash.SHA256Str;
                    rom.SetFieldValue(Models.Metadata.Rom.SHA384Key, null); // ZeroHash.SHA384Str;
                    rom.SetFieldValue(Models.Metadata.Rom.SHA512Key, null); // ZeroHash.SHA512Str;
                    rom.SetFieldValue(Models.Metadata.Rom.SpamSumKey, null); // ZeroHash.SpamSumStr;
                }
                // If the file has no size and it's not the above case, skip and log
                else if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() != ItemStatus.Nodump
                    && (size == 0 || size == null))
                {
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.GetName()}' will be output as nodump");
                    rom.SetFieldValue(Models.Metadata.Rom.StatusKey, ItemStatus.Nodump.AsStringValue());
                }
                // If the file has a size but absolutely no hashes, skip and log
                else if (rom.GetStringFieldValue(Models.Metadata.Rom.StatusKey).AsEnumValue<ItemStatus>() != ItemStatus.Nodump
                    && size != null && size > 0
                    && !rom.HasHashes())
                {
                    //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.GetName()}' will be output as nodump");
                    rom.SetFieldValue(Models.Metadata.Rom.StatusKey, ItemStatus.Nodump.AsStringValue());
                }

                item = rom;
            }

            // Get the key and add the file
            key = item.GetKey(ItemKey.Machine);

            // If only adding statistics, we add an empty key for games and then just item stats
            if (statsOnly)
            {
                EnsureKey(key);
                DatStatistics.AddItemStatistics(item);
            }
            else
            {
                Add(key, item);
            }

            return key;
        }

        /// <summary>
        /// Remove any keys that have null or empty values
        /// </summary>
        internal void ClearEmpty()
        {
            string[] keys = [.. Keys];
            foreach (string key in keys)
            {
#if NET40_OR_GREATER || NETCOREAPP
                // If the key doesn't exist, skip
                if (!_items.TryGetValue(key, out var value))
                    continue;

                // If the value is null, remove
                else if (value == null)
                    _items.TryRemove(key, out _);

                // If there are no non-blank items, remove
                else if (value!.FindIndex(i => i != null && i is not Blank) == -1)
                    _items.TryRemove(key, out _);
#else
                // If the key doesn't exist, skip
                if (!_items.ContainsKey(key))
                    continue;

                // If the value is null, remove
                else if (_items[key] == null)
                    _items.Remove(key);

                // If there are no non-blank items, remove
                else if (_items[key]!.FindIndex(i => i != null && i is not Blank) == -1)
                    _items.Remove(key);
#endif
            }
        }

        /// <summary>
        /// Remove all items marked for removal
        /// </summary>
        internal void ClearMarked()
        {
            string[] keys = [.. Keys];
            foreach (string key in keys)
            {
                // Skip invalid item lists
                List<DatItem>? oldItemList = this[key];
                if (oldItemList == null)
                    continue; // BUGFIX: was `return`, which aborted processing of all remaining keys

                List<DatItem> newItemList = oldItemList.FindAll(i => i.GetBoolFieldValue(DatItem.RemoveKey) != true);

                Remove(key);
                Add(key, newItemList);
            }
        }

        /// <summary>
        /// Get if the file dictionary contains the key
        /// </summary>
        /// <param name="key">Key in the dictionary to check</param>
        /// <returns>True if the key exists, false otherwise</returns>
        public bool ContainsKey(string key)
        {
            // If the key is null, we return false since keys can't be null
            if (key == null)
                return false;

            // Explicit lock for some weird corner cases
            lock (key)
            {
                return _items.ContainsKey(key);
            }
        }

        /// <summary>
        /// Get if the file dictionary contains the key and value
        /// </summary>
        /// <param name="key">Key in the dictionary to check</param>
        /// <param name="value">Value in the dictionary to check</param>
        /// <returns>True if the key exists, false otherwise</returns>
        public bool Contains(string key, DatItem value)
        {
            // If the key is null, we return false since keys can't be null
            if (key == null)
                return false;

            // Explicit lock for some weird corner cases
            lock (key)
            {
#if NET40_OR_GREATER || NETCOREAPP
                if (_items.TryGetValue(key, out var list) && list != null)
                    return list.Contains(value);
#else
                if (_items.ContainsKey(key) && _items[key] != null)
                    return _items[key]!.Contains(value);
#endif
            }

            return false;
        }

        /// <summary>
        /// Ensure the key exists in the items dictionary
        /// </summary>
        /// <param name="key">Key to ensure</param>
        public void EnsureKey(string key)
        {
            // If the key is missing from the dictionary, add it
            if (!_items.ContainsKey(key))
#if NET40_OR_GREATER || NETCOREAPP
                _items.TryAdd(key, []);
#else
                _items[key] = [];
#endif
        }

        /// <summary>
        /// Get the items associated with a bucket name
        /// </summary>
        /// <param name="bucketName">Name of the bucket to retrieve items for</param>
        /// <param name="filter">True to exclude items marked for removal, false otherwise</param>
        /// <returns>List of items for the bucket, empty on missing or null buckets</returns>
        public List<DatItem> GetItemsForBucket(string? bucketName, bool filter = false)
        {
            if (bucketName == null)
                return [];

            if (!_items.ContainsKey(bucketName))
                return [];

            var items = _items[bucketName];
            if (items == null)
                return [];

            var datItems = new List<DatItem>();
            foreach (DatItem item in items)
            {
                if (!filter || item.GetBoolFieldValue(DatItem.RemoveKey) != true)
                    datItems.Add(item);
            }

            return datItems;
        }

        /// <summary>
        /// Remove a key from the file dictionary if it exists
        /// </summary>
        /// <param name="key">Key in the dictionary to remove</param>
        public bool Remove(string key)
        {
            // Explicit lock for some weird corner cases
            lock (key)
            {
                // If the key doesn't exist, return
                if (!ContainsKey(key) || _items[key] == null)
                    return false;

                // Remove the statistics first
                foreach (DatItem item in _items[key]!)
                {
                    DatStatistics.RemoveItemStatistics(item);
                }

                // Remove the key from the dictionary
#if NET40_OR_GREATER || NETCOREAPP
                return _items.TryRemove(key, out _);
#else
                return _items.Remove(key);
#endif
            }
        }

        /// <summary>
        /// Remove the first instance of a value from the file dictionary if it exists
        /// </summary>
        /// <param name="key">Key in the dictionary to remove from</param>
        /// <param name="value">Value to remove from the dictionary</param>
        public bool Remove(string key, DatItem value)
        {
            // Explicit lock for some weird corner cases
            lock (key)
            {
                // If the key and value doesn't exist, return
                if (!Contains(key, value) || _items[key] == null)
                    return false;

                // Remove the statistics first
                DatStatistics.RemoveItemStatistics(value);

                return _items[key]!.Remove(value);
            }
        }

        /// <summary>
        /// Reset a key from the file dictionary if it exists
        /// </summary>
        /// <param name="key">Key in the dictionary to reset</param>
        public bool Reset(string key)
        {
            // If the key doesn't exist, return
            if (!ContainsKey(key) || _items[key] == null)
                return false;

            // Remove the statistics first
            foreach (DatItem item in _items[key]!)
            {
                DatStatistics.RemoveItemStatistics(item);
            }

            // Remove the key from the dictionary
            _items[key] = [];
            return true;
        }

        /// <summary>
        /// Override the internal ItemKey value
        /// </summary>
        /// <param name="newBucket">New bucketing key to record</param>
        public void SetBucketedBy(ItemKey newBucket)
        {
            bucketedBy = newBucket;
        }

        #endregion

        // TODO: All internal, can this be put into a better location?
        #region Bucketing

        /// <summary>
        /// Take the arbitrarily bucketed Files Dictionary and convert to one bucketed by a user-defined method
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="dedupeType">Dedupe type that should be used</param>
        /// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        internal void BucketBy(ItemKey bucketBy, DedupeType dedupeType, bool lower = true, bool norename = true)
        {
            // If we have a situation where there's no dictionary or no keys at all, we skip
            if (_items == null || _items.Count == 0)
                return;

            // If the sorted type isn't the same, we want to sort the dictionary accordingly
            if (bucketedBy != bucketBy && bucketBy != ItemKey.NULL)
            {
                _logger.User($"Organizing roms by {bucketBy}");
                PerformBucketing(bucketBy, lower, norename);
            }

            // If the merge type isn't the same, we want to merge the dictionary accordingly
            if (mergedBy != dedupeType)
            {
                _logger.User($"Deduping roms by {dedupeType}");
                PerformDeduplication(bucketBy, dedupeType);
            }
            // If the merge type is the same, we want to sort the dictionary to be consistent
            else
            {
                _logger.User($"Sorting roms by {bucketBy}");
                PerformSorting();
            }
        }

        /// <summary>
        /// List all duplicates found in a DAT based on a DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>List of matched DatItem objects</returns>
        /// <remarks>This also sets the remove flag on any duplicates found</remarks>
        internal List<DatItem> GetDuplicates(DatItem datItem, bool sorted = false)
        {
            List<DatItem> output = [];

            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return output;

            // We want to get the proper key for the DatItem
            string key = SortAndGetKey(datItem, sorted);

            // If the key doesn't exist, return the empty list
            if (!ContainsKey(key))
                return output;

            // Try to find duplicates
            List<DatItem>? roms = this[key];
            if (roms == null)
                return output;

            List<DatItem> left = [];
            for (int i = 0; i < roms.Count; i++)
            {
                DatItem other = roms[i];
                if (other.GetBoolFieldValue(DatItem.RemoveKey) == true)
                {
                    left.Add(other);
                    continue;
                }

                if (datItem.Equals(other))
                {
                    other.SetFieldValue(DatItem.RemoveKey, true);
                    output.Add(other);
                }
                else
                {
                    left.Add(other);
                }
            }

            // Add back all roms with the proper flags
            Remove(key);
            Add(key, output);
            Add(key, left);

            return output;
        }

        /// <summary>
        /// Check if a DAT contains the given DatItem
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>True if it contains the rom, false otherwise</returns>
        internal bool HasDuplicates(DatItem datItem, bool sorted = false)
        {
            // Check for an empty rom list first
            if (DatStatistics.TotalCount == 0)
                return false;

            // We want to get the proper key for the DatItem
            string key = SortAndGetKey(datItem, sorted);

            // If the key doesn't exist, return the empty list
            if (!ContainsKey(key))
                return false;

            // Try to find duplicates
            List<DatItem>? roms = this[key];
            if (roms == null)
                return false;

            return roms.FindIndex(r => datItem.Equals(r)) > -1;
        }

        /// <summary>
        /// Get the highest-order Field value that represents the statistics
        /// </summary>
        private ItemKey GetBestAvailable()
        {
            // Get the required counts
            long diskCount = DatStatistics.GetItemCount(ItemType.Disk);
            long mediaCount = DatStatistics.GetItemCount(ItemType.Media);
            long romCount = DatStatistics.GetItemCount(ItemType.Rom);
            long nodumpCount = DatStatistics.GetStatusCount(ItemStatus.Nodump);

            // If all items are supposed to have a SHA-512, we bucket by that
            if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA512))
                return ItemKey.SHA512;

            // If all items are supposed to have a SHA-384, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA384))
                return ItemKey.SHA384;

            // If all items are supposed to have a SHA-256, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA256))
                return ItemKey.SHA256;

            // If all items are supposed to have a SHA-1, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA1))
                return ItemKey.SHA1;

            // If all items are supposed to have a MD5, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.MD5))
                return ItemKey.MD5;

            // If all items are supposed to have a MD4, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.MD4))
                return ItemKey.MD4;

            // If all items are supposed to have a MD2, we bucket by that
            else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.MD2))
                return ItemKey.MD2;

            // Otherwise, we bucket by CRC
            else
                return ItemKey.CRC;
        }

        /// <summary>
        /// Perform bucketing based on the item key provided
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="lower">True if the key should be lowercased, false otherwise</param>
        /// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
        private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
        {
            // Set the sorted type
            bucketedBy = bucketBy;

            // Reset the merged type since this might change the merge
            mergedBy = DedupeType.None;

            // First do the initial sort of all of the roms inplace
            List<string> oldkeys = [.. Keys];
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.For(0, oldkeys.Count, Core.Globals.ParallelOptions, k =>
#elif NET40_OR_GREATER
            Parallel.For(0, oldkeys.Count, k =>
#else
            for (int k = 0; k < oldkeys.Count; k++)
#endif
            {
                string key = oldkeys[k];
                if (this[key] == null)
                    Remove(key);

                // Now add each of the roms to their respective keys
                for (int i = 0; i < this[key]!.Count; i++)
                {
                    DatItem item = this[key]![i];
                    if (item == null)
                        continue;

                    // We want to get the key most appropriate for the given sorting type
                    string newkey = item.GetKey(bucketBy, lower, norename);

                    // If the key is different, move the item to the new key
                    if (newkey != key)
                    {
                        Add(newkey, item);
                        Remove(key, item);
                        i--; // This makes sure the index stays on the correct item since one was removed
                    }
                }

                // If the key is now empty, remove it
                if (this[key]!.Count == 0)
                    Remove(key);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Perform deduplication based on the deduplication type provided
        /// </summary>
        /// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
        /// <param name="dedupeType">Dedupe type that should be used</param>
        private void PerformDeduplication(ItemKey bucketBy, DedupeType dedupeType)
        {
            // Set the sorted type
            mergedBy = dedupeType;

            List<string> keys = [.. Keys];
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(keys, key =>
#else
            foreach (var key in keys)
#endif
            {
                // Get the possibly unsorted list
                List<DatItem>? sortedlist = this[key];
                if (sortedlist == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                // Sort the list of items to be consistent
                DatFileTool.Sort(ref sortedlist, false);

                // If we're merging the roms, do so
                if (dedupeType == DedupeType.Full || (dedupeType == DedupeType.Game && bucketBy == ItemKey.Machine))
                    sortedlist = DatFileTool.Merge(sortedlist);

                // Add the list back to the dictionary
                Reset(key);
                Add(key, sortedlist);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Perform inplace sorting of the dictionary
        /// </summary>
        private void PerformSorting()
        {
            List<string> keys = [.. Keys];
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(keys, key =>
#else
            foreach (var key in keys)
#endif
            {
                // Get the possibly unsorted list
                List<DatItem>? sortedlist = this[key];

                // Sort the list of items to be consistent
                if (sortedlist != null)
                    DatFileTool.Sort(ref sortedlist, false);
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        /// <summary>
        /// Sort the input DAT and get the key to be used by the item
        /// </summary>
        /// <param name="datItem">Item to try to match</param>
        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
        /// <returns>Key to try to use</returns>
        private string SortAndGetKey(DatItem datItem, bool sorted = false)
        {
            // If we're not already sorted, take care of it
            if (!sorted)
                BucketBy(GetBestAvailable(), DedupeType.None);

            // Now that we have the sorted type, we get the proper key
            return datItem.GetKey(bucketedBy);
        }

        #endregion

        // TODO: All internal, can this be put into a better location?
        #region Filtering

        /// <summary>
        /// Execute all filters in a filter runner on the items in the dictionary
        /// </summary>
        /// <param name="filterRunner">Preconfigured filter runner to use</param>
        internal void ExecuteFilters(FilterRunner filterRunner)
        {
            List<string> keys = [.. Keys];
#if NET452_OR_GREATER || NETCOREAPP
            Parallel.ForEach(keys, Core.Globals.ParallelOptions, key =>
#elif NET40_OR_GREATER
            Parallel.ForEach(keys, key =>
#else
            foreach (var key in keys)
#endif
            {
                List<DatItem>? items = this[key];
                if (items == null)
#if NET40_OR_GREATER || NETCOREAPP
                    return;
#else
                    continue;
#endif

                // Filter all items in the current key
                List<DatItem> newItems = [];
                foreach (var item in items)
                {
                    if (item.PassesFilter(filterRunner))
                        newItems.Add(item);
                }

                // Set the value in the key to the new set
                this[key] = newItems;
#if NET40_OR_GREATER || NETCOREAPP
            });
#else
            }
#endif
        }

        #endregion

        #region Statistics

        /// <summary>
        /// Recalculate the statistics for the Dat
        /// </summary>
        public void RecalculateStats()
        {
            // Wipe out any stats already there
            DatStatistics.ResetStatistics();

            // If we have a blank Dat in any way, return
            if (_items == null)
                return;

            // Loop through and add
            foreach (string key in _items.Keys)
            {
                List<DatItem>? datItems = _items[key];
                if (datItems == null)
                    continue;

                foreach (DatItem item in datItems)
                {
                    DatStatistics.AddItemStatistics(item);
                }
            }
        }

        #endregion

        #region IDictionary Implementations

        public ICollection<List<DatItem>?> Values => ((IDictionary<string, List<DatItem>?>)_items).Values;

        public int Count => ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).Count;

        public bool IsReadOnly => ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).IsReadOnly;

        public bool TryGetValue(string key, out List<DatItem>? value)
        {
            return ((IDictionary<string, List<DatItem>?>)_items).TryGetValue(key, out value);
        }

        public void Add(KeyValuePair<string, List<DatItem>?> item)
        {
            ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).Add(item);
        }

        public void Clear()
        {
            ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).Clear();
        }

        public bool Contains(KeyValuePair<string, List<DatItem>?> item)
        {
            return ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).Contains(item);
        }

        public void CopyTo(KeyValuePair<string, List<DatItem>?>[] array, int arrayIndex)
        {
            ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).CopyTo(array, arrayIndex);
        }

        public bool Remove(KeyValuePair<string, List<DatItem>?> item)
        {
            return ((ICollection<KeyValuePair<string, List<DatItem>?>>)_items).Remove(item);
        }

        public IEnumerator<KeyValuePair<string, List<DatItem>?>> GetEnumerator()
        {
            return ((IEnumerable<KeyValuePair<string, List<DatItem>?>>)_items).GetEnumerator();
        }

        IEnumerator IEnumerable.GetEnumerator()
        {
            return ((IEnumerable)_items).GetEnumerator();
        }

        #endregion
    }
}