using System;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
using System.Collections.Concurrent;
#endif
using System.Collections.Generic;
using System.IO;
using System.Linq;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
using System.Threading;
using System.Threading.Tasks;
#endif
using System.Xml.Serialization;
using Newtonsoft.Json;
using SabreTools.Metadata.DatItems;
using SabreTools.Metadata.DatItems.Formats;
using SabreTools.Hashing;
using SabreTools.Logging;
using SabreTools.Text.Compare;
using SabreTools.Text.Extensions;
using ItemStatus = SabreTools.Data.Models.Metadata.ItemStatus;
/*
* Planning Notes:
*
* In order for this in-memory "database" design to work, there need to be a few things:
* - Feature parity with all existing item dictionary operations
* - A way to transition between the two item dictionaries (a flag?)
* - Helper methods that target the "database" version instead of assuming the standard dictionary
*
* Notable changes include:
* - Separation of Machine from DatItem, leading to a mapping instead
* - Adding machines to the dictionary distinctly from the items
* - Having a separate "bucketing" system that only reorders indices and not full items; quicker?
* - Non-key-based add/remove of values; use explicit methods instead of dictionary-style accessors
*/
namespace SabreTools.Metadata.DatFiles
{
/// <summary>
/// Item dictionary with statistics, bucketing, and sorting
/// </summary>
[JsonObject("items"), XmlRoot("items")]
public class ItemDatabase
{
#region Private Classes
/// <summary>
/// Represents a table with a string key and a list of indexes as a value
/// </summary>
private class GroupingTable
{
    #region Private Fields

    /// <summary>
    /// Internal dictionary mapping grouping keys to lists of item indexes
    /// </summary>
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    private readonly ConcurrentDictionary<string, List<long>> _groupings = [];
#else
    private readonly Dictionary<string, List<long>> _groupings = [];
#endif

    /// <summary>
    /// Gate guarding multi-step mutations of <see cref="_groupings"/>
    /// </summary>
    /// <remarks>
    /// Locking a private gate instead of the key string avoids contention
    /// with unrelated code that may lock the same interned string instance
    /// </remarks>
    private readonly object _gate = new();

    #endregion

    #region Properties

    /// <summary>
    /// Indicates what the grouping keys represent
    /// </summary>
    public ItemKey GroupedBy { get; set; } = ItemKey.NULL;

    /// <summary>
    /// All grouping keys
    /// </summary>
    public string[] Keys => [.. _groupings.Keys];

    #endregion

    #region Accessors

    /// <summary>
    /// Add an index to a grouping, creating the grouping if needed
    /// </summary>
    public void Add(string key, long index)
    {
        // The get-or-create plus List.Add is a compound operation, so it
        // is guarded even on the ConcurrentDictionary path (List<long>
        // itself is not thread-safe)
        lock (_gate)
        {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            var grouping = _groupings.GetOrAdd(key, _ => []);
            grouping.Add(index);
#else
            if (!_groupings.TryGetValue(key, out var grouping))
            {
                grouping = [];
                _groupings[key] = grouping;
            }
            grouping.Add(index);
#endif
        }
    }

    /// <summary>
    /// Remove all groupings
    /// </summary>
    public void Clear() => _groupings.Clear();

    /// <summary>
    /// Set a grouping by key and list of values
    /// </summary>
    /// <remarks>This overwrites anything existing</remarks>
    public bool TryAdd(string key, List<long> value)
    {
        // Use the indexer on both paths so the documented overwrite
        // semantics hold; ConcurrentDictionary.TryAdd silently refuses
        // to replace an existing grouping, which broke re-sorting
        _groupings[key] = value;
        return true;
    }

    /// <summary>
    /// Try to get a value by key, returning success
    /// </summary>
    public bool TryGet(string? key, out List<long>? value)
    {
        if (key is null)
        {
            value = null;
            return false;
        }

        return _groupings.TryGetValue(key, out value);
    }

    /// <summary>
    /// Try to remove a value by key, returning success
    /// </summary>
    public bool TryRemove(string key, out List<long>? value)
    {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
        return _groupings.TryRemove(key, out value);
#else
        // Guard the lookup-then-remove pair so it behaves atomically
        lock (_gate)
        {
            if (!_groupings.TryGetValue(key, out value))
                return false;

            return _groupings.Remove(key);
        }
#endif
    }

    #endregion
}
/// <summary>
/// Represents a table with an incremental index as the key
/// </summary>
/// <typeparam name="T">Type of the row values</typeparam>
private class IndexedTable<T>
{
    #region Private Fields

    /// <summary>
    /// Internal dictionary for the table
    /// </summary>
    [JsonIgnore, XmlIgnore]
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    private readonly ConcurrentDictionary<long, T> _table = [];
#else
    private readonly Dictionary<long, T> _table = [];
#endif

    /// <summary>
    /// Next index to hand out on insert
    /// </summary>
    [JsonIgnore, XmlIgnore]
    private long _tableIndex = 0;

    #endregion

    #region Properties

    /// <summary>
    /// Indicates if the table is empty
    /// </summary>
#if NET20 || NET35
    public bool IsEmpty => _table.Count == 0;
#else
    public bool IsEmpty => _table.IsEmpty;
#endif

    /// <summary>
    /// Direct access to the internal table
    /// </summary>
    /// <remarks>TODO: Investigate ways of avoiding this being needed</remarks>
    public IDictionary<long, T> Table => _table;

    /// <summary>
    /// All currently used indexes
    /// </summary>
    public long[] Indexes => [.. _table.Keys];

    /// <summary>
    /// All values in the table
    /// </summary>
    public T[] Values => [.. _table.Values];

    #endregion

    #region Accessors

    /// <summary>
    /// Add a value to the table, returning the insert index
    /// </summary>
    public long Add(T value)
    {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
        // Interlocked.Increment returns the incremented value, so the
        // reserved slot for this insert is that value minus one
        long index = Interlocked.Increment(ref _tableIndex) - 1;
        _table.TryAdd(index, value);
        return index;
#else
        // Post-increment already yields the pre-increment value; the
        // previous extra "- 1" made the first insert land on index -1
        long index = _tableIndex++;
        _table[index] = value;
        return index;
#endif
    }

    /// <summary>
    /// Get a value from the table, default on error
    /// </summary>
    public T? Get(long index)
    {
        if (_table.TryGetValue(index, out var value))
            return value;

        return default;
    }

    /// <summary>
    /// Remove a value from the table, returning success
    /// </summary>
    public bool Remove(long index)
    {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
        return _table.TryRemove(index, out var _);
#else
        return _table.Remove(index);
#endif
    }

    /// <summary>
    /// Remove all values that match a function
    /// </summary>
    public void RemoveAll(Func<T, bool> func)
    {
        // Snapshot the key set once; using the loop counter itself as a
        // table key (as before) breaks as soon as the table has gaps
        // left by earlier removals
        long[] indexes = Indexes;
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
        Parallel.For(0, indexes.Length, i =>
        {
            long index = indexes[i];
            if (_table.TryGetValue(index, out var value) && func(value))
                Remove(index);
        });
#else
        for (int i = 0; i < indexes.Length; i++)
        {
            long index = indexes[i];
            if (_table.TryGetValue(index, out var value) && func(value))
                Remove(index);
        }
#endif
    }

    /// <summary>
    /// Set an indexed value directly
    /// </summary>
    /// <remarks>This does not increment the index so values may be overwritten</remarks>
    public bool Set(long index, T value)
    {
        // Use the indexer on both paths so the documented overwrite
        // semantics hold; TryAdd refuses to replace an existing value
        _table[index] = value;
        return true;
    }

    /// <summary>
    /// Try to get a value by index, returning success
    /// </summary>
    public bool TryGet(long index, out T? value)
        => _table.TryGetValue(index, out value);

    /// <summary>
    /// Try to remove a value by index, returning success
    /// </summary>
    public bool TryRemove(long index, out T? value)
    {
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
        return _table.TryRemove(index, out value);
#else
        if (!_table.TryGetValue(index, out value))
            return false;

        return _table.Remove(index);
#endif
    }

    #endregion

    #region Search

    /// <summary>
    /// Indicates if an index is valid
    /// </summary>
    public bool ContainsIndex(long index) => _table.ContainsKey(index);

    /// <summary>
    /// Find an item based on a supplied function
    /// </summary>
    /// <returns>The first matching pair, or a pair with key -1 if nothing matches</returns>
    public KeyValuePair<long, T?> Find(Func<T, bool> func)
    {
        foreach (long index in Indexes)
        {
            // Re-check membership; the entry may have been removed
            // concurrently since the key snapshot was taken
            if (_table.TryGetValue(index, out var value) && func(value))
                return new KeyValuePair<long, T?>(index, value);
        }

        return new KeyValuePair<long, T?>(-1, default);
    }

    #endregion
}
#endregion
#region Private instance variables

/// <summary>
/// Internal table for all items
/// </summary>
[JsonIgnore, XmlIgnore]
private readonly IndexedTable<DatItem> _items = new();

/// <summary>
/// Internal table for all machines
/// </summary>
[JsonIgnore, XmlIgnore]
private readonly IndexedTable<Machine> _machines = new();

/// <summary>
/// Internal table for all sources
/// </summary>
[JsonIgnore, XmlIgnore]
private readonly IndexedTable<Source> _sources = new();

/// <summary>
/// Internal dictionary representing the current buckets
/// </summary>
[JsonIgnore, XmlIgnore]
private readonly GroupingTable _buckets = new();

/// <summary>
/// Logging object
/// </summary>
private readonly Logger _logger;

#endregion
#region Properties
/// <summary>
/// Get the keys in sorted order from the file dictionary
/// </summary>
/// <returns>Array of the keys in natural sort order</returns>
[JsonIgnore, XmlIgnore]
public string[] SortedKeys
{
    get
    {
        // Snapshot the keys, then natural-sort for stable output
        List<string> keys = [.. _buckets.Keys];
        keys.Sort(new NaturalComparer());
        return [.. keys];
    }
}
/// <summary>
/// DAT statistics
/// </summary>
[JsonIgnore, XmlIgnore]
public DatStatistics DatStatistics { get; } = new DatStatistics();
#endregion
#region Constructors
/// <summary>
/// Generic constructor
/// </summary>
public ItemDatabase() => _logger = new Logger(this);
#endregion
#region Accessors
/// <summary>
/// Add a DatItem to the dictionary after validation
/// </summary>
/// <param name="item">Item data to validate</param>
/// <param name="statsOnly">True to only add item statistics while parsing, false otherwise</param>
/// <returns>The index for the added item, -1 on error</returns>
public long AddItem(DatItem item, bool statsOnly)
{
    // If we have a Disk, File, Media, or Rom, clean the hash data first.
    // The pattern variables alias `item`, so mutations below apply to it
    // directly; no reassignment back to `item` is needed.
    if (item is Disk disk)
    {
        // If the disk has absolutely no hashes, output as nodump and log
        if (disk.Status != ItemStatus.Nodump
            && string.IsNullOrEmpty(disk.MD5)
            && string.IsNullOrEmpty(disk.SHA1))
        {
            _logger.Verbose($"Incomplete entry for '{disk.Name}' will be output as nodump");
            disk.Status = ItemStatus.Nodump;
        }
    }
    else if (item is DatItems.Formats.File file)
    {
        // If the file has absolutely no hashes, log it
        // NOTE(review): unlike Disk/Rom, no status is set here — confirm intended
        if (string.IsNullOrEmpty(file.CRC)
            && string.IsNullOrEmpty(file.MD5)
            && string.IsNullOrEmpty(file.SHA1)
            && string.IsNullOrEmpty(file.SHA256))
        {
            _logger.Verbose($"Incomplete entry for '{file.GetName()}' will be output as nodump");
        }
    }
    else if (item is Media media)
    {
        // If the media has absolutely no hashes, log it
        // NOTE(review): unlike Disk/Rom, no status is set here — confirm intended
        if (string.IsNullOrEmpty(media.MD5)
            && string.IsNullOrEmpty(media.SHA1)
            && string.IsNullOrEmpty(media.SHA256)
            && string.IsNullOrEmpty(media.SpamSum))
        {
            _logger.Verbose($"Incomplete entry for '{media.Name}' will be output as nodump");
        }
    }
    else if (item is Rom rom)
    {
        long? size = rom.Size;

        // If we have the case where there is SHA-1 and nothing else, we don't fill in any other part of the data
        if (size is null && !string.IsNullOrEmpty(rom.SHA1))
        {
            // No-op, just catch it so it doesn't go further
            //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Entry with only SHA-1 found - '{rom.Name}'");
        }
        // If we have a rom and it's missing size AND the hashes match a 0-byte file, fill in the rest of the info
        else if ((size == 0 || size is null)
            && (string.IsNullOrEmpty(rom.CRC32) || rom.HasZeroHash()))
        {
            rom.Size = 0;
            rom.CRC16 = null; // HashType.CRC16.ZeroString
            rom.CRC32 = HashType.CRC32.ZeroString;
            rom.CRC64 = null; // HashType.CRC64.ZeroString
            rom.MD2 = null; // HashType.MD2.ZeroString
            rom.MD4 = null; // HashType.MD4.ZeroString
            rom.MD5 = HashType.MD5.ZeroString;
            rom.RIPEMD128 = null; // HashType.RIPEMD128.ZeroString
            rom.RIPEMD160 = null; // HashType.RIPEMD160.ZeroString
            rom.SHA1 = HashType.SHA1.ZeroString;
            rom.SHA256 = null; // HashType.SHA256.ZeroString;
            rom.SHA384 = null; // HashType.SHA384.ZeroString;
            rom.SHA512 = null; // HashType.SHA512.ZeroString;
            rom.SpamSum = null; // HashType.SpamSum.ZeroString;
        }
        // If the file has no size and it's not the above case, skip and log
        else if (rom.Status != ItemStatus.Nodump && (size == 0 || size is null))
        {
            //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.Name}' will be output as nodump");
            rom.Status = ItemStatus.Nodump;
        }
        // If the file has a size but absolutely no hashes, skip and log
        else if (rom.Status != ItemStatus.Nodump
            && size is not null && size > 0
            && !rom.HasHashes())
        {
            //logger.Verbose($"{Header.GetStringFieldValue(DatHeader.FileNameKey)}: Incomplete entry for '{rom.Name}' will be output as nodump");
            rom.Status = ItemStatus.Nodump;
        }
    }

    // If only adding statistics, we add just item stats
    if (statsOnly)
    {
        DatStatistics.AddItemStatistics(item);
        return -1;
    }

    return AddItemInternal(item);
}
/// <summary>
/// Add a machine, returning the insert index
/// </summary>
public long AddMachine(Machine machine) => _machines.Add(machine);

/// <summary>
/// Add a source, returning the insert index
/// </summary>
public long AddSource(Source source) => _sources.Add(source);

/// <summary>
/// Remove all items marked for removal
/// </summary>
public void ClearMarked() => _items.RemoveAll(d => d.RemoveFlag);

/// <summary>
/// Get an item based on the index, null if missing
/// </summary>
public DatItem? GetItem(long index) => _items.Get(index);
/// <summary>
/// Get the indices and items associated with a bucket name
/// </summary>
/// <param name="bucketName">Name of the bucket to look up</param>
/// <param name="filter">True to omit items flagged for removal, false otherwise</param>
/// <returns>Mapping of item index to item; empty when the bucket is missing</returns>
public Dictionary<long, DatItem> GetItemsForBucket(string? bucketName, bool filter = false)
{
    // Get item indexes for the bucket
    if (!_buckets.TryGet(bucketName, out var itemIds) || itemIds is null)
        return [];

    // Get the items based on index
    var datItems = new Dictionary<long, DatItem>();
    foreach (long itemId in itemIds)
    {
        // Ignore missing IDs
        if (!_items.TryGet(itemId, out var datItem) || datItem is null)
            continue;

        if (!filter || !datItem.RemoveFlag)
            datItems[itemId] = datItem;
    }

    return datItems;
}
/// <summary>
/// Get a machine based on the index
/// </summary>
/// <returns>The index/machine pair, or a pair with key -1 on failure</returns>
public KeyValuePair<long, Machine?> GetMachine(long index)
{
    if (!_machines.TryGet(index, out var machine))
        return new KeyValuePair<long, Machine?>(-1, null);

    return new KeyValuePair<long, Machine?>(index, machine);
}

/// <summary>
/// Get a machine based on the name
/// </summary>
/// <remarks>This assumes that all machines have unique names</remarks>
/// <returns>The index/machine pair, or a pair with key -1 on failure</returns>
public KeyValuePair<long, Machine?> GetMachine(string? name)
{
    if (string.IsNullOrEmpty(name))
        return new KeyValuePair<long, Machine?>(-1, null);

    var machine = _machines.Find(m => m.Name == name);
    return new KeyValuePair<long, Machine?>(machine.Key, machine.Value);
}
/// <summary>
/// Get all machines
/// </summary>
public Machine[] GetMachines() => _machines.Values;

/// <summary>
/// Get a source based on the index
/// </summary>
/// <returns>The index/source pair, or a pair with key -1 on failure</returns>
public KeyValuePair<long, Source?> GetSource(long index)
{
    if (!_sources.TryGet(index, out var source))
        return new KeyValuePair<long, Source?>(-1, null);

    return new KeyValuePair<long, Source?>(index, source);
}
/// <summary>
/// Remove a key from the file dictionary if it exists
/// </summary>
/// <param name="key">Key in the dictionary to remove</param>
public bool RemoveBucket(string key)
{
    bool removed = _buckets.TryRemove(key, out var list);

    // Any items that lived in the bucket are removed from the item table too
    if (list is not null)
    {
        foreach (long index in list)
            RemoveItem(index);
    }

    return removed;
}
/// <summary>
/// Remove an item, returning if it could be removed
/// </summary>
public bool RemoveItem(long itemIndex)
{
    bool removed = _items.TryRemove(itemIndex, out var datItem);

    // Keep the statistics in sync with the removed item
    if (removed && datItem is not null)
        DatStatistics.RemoveItemStatistics(datItem);

    return removed;
}
/// <summary>
/// Remove a machine by index, returning if it could be removed
/// </summary>
public bool RemoveMachine(long machineIndex)
{
    if (!_machines.TryRemove(machineIndex, out _))
        return false;

    // Drop every item that referenced the removed machine
    _items.RemoveAll(item => item.MachineIndex == machineIndex);
    return true;
}

/// <summary>
/// Remove a machine by name, returning if it could be removed
/// </summary>
public bool RemoveMachine(string machineName)
{
    if (string.IsNullOrEmpty(machineName))
        return false;

    // Case-insensitive lookup; a miss yields key -1 which then fails removal
    var found = _machines.Find(m => string.Equals(m.Name, machineName, StringComparison.OrdinalIgnoreCase));
    return RemoveMachine(found.Key);
}
/// <summary>
/// Add an item, returning the insert index
/// </summary>
internal long AddItemInternal(DatItem item)
{
    // Add the item with a new index
    long index = _items.Add(item);

    // Add the item statistics
    DatStatistics.AddItemStatistics(item);

    // Add the item to the bucket matching the current grouping
    PerformItemBucketing(new KeyValuePair<long, DatItem>(index, item), _buckets.GroupedBy, lower: true, norename: true);

    // Return the used index
    return index;
}
#endregion
#region Bucketing
/// <summary>
/// Update the bucketing dictionary
/// </summary>
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
/// <param name="lower">True if the key should be lowercased (default), false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
public void BucketBy(ItemKey bucketBy, bool lower = true, bool norename = true)
{
    // If the sorted type isn't the same, we want to sort the dictionary accordingly
    if (_buckets.GroupedBy != bucketBy && bucketBy != ItemKey.NULL)
    {
        _logger.User($"Organizing roms by {bucketBy}");
        PerformBucketing(bucketBy, lower, norename);
    }

    // Sort the dictionary to be consistent
    _logger.User($"Sorting roms by {bucketBy}");
    PerformSorting(norename);
}
/// <summary>
/// Perform deduplication on the current sorted dictionary
/// </summary>
public void Deduplicate()
{
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    Parallel.ForEach(SortedKeys, key =>
#else
    foreach (var key in SortedKeys)
#endif
    {
        // Get the possibly unsorted list
        List<KeyValuePair<long, DatItem>> sortedList = [.. GetItemsForBucket(key)];

        // Sort and merge the list
        Sort(ref sortedList, false);
        sortedList = Merge(sortedList);

        // Add the list back to the dictionary
        // NOTE(review): RemoveBucket/AddItemInternal mutate shared tables from
        // parallel workers; confirm the grouping table's locking is sufficient
        RemoveBucket(key);
        sortedList.ForEach(item => AddItemInternal(item.Value));
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    });
#else
    }
#endif
}
/// <summary>
/// Return the duplicate status of two items
/// </summary>
/// <param name="selfItem">Current DatItem</param>
/// <param name="selfSource">Source associated with this item</param>
/// <param name="lastItem">DatItem to check against</param>
/// <param name="lastSource">Source associated with the last item</param>
/// <returns>The DupeType corresponding to the relationship between the two</returns>
public DupeType GetDuplicateStatus(KeyValuePair<long, DatItem>? selfItem, Source? selfSource, KeyValuePair<long, DatItem>? lastItem, Source? lastSource)
{
    DupeType output = 0x00;

    // If either item is null
    if (selfItem is null || lastItem is null)
        return output;

    // If we don't have a duplicate at all, return none
    if (!selfItem.Value.Value.Equals(lastItem.Value.Value))
        return output;

    // Get the machines for comparison
    var selfMachine = GetMachine(selfItem.Value.Value.MachineIndex).Value;
    string? selfMachineName = selfMachine?.Name;
    var lastMachine = GetMachine(lastItem.Value.Value.MachineIndex).Value;
    string? lastMachineName = lastMachine?.Name;

    // If the duplicate is external already
#if NET20 || NET35
    if ((lastItem.Value.Value.DupeType & DupeType.External) != 0)
#else
    if (lastItem.Value.Value.DupeType.HasFlag(DupeType.External))
#endif
        output |= DupeType.External;

    // If the duplicate should be external
    else if (lastSource?.Index != selfSource?.Index)
        output |= DupeType.External;

    // Otherwise, it's considered an internal dupe
    else
        output |= DupeType.Internal;

    // If the item and machine names match
    if (lastMachineName == selfMachineName && lastItem.Value.Value.GetName() == selfItem.Value.Value.GetName())
        output |= DupeType.All;

    // Otherwise, hash match is assumed
    else
        output |= DupeType.Hash;

    return output;
}
/// <summary>
/// List all duplicates found in a DAT based on a DatItem
/// </summary>
/// <param name="datItem">Item to try to match</param>
/// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
/// <returns>Mapping of matched item indexes to DatItem objects</returns>
/// <remarks>
/// This also sets the remove flag on any duplicates found
/// TODO: Figure out if removal should be a flag or just removed entirely
/// </remarks>
internal Dictionary<long, DatItem> GetDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
{
    // Check for an empty rom list first
    if (DatStatistics.TotalCount == 0)
        return [];

    // We want to get the proper key for the DatItem, ignoring the index
    _ = SortAndGetKey(datItem, sorted);
    var machine = GetMachine(datItem.Value.MachineIndex);
    var source = GetSource(datItem.Value.SourceIndex);
    string key = datItem.Value.GetKey(_buckets.GroupedBy, machine.Value, source.Value);

    // If the key doesn't exist, return the empty list
    var items = GetItemsForBucket(key);
    if (items.Count == 0)
        return [];

    // Try to find duplicates
    Dictionary<long, DatItem> output = [];
    foreach (var rom in items)
    {
        // Skip items marked for removal
        if (rom.Value.RemoveFlag)
            continue;

        // Mark duplicates for future removal
        if (datItem.Value.Equals(rom.Value))
        {
            rom.Value.RemoveFlag = true;
            output[rom.Key] = rom.Value;
        }
    }

    // Return any matching items
    return output;
}
/// <summary>
/// Check if a DAT contains the given DatItem
/// </summary>
/// <param name="datItem">Item to try to match</param>
/// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
/// <returns>True if it contains the rom, false otherwise</returns>
internal bool HasDuplicates(KeyValuePair<long, DatItem> datItem, bool sorted = false)
{
    // Check for an empty rom list first
    if (DatStatistics.TotalCount == 0)
        return false;

    // We want to get the proper key for the DatItem, ignoring the index
    _ = SortAndGetKey(datItem, sorted);
    var machine = GetMachine(datItem.Value.MachineIndex);
    var source = GetSource(datItem.Value.SourceIndex);
    string key = datItem.Value.GetKey(_buckets.GroupedBy, machine.Value, source.Value);

    // If the key doesn't exist
    var roms = GetItemsForBucket(key);
    if (roms is null || roms.Count == 0)
        return false;

    // Try to find duplicates
    return roms.Values.Any(datItem.Value.Equals);
}
/// <summary>
/// Merge an arbitrary set of item pairs based on the supplied information
/// </summary>
/// <param name="itemMappings">List of pairs representing the items to be merged</param>
/// <returns>Deduplicated list of index/item pairs</returns>
private List<KeyValuePair<long, DatItem>> Merge(List<KeyValuePair<long, DatItem>> itemMappings)
{
    // Check for null or blank roms first
    if (itemMappings is null || itemMappings.Count == 0)
        return [];

    // Create output list
    List<KeyValuePair<long, DatItem>> output = [];

    // Then deduplicate them by checking to see if data matches previous saved roms
    int nodumpCount = 0;
    foreach (var kvp in itemMappings)
    {
        long itemIndex = kvp.Key;
        DatItem datItem = kvp.Value;

        // If we don't have a Disk, File, Media, or Rom, we skip checking for duplicates
        if (datItem is not Disk && datItem is not DatItems.Formats.File && datItem is not Media && datItem is not Rom)
            continue;

        // If it's a nodump, add and skip
        if (datItem is Rom rom && rom.Status == ItemStatus.Nodump)
        {
            output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
            nodumpCount++;
            continue;
        }
        else if (datItem is Disk disk && disk.Status == ItemStatus.Nodump)
        {
            output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
            nodumpCount++;
            continue;
        }

        // If it's the first non-nodump rom in the list, don't touch it
        if (output.Count == nodumpCount)
        {
            output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
            continue;
        }

        // Find the index of the first duplicate, if one exists
        var datItemSource = GetSource(datItem.SourceIndex);
        int pos = output.FindIndex(lastItem =>
        {
            var lastItemSource = GetSource(lastItem.Value.SourceIndex);
            return GetDuplicateStatus(kvp, datItemSource.Value, lastItem, lastItemSource.Value) != 0x00;
        });
        if (pos < 0)
        {
            output.Add(new KeyValuePair<long, DatItem>(itemIndex, datItem));
            continue;
        }

        // Get the duplicate item
        long savedIndex = output[pos].Key;
        DatItem savedItem = output[pos].Value;
        var savedItemSource = GetSource(savedItem.SourceIndex);
        DupeType dupetype = GetDuplicateStatus(kvp, datItemSource.Value, output[pos], savedItemSource.Value);

        // Disks, Media, and Roms have more information to fill
        if (datItem is Disk diskItem && savedItem is Disk savedDisk)
            savedDisk.FillMissingInformation(diskItem);
        else if (datItem is DatItems.Formats.File fileItem && savedItem is DatItems.Formats.File savedFile)
            savedFile.FillMissingInformation(fileItem);
        else if (datItem is Media mediaItem && savedItem is Media savedMedia)
            savedMedia.FillMissingInformation(mediaItem);
        else if (datItem is Rom romItem && savedItem is Rom savedRom)
            savedRom.FillMissingInformation(romItem);

        savedItem.DupeType = dupetype;

        // Get the sources associated with the items
        var savedSource = GetSource(savedItem.SourceIndex);
        var itemSource = GetSource(datItem.SourceIndex);

        // Get the machines associated with the items
        var savedMachine = GetMachine(savedItem.MachineIndex);
        var itemMachine = GetMachine(datItem.MachineIndex);

        // If the current source has a lower ID than the saved, use the saved source
        // NOTE(review): this copies the saved item's source onto the current item —
        // confirm the intended direction of this transfer against the comment
        if (itemSource.Value?.Index < savedSource.Value?.Index)
        {
            datItem.SourceIndex = savedItem.SourceIndex;
            _machines.Set(savedMachine.Key, (itemMachine.Value!.Clone() as Machine)!);
            savedItem.SetName(datItem.GetName());
        }

        // If the saved machine is a child of the current machine, use the current machine instead
        // NOTE(review): the null-forgiving operators assume both machines resolved — verify
        if (savedMachine.Value!.CloneOf == itemMachine.Value!.Name
            || savedMachine.Value!.RomOf == itemMachine.Value!.Name)
        {
            _machines.Set(savedMachine.Key, (itemMachine.Value!.Clone() as Machine)!);
            savedItem.SetName(datItem.GetName());
        }

        // Replace the original item in the list
        output.RemoveAt(pos);
        output.Insert(pos, new KeyValuePair<long, DatItem>(savedIndex, savedItem));
    }

    return output;
}
/// <summary>
/// Get the highest-order Field value that represents the statistics
/// </summary>
private ItemKey GetBestAvailable()
{
    // Get the required counts
    long diskCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Disk);
    long mediaCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Media);
    long romCount = DatStatistics.GetItemCount(Data.Models.Metadata.ItemType.Rom);
    long nodumpCount = DatStatistics.GetStatusCount(ItemStatus.Nodump);

    // Every non-nodump Disk, Media, and Rom must carry a hash for it to be
    // usable as the bucketing key; hoist the repeated expression once
    long expectedCount = diskCount + mediaCount + romCount - nodumpCount;

    // Check hashes from strongest to weakest and use the first one that
    // every eligible item carries
    // TODO: CRC16 should really come after normal CRC
    if (expectedCount == DatStatistics.GetHashCount(HashType.SHA512))
        return ItemKey.SHA512;
    if (expectedCount == DatStatistics.GetHashCount(HashType.SHA384))
        return ItemKey.SHA384;
    if (expectedCount == DatStatistics.GetHashCount(HashType.SHA256))
        return ItemKey.SHA256;
    if (expectedCount == DatStatistics.GetHashCount(HashType.SHA1))
        return ItemKey.SHA1;
    if (expectedCount == DatStatistics.GetHashCount(HashType.RIPEMD160))
        return ItemKey.RIPEMD160;
    if (expectedCount == DatStatistics.GetHashCount(HashType.RIPEMD128))
        return ItemKey.RIPEMD128;
    if (expectedCount == DatStatistics.GetHashCount(HashType.MD5))
        return ItemKey.MD5;
    if (expectedCount == DatStatistics.GetHashCount(HashType.MD4))
        return ItemKey.MD4;
    if (expectedCount == DatStatistics.GetHashCount(HashType.MD2))
        return ItemKey.MD2;
    if (expectedCount == DatStatistics.GetHashCount(HashType.CRC64))
        return ItemKey.CRC64;
    if (expectedCount == DatStatistics.GetHashCount(HashType.CRC16))
        return ItemKey.CRC16;

    // Otherwise, we bucket by CRC32
    return ItemKey.CRC32;
}
/// <summary>
/// Get the bucketing key for a given item
/// </summary>
/// <param name="datItem">Current item</param>
/// <param name="bucketBy">ItemKey value representing what key to get</param>
/// <param name="lower">True if the key should be lowercased, false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
private string GetBucketKey(DatItem datItem, ItemKey bucketBy, bool lower, bool norename)
{
    var source = GetSource(datItem.SourceIndex);
    var machine = GetMachine(datItem.MachineIndex);

    // Treat NULL like machine
    if (bucketBy == ItemKey.NULL)
        bucketBy = ItemKey.Machine;

    // Get the bucket key
    return datItem.GetKey(bucketBy, machine.Value, source.Value, lower, norename);
}
/// <summary>
/// Perform bucketing based on the item key provided
/// </summary>
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
/// <param name="lower">True if the key should be lowercased, false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
private void PerformBucketing(ItemKey bucketBy, bool lower, bool norename)
{
    // Reset the bucketing values
    _buckets.GroupedBy = bucketBy;
    _buckets.Clear();

    // Get the current list of item indices
    long[] itemIndexes = _items.Indexes;

#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    Parallel.For(0, itemIndexes.Length, i =>
#else
    for (int i = 0; i < itemIndexes.Length; i++)
#endif
    {
        // Look up by the actual item index, not the loop counter; the two
        // diverge as soon as the item table has gaps from prior removals
        long itemIndex = itemIndexes[i];
        var datItem = GetItem(itemIndex);
        if (datItem is null)
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            return;
#else
            continue;
#endif

        PerformItemBucketing(new KeyValuePair<long, DatItem>(itemIndex, datItem), bucketBy, lower, norename);
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    });
#else
    }
#endif
}
/// <summary>
/// Bucket a single DatItem
/// </summary>
/// <param name="datItem">Index/item pair to bucket</param>
/// <param name="bucketBy">ItemKey enum representing how to bucket the individual items</param>
/// <param name="lower">True if the key should be lowercased, false otherwise</param>
/// <param name="norename">True if games should only be compared on game and file name, false if system and source are counted</param>
private void PerformItemBucketing(KeyValuePair<long, DatItem> datItem, ItemKey bucketBy, bool lower, bool norename)
{
    string bucketKey = GetBucketKey(datItem.Value, bucketBy, lower, norename);
    _buckets.Add(bucketKey, datItem.Key);
}
/// <summary>
/// Sort existing buckets for consistency
/// </summary>
private void PerformSorting(bool norename)
{
    // Get the current list of bucket keys
    string[] bucketKeys = _buckets.Keys;

#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    Parallel.For(0, bucketKeys.Length, i =>
#else
    for (int i = 0; i < bucketKeys.Length; i++)
#endif
    {
        string bucketKey = bucketKeys[i];

        // Prune empty or missing buckets entirely
        _buckets.TryGet(bucketKey, out var itemIndices);
        if (itemIndices is null || itemIndices.Count == 0)
        {
            _buckets.TryRemove(bucketKey, out _);
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
            return;
#else
            continue;
#endif
        }

        // Materialize the surviving items; the lambda parameter must use a
        // name other than the loop variable `i` or this fails to compile (CS0136)
        var datItems = itemIndices
            .FindAll(index => _items.ContainsIndex(index))
            .ConvertAll(index => new KeyValuePair<long, DatItem>(index, _items.Get(index)!));

        // Sort the items and store the reordered index list back
        Sort(ref datItems, norename);
        _buckets.TryAdd(bucketKey, datItems.ConvertAll(kvp => kvp.Key));
#if NET40_OR_GREATER || NETCOREAPP || NETSTANDARD2_0_OR_GREATER
    });
#else
    }
#endif
}
/// <summary>
/// Sort a list of item pairs by SourceID, Game, and Name (in order)
/// </summary>
/// <param name="itemMappings">List of pairs representing the items to be sorted</param>
/// <param name="norename">True if files are not renamed, false otherwise</param>
/// <returns>True if it sorted correctly, false otherwise</returns>
private bool Sort(ref List<KeyValuePair<long, DatItem>> itemMappings, bool norename)
{
    // Create the comparer external to the delegate
    var nc = new NaturalComparer();

    itemMappings.Sort(delegate (KeyValuePair<long, DatItem> x, KeyValuePair<long, DatItem> y)
    {
        try
        {
            // Compare on source if renaming
            if (!norename)
            {
                int xSourceIndex = GetSource(x.Value.SourceIndex).Value?.Index ?? 0;
                int ySourceIndex = GetSource(y.Value.SourceIndex).Value?.Index ?? 0;
                if (xSourceIndex != ySourceIndex)
                    return xSourceIndex - ySourceIndex;
            }

            // Get the machines
            Machine? xMachine = GetMachine(x.Value.MachineIndex).Value;
            Machine? yMachine = GetMachine(y.Value.MachineIndex).Value;

            // If machine names don't match
            string? xMachineName = xMachine?.Name;
            string? yMachineName = yMachine?.Name;
            if (xMachineName != yMachineName)
                return nc.Compare(xMachineName, yMachineName);

            // If types don't match
            Data.Models.Metadata.ItemType xType = x.Value.ItemType;
            Data.Models.Metadata.ItemType yType = y.Value.ItemType;
            if (xType != yType)
                return xType - yType;

            // If directory names don't match
            string? xDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
            string? yDirectoryName = Path.GetDirectoryName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
            if (xDirectoryName != yDirectoryName)
                return nc.Compare(xDirectoryName, yDirectoryName);

            // If item names don't match
            string? xName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(x.Value.GetName()));
            string? yName = Path.GetFileName(TextHelper.RemovePathUnsafeCharacters(y.Value.GetName()));
            return nc.Compare(xName, yName);
        }
        catch
        {
            // Absorb the error; an occasionally inconsistent comparer is
            // preferable to failing the entire sort
            return 0;
        }
    });

    return true;
}
/// <summary>
/// Sort the input DAT and get the key to be used by the item
/// </summary>
/// <param name="datItem">Item to try to match</param>
/// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
/// <returns>Key to try to use</returns>
private string SortAndGetKey(KeyValuePair<long, DatItem> datItem, bool sorted = false)
{
    // If we're not already sorted, take care of it
    if (!sorted)
        BucketBy(GetBestAvailable());

    // Now that we have the sorted type, we get the proper key
    return GetBucketKey(datItem.Value, _buckets.GroupedBy, lower: true, norename: true);
}
#endregion
#region Statistics
/// <summary>
/// Recalculate the statistics for the Dat
/// </summary>
public void RecalculateStats()
{
    // Rebuild the statistics from scratch using the current item table
    DatStatistics.ResetStatistics();
    foreach (var item in _items.Values)
    {
        DatStatistics.AddItemStatistics(item);
    }
}
#endregion
}
}