diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs
index f6a165b4..3c97083e 100644
--- a/SabreTools.DatFiles/ItemDictionaryDB.cs
+++ b/SabreTools.DatFiles/ItemDictionaryDB.cs
@@ -500,6 +500,76 @@ namespace SabreTools.DatFiles
}
}
+        /// <summary>
+        /// List all duplicates found in a DAT based on a DatItem
+        /// </summary>
+        /// <param name="datItem">Item to try to match</param>
+        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
+        /// <returns>List of matched DatItem objects</returns>
+ public ConcurrentList<(long, DatItem)> GetDuplicates(DatItem datItem, bool sorted = false)
+ {
+ ConcurrentList<(long, DatItem)> output = [];
+
+ // Check for an empty rom list first
+ if (DatStatistics.TotalCount == 0)
+ return output;
+
+ // We want to get the proper key for the DatItem
+ string key = SortAndGetKey(datItem, sorted);
+
+ // If the key doesn't exist, return the empty list
+ var roms = GetDatItemsForBucket(key);
+ if (roms == null || roms.Length == 0)
+ return output;
+
+            // Try to find duplicates
+            ConcurrentList<(long, DatItem)> left = [];
+            for (int i = 0; i < roms.Length; i++)
+            {
+                DatItem other = roms[i].Item2;
+                if (other.GetBoolFieldValue(DatItem.RemoveKey) == true)
+                    continue;
+
+                if (datItem.Equals(other))
+                {
+                    other.SetFieldValue<bool?>(DatItem.RemoveKey, true);
+                    output.Add(roms[i]);
+                }
+                else
+                {
+                    left.Add(roms[i]);
+                }
+            }
+
+ // Add back all roms with the proper flags
+ _buckets[key] = output.Concat(left).Select(i => i.Item1).ToConcurrentList();
+ return output;
+ }
+
+        /// <summary>
+        /// Check if a DAT contains the given DatItem
+        /// </summary>
+        /// <param name="datItem">Item to try to match</param>
+        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
+        /// <returns>True if it contains the rom, false otherwise</returns>
+ public bool HasDuplicates(DatItem datItem, bool sorted = false)
+ {
+ // Check for an empty rom list first
+ if (DatStatistics.TotalCount == 0)
+ return false;
+
+ // We want to get the proper key for the DatItem
+ string key = SortAndGetKey(datItem, sorted);
+
+ // If the key doesn't exist
+ var roms = GetDatItemsForBucket(key);
+ if (roms == null || roms.Length == 0)
+ return false;
+
+            // Try to find duplicates; Any() already returns a non-nullable bool
+            return roms.Any(r => datItem.Equals(r.Item2));
+ }
+
///
/// Merge an arbitrary set of item pairs based on the supplied information
///
@@ -620,6 +690,42 @@ namespace SabreTools.DatFiles
return output;
}
+        /// <summary>
+        /// Get the highest-order Field value that represents the statistics
+        /// </summary>
+ private ItemKey GetBestAvailable()
+ {
+ // Get the required counts
+ long diskCount = DatStatistics.GetItemCount(ItemType.Disk);
+ long mediaCount = DatStatistics.GetItemCount(ItemType.Media);
+ long romCount = DatStatistics.GetItemCount(ItemType.Rom);
+ long nodumpCount = DatStatistics.GetStatusCount(ItemStatus.Nodump);
+
+ // If all items are supposed to have a SHA-512, we bucket by that
+ if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA512))
+ return ItemKey.SHA512;
+
+ // If all items are supposed to have a SHA-384, we bucket by that
+ else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA384))
+ return ItemKey.SHA384;
+
+ // If all items are supposed to have a SHA-256, we bucket by that
+ else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA256))
+ return ItemKey.SHA256;
+
+ // If all items are supposed to have a SHA-1, we bucket by that
+ else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA1))
+ return ItemKey.SHA1;
+
+ // If all items are supposed to have a MD5, we bucket by that
+ else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.MD5))
+ return ItemKey.MD5;
+
+ // Otherwise, we bucket by CRC
+ else
+ return ItemKey.CRC;
+ }
+
///
/// Get the bucketing key for a given item index
/// Index of the current item
@@ -916,6 +1022,22 @@ namespace SabreTools.DatFiles
return true;
}
+        /// <summary>
+        /// Sort the input DAT and get the key to be used by the item
+        /// </summary>
+        /// <param name="datItem">Item to try to match</param>
+        /// <param name="sorted">True if the DAT is already sorted accordingly, false otherwise (default)</param>
+        /// <returns>Key to try to use</returns>
+ private string SortAndGetKey(DatItem datItem, bool sorted = false)
+ {
+ // If we're not already sorted, take care of it
+ if (!sorted)
+ BucketBy(GetBestAvailable(), DedupeType.None);
+
+ // Now that we have the sorted type, we get the proper key
+ return datItem.GetKey(_bucketedBy);
+ }
+
#endregion
#region Filtering
diff --git a/SabreTools.DatTools/Verification.cs b/SabreTools.DatTools/Verification.cs
index 18124561..6a84a9b4 100644
--- a/SabreTools.DatTools/Verification.cs
+++ b/SabreTools.DatTools/Verification.cs
@@ -108,6 +108,90 @@ namespace SabreTools.DatTools
datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
datFile.Items.ClearMarked();
+
+ return success;
+ }
+
+        /// <summary>
+        /// Verify a DatFile against a set of depots, leaving only missing files
+        /// </summary>
+        /// <param name="datFile">Current DatFile object to verify against</param>
+        /// <param name="inputs">List of input directories to compare against</param>
+        /// <returns>True if verification was a success, false otherwise</returns>
+        public static bool VerifyDepotDB(DatFile datFile, List<string> inputs)
+ {
+ bool success = true;
+
+ var watch = new InternalStopwatch("Verifying all from supplied depots");
+
+ // Now loop through and get only directories from the input paths
+            List<string> directories = [];
+ foreach (string input in inputs)
+ {
+ // Add to the list if the input is a directory
+ if (Directory.Exists(input))
+ {
+ logger.Verbose($"Adding depot: {input}");
+ directories.Add(input);
+ }
+ }
+
+ // If we don't have any directories, we want to exit
+ if (directories.Count == 0)
+ return success;
+
+ // Now that we have a list of depots, we want to bucket the input DAT by SHA-1
+ datFile.ItemsDB.BucketBy(ItemKey.SHA1, DedupeType.None);
+
+ // Then we want to loop through each of the hashes and see if we can rebuild
+ var keys = datFile.ItemsDB.SortedKeys.ToList();
+ foreach (string hash in keys)
+ {
+ // Pre-empt any issues that could arise from string length
+ if (hash.Length != Constants.SHA1Length)
+ continue;
+
+ logger.User($"Checking hash '{hash}'");
+
+ // Get the extension path for the hash
+                string? subpath = Utilities.GetDepotPath(hash, datFile.Header.GetFieldValue<DepotInformation?>(DatHeader.InputDepotKey)?.Depth ?? 0);
+ if (subpath == null)
+ continue;
+
+ // Find the first depot that includes the hash
+ string? foundpath = null;
+ foreach (string directory in directories)
+ {
+ if (System.IO.File.Exists(Path.Combine(directory, subpath)))
+ {
+ foundpath = Path.Combine(directory, subpath);
+ break;
+ }
+ }
+
+ // If we didn't find a path, then we continue
+ if (foundpath == null)
+ continue;
+
+ // If we have a path, we want to try to get the rom information
+ GZipArchive tgz = new(foundpath);
+ BaseFile? fileinfo = tgz.GetTorrentGZFileInfo();
+
+ // If the file information is null, then we continue
+ if (fileinfo == null)
+ continue;
+
+                // Now we want to remove all duplicates from the DAT; the returned
+                // lists are only needed for their marking side effect, so do not
+                // chain AddRange on a discarded result
+                datFile.ItemsDB.GetDuplicates(new Rom(fileinfo));
+                datFile.ItemsDB.GetDuplicates(new Disk(fileinfo));
+ }
+
+ watch.Stop();
+
+            // Set fixdat headers in case of writing out
+            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
+            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
+            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
datFile.ItemsDB.ClearMarked();
return success;
@@ -158,6 +242,50 @@ namespace SabreTools.DatTools
datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
datFile.Items.ClearMarked();
+
+ return success;
+ }
+
+        /// <summary>
+        /// Verify a DatFile against a set of inputs, leaving only missing files
+        /// </summary>
+        /// <param name="datFile">Current DatFile object to verify against</param>
+        /// <param name="hashOnly">True if only hashes should be checked, false for full file information</param>
+        /// <returns>True if verification was a success, false otherwise</returns>
+ public static bool VerifyGenericDB(DatFile datFile, bool hashOnly)
+ {
+ bool success = true;
+
+ var watch = new InternalStopwatch("Verifying all from supplied paths");
+
+ // Force bucketing according to the flags
+ if (hashOnly)
+ datFile.ItemsDB.BucketBy(ItemKey.CRC, DedupeType.Full);
+ else
+ datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.Full);
+
+ // Then mark items for removal
+ var keys = datFile.ItemsDB.SortedKeys.ToList();
+ foreach (string key in keys)
+ {
+ var items = datFile.ItemsDB.GetDatItemsForBucket(key);
+ if (items == null)
+ continue;
+
+                for (int i = 0; i < items.Length; i++)
+                {
+                    // Unmatched items will have a source ID of int.MaxValue, remove all others
+                    if (items[i].Item2.GetFieldValue<Source?>(DatItem.SourceKey)?.Index != int.MaxValue)
+                        items[i].Item2.SetFieldValue<bool?>(DatItem.RemoveKey, true);
+                }
+ }
+
+ watch.Stop();
+
+            // Set fixdat headers in case of writing out
+            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
+            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
+            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
datFile.ItemsDB.ClearMarked();
return success;
diff --git a/SabreTools.DatTools/Writer.cs b/SabreTools.DatTools/Writer.cs
index eefc96f5..586e5f99 100644
--- a/SabreTools.DatTools/Writer.cs
+++ b/SabreTools.DatTools/Writer.cs
@@ -69,9 +69,11 @@ namespace SabreTools.DatTools
// Bucket roms by game name, if not already
datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None);
+ datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.None);
// Output the number of items we're going to be writing
logger.User($"A total of {datFile.Items.DatStatistics.TotalCount - datFile.Items.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'");
+ //logger.User($"A total of {datFile.ItemsDB.DatStatistics.TotalCount - datFile.ItemsDB.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'");
// Get the outfile names
            Dictionary<DatFormat, string> outfiles = datFile.Header.CreateOutFileNames(outDir!, overwrite);
@@ -128,15 +130,28 @@ namespace SabreTools.DatTools
if (diskCount + mediaCount + romCount == 0)
datFile.Items.RecalculateStats();
+ diskCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Disk);
+ mediaCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Media);
+ romCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Rom);
+
+ if (diskCount + mediaCount + romCount == 0)
+ datFile.ItemsDB.RecalculateStats();
+
datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
+ datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.None, norename: true);
datFile.Items.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey);
datFile.Items.DatStatistics.MachineCount = datFile.Items.Keys.Count;
datFile.Items.DatStatistics.IsDirectory = false;
+ datFile.ItemsDB.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey);
+ datFile.ItemsDB.DatStatistics.MachineCount = datFile.Items.Keys.Count;
+ datFile.ItemsDB.DatStatistics.IsDirectory = false;
+
            var statsList = new List<DatStatistics>
{
datFile.Items.DatStatistics,
+ //datFile.ItemsDB.DatStatistics,
};
var consoleOutput = BaseReport.Create(StatReportFormat.None, statsList);
consoleOutput!.WriteToFile(null, true, true);
@@ -206,14 +221,19 @@ namespace SabreTools.DatTools
{
// Force a statistics recheck, just in case
datFile.Items.RecalculateStats();
+ datFile.ItemsDB.RecalculateStats();
// If there's nothing there, abort
if (datFile.Items.DatStatistics.TotalCount == 0)
return false;
+ if (datFile.ItemsDB.DatStatistics.TotalCount == 0)
+ return false;
// If every item is removed, abort
if (datFile.Items.DatStatistics.TotalCount == datFile.Items.DatStatistics.RemovedCount)
return false;
+ if (datFile.ItemsDB.DatStatistics.TotalCount == datFile.ItemsDB.DatStatistics.RemovedCount)
+ return false;
return true;
}
diff --git a/SabreTools/Features/Verify.cs b/SabreTools/Features/Verify.cs
index 4b2c3694..9f1cc4bf 100644
--- a/SabreTools/Features/Verify.cs
+++ b/SabreTools/Features/Verify.cs
@@ -94,6 +94,7 @@ namespace SabreTools.Features
}
Verification.VerifyGeneric(datdata, hashOnly);
+ //Verification.VerifyGenericDB(datdata, hashOnly);
}
// Now write out if there are any items left
@@ -145,6 +146,7 @@ namespace SabreTools.Features
}
Verification.VerifyGeneric(datdata, hashOnly);
+ //Verification.VerifyGenericDB(datdata, hashOnly);
}
// Now write out if there are any items left