diff --git a/SabreTools.DatFiles/ItemDictionaryDB.cs b/SabreTools.DatFiles/ItemDictionaryDB.cs index f6a165b4..3c97083e 100644 --- a/SabreTools.DatFiles/ItemDictionaryDB.cs +++ b/SabreTools.DatFiles/ItemDictionaryDB.cs @@ -500,6 +500,76 @@ namespace SabreTools.DatFiles } } + /// + /// List all duplicates found in a DAT based on a DatItem + /// + /// Item to try to match + /// True if the DAT is already sorted accordingly, false otherwise (default) + /// List of matched DatItem objects + public ConcurrentList<(long, DatItem)> GetDuplicates(DatItem datItem, bool sorted = false) + { + ConcurrentList<(long, DatItem)> output = []; + + // Check for an empty rom list first + if (DatStatistics.TotalCount == 0) + return output; + + // We want to get the proper key for the DatItem + string key = SortAndGetKey(datItem, sorted); + + // If the key doesn't exist, return the empty list + var roms = GetDatItemsForBucket(key); + if (roms == null || roms.Length == 0) + return output; + + // Try to find duplicates + ConcurrentList<(long, DatItem)> left = []; + for (int i = 0; i < roms.Length; i++) + { + DatItem other = roms[i].Item2; + if (other.GetBoolFieldValue(DatItem.RemoveKey) == true) + continue; + + if (datItem.Equals(other)) + { + other.SetFieldValue(DatItem.RemoveKey, true); + output.Add(roms[i]); + } + else + { + left.Add(roms[i]); + } + } + + // Add back all roms with the proper flags + _buckets[key] = output.Concat(left).Select(i => i.Item1).ToConcurrentList(); + return output; + } + + /// + /// Check if a DAT contains the given DatItem + /// + /// Item to try to match + /// True if the DAT is already sorted accordingly, false otherwise (default) + /// True if it contains the rom, false otherwise + public bool HasDuplicates(DatItem datItem, bool sorted = false) + { + // Check for an empty rom list first + if (DatStatistics.TotalCount == 0) + return false; + + // We want to get the proper key for the DatItem + string key = SortAndGetKey(datItem, sorted); + + // If the key 
doesn't exist + var roms = GetDatItemsForBucket(key); + if (roms == null || roms.Length == 0) + return false; + + // Try to find duplicates + return roms.Any(r => datItem.Equals(r.Item2)) == true; + } + /// /// Merge an arbitrary set of item pairs based on the supplied information /// @@ -620,6 +690,42 @@ namespace SabreTools.DatFiles return output; } + /// + /// Get the highest-order Field value that represents the statistics + /// + private ItemKey GetBestAvailable() + { + // Get the required counts + long diskCount = DatStatistics.GetItemCount(ItemType.Disk); + long mediaCount = DatStatistics.GetItemCount(ItemType.Media); + long romCount = DatStatistics.GetItemCount(ItemType.Rom); + long nodumpCount = DatStatistics.GetStatusCount(ItemStatus.Nodump); + + // If all items are supposed to have a SHA-512, we bucket by that + if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA512)) + return ItemKey.SHA512; + + // If all items are supposed to have a SHA-384, we bucket by that + else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA384)) + return ItemKey.SHA384; + + // If all items are supposed to have a SHA-256, we bucket by that + else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA256)) + return ItemKey.SHA256; + + // If all items are supposed to have a SHA-1, we bucket by that + else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.SHA1)) + return ItemKey.SHA1; + + // If all items are supposed to have a MD5, we bucket by that + else if (diskCount + mediaCount + romCount - nodumpCount == DatStatistics.GetHashCount(HashType.MD5)) + return ItemKey.MD5; + + // Otherwise, we bucket by CRC + else + return ItemKey.CRC; + } + /// /// Get the bucketing key for a given item index /// Index of the current item @@ -916,6 +1022,22 @@ namespace SabreTools.DatFiles return true; } + /// + /// Sort the input 
DAT and get the key to be used by the item + /// + /// Item to try to match + /// True if the DAT is already sorted accordingly, false otherwise (default) + /// Key to try to use + private string SortAndGetKey(DatItem datItem, bool sorted = false) + { + // If we're not already sorted, take care of it + if (!sorted) + BucketBy(GetBestAvailable(), DedupeType.None); + + // Now that we have the sorted type, we get the proper key + return datItem.GetKey(_bucketedBy); + } + #endregion #region Filtering diff --git a/SabreTools.DatTools/Verification.cs b/SabreTools.DatTools/Verification.cs index 18124561..6a84a9b4 100644 --- a/SabreTools.DatTools/Verification.cs +++ b/SabreTools.DatTools/Verification.cs @@ -108,6 +108,90 @@ namespace SabreTools.DatTools datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}"); datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}"); datFile.Items.ClearMarked(); + + return success; + } + + /// + /// Verify a DatFile against a set of depots, leaving only missing files + /// + /// Current DatFile object to verify against + /// List of input directories to compare against + /// True if verification was a success, false otherwise + public static bool VerifyDepotDB(DatFile datFile, List inputs) + { + bool success = true; + + var watch = new InternalStopwatch("Verifying all from supplied depots"); + + // Now loop through and get only directories from the input paths + List directories = []; + foreach (string input in inputs) + { + // Add to the list if the input is a directory + if (Directory.Exists(input)) + { + logger.Verbose($"Adding depot: {input}"); + directories.Add(input); + } + } + + // If we don't have any directories, we want to exit + if (directories.Count == 0) + return success; + + // Now that we have a list of depots, we want to bucket the input DAT by 
SHA-1 + datFile.ItemsDB.BucketBy(ItemKey.SHA1, DedupeType.None); + + // Then we want to loop through each of the hashes and see if we can rebuild + var keys = datFile.ItemsDB.SortedKeys.ToList(); + foreach (string hash in keys) + { + // Pre-empt any issues that could arise from string length + if (hash.Length != Constants.SHA1Length) + continue; + + logger.User($"Checking hash '{hash}'"); + + // Get the extension path for the hash + string? subpath = Utilities.GetDepotPath(hash, datFile.Header.GetFieldValue(DatHeader.InputDepotKey)?.Depth ?? 0); + if (subpath == null) + continue; + + // Find the first depot that includes the hash + string? foundpath = null; + foreach (string directory in directories) + { + if (System.IO.File.Exists(Path.Combine(directory, subpath))) + { + foundpath = Path.Combine(directory, subpath); + break; + } + } + + // If we didn't find a path, then we continue + if (foundpath == null) + continue; + + // If we have a path, we want to try to get the rom information + GZipArchive tgz = new(foundpath); + BaseFile? 
fileinfo = tgz.GetTorrentGZFileInfo(); + + // If the file information is null, then we continue + if (fileinfo == null) + continue; + + // Now we want to remove all duplicates from the DAT + datFile.ItemsDB.GetDuplicates(new Rom(fileinfo)) + .AddRange(datFile.ItemsDB.GetDuplicates(new Disk(fileinfo))); + } + + watch.Stop(); + + // Set fixdat headers in case of writing out + datFile.Header.SetFieldValue(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}"); + datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}"); + datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}"); datFile.ItemsDB.ClearMarked(); return success; @@ -158,6 +242,50 @@ namespace SabreTools.DatTools datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}"); datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}"); datFile.Items.ClearMarked(); + + return success; + } + + /// + /// Verify a DatFile against a set of inputs, leaving only missing files + /// + /// Current DatFile object to verify against + /// True if only hashes should be checked, false for full file information + /// True if verification was a success, false otherwise + public static bool VerifyGenericDB(DatFile datFile, bool hashOnly) + { + bool success = true; + + var watch = new InternalStopwatch("Verifying all from supplied paths"); + + // Force bucketing according to the flags + if (hashOnly) + datFile.ItemsDB.BucketBy(ItemKey.CRC, DedupeType.Full); + else + datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.Full); + + // Then mark items for removal + var keys = datFile.ItemsDB.SortedKeys.ToList(); + foreach (string key in keys) 
+ { + var items = datFile.ItemsDB.GetDatItemsForBucket(key); + if (items == null) + continue; + + for (int i = 0; i < items.Length; i++) + { + // Unmatched items will have a source ID of int.MaxValue, remove all others + if (items[i].Item2.GetFieldValue(DatItem.SourceKey)?.Index != int.MaxValue) + items[i].Item2.SetFieldValue(DatItem.RemoveKey, true); + } + } + + watch.Stop(); + + // Set fixdat headers in case of writing out + datFile.Header.SetFieldValue(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}"); + datFile.Header.SetFieldValue(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}"); + datFile.Header.SetFieldValue(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}"); datFile.ItemsDB.ClearMarked(); return success; diff --git a/SabreTools.DatTools/Writer.cs b/SabreTools.DatTools/Writer.cs index eefc96f5..586e5f99 100644 --- a/SabreTools.DatTools/Writer.cs +++ b/SabreTools.DatTools/Writer.cs @@ -69,9 +69,11 @@ namespace SabreTools.DatTools // Bucket roms by game name, if not already datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None); + datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.None); // Output the number of items we're going to be writing logger.User($"A total of {datFile.Items.DatStatistics.TotalCount - datFile.Items.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'"); + //logger.User($"A total of {datFile.ItemsDB.DatStatistics.TotalCount - datFile.ItemsDB.DatStatistics.RemovedCount} items will be written out to '{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}'"); // Get the outfile names Dictionary outfiles = datFile.Header.CreateOutFileNames(outDir!, overwrite); @@ -128,15 +130,28 @@ namespace SabreTools.DatTools if (diskCount + mediaCount + romCount == 0) datFile.Items.RecalculateStats(); + 
diskCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Disk); + mediaCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Media); + romCount = datFile.ItemsDB.DatStatistics.GetItemCount(ItemType.Rom); + + if (diskCount + mediaCount + romCount == 0) + datFile.ItemsDB.RecalculateStats(); + datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, norename: true); + datFile.ItemsDB.BucketBy(ItemKey.Machine, DedupeType.None, norename: true); datFile.Items.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey); datFile.Items.DatStatistics.MachineCount = datFile.Items.Keys.Count; datFile.Items.DatStatistics.IsDirectory = false; + datFile.ItemsDB.DatStatistics.DisplayName = datFile.Header.GetStringFieldValue(DatHeader.FileNameKey); + datFile.ItemsDB.DatStatistics.MachineCount = datFile.Items.Keys.Count; + datFile.ItemsDB.DatStatistics.IsDirectory = false; + var statsList = new List { datFile.Items.DatStatistics, + //datFile.ItemsDB.DatStatistics, }; var consoleOutput = BaseReport.Create(StatReportFormat.None, statsList); consoleOutput!.WriteToFile(null, true, true); @@ -206,14 +221,19 @@ namespace SabreTools.DatTools { // Force a statistics recheck, just in case datFile.Items.RecalculateStats(); + datFile.ItemsDB.RecalculateStats(); // If there's nothing there, abort if (datFile.Items.DatStatistics.TotalCount == 0) return false; + if (datFile.ItemsDB.DatStatistics.TotalCount == 0) + return false; // If every item is removed, abort if (datFile.Items.DatStatistics.TotalCount == datFile.Items.DatStatistics.RemovedCount) return false; + if (datFile.ItemsDB.DatStatistics.TotalCount == datFile.ItemsDB.DatStatistics.RemovedCount) + return false; return true; } diff --git a/SabreTools/Features/Verify.cs b/SabreTools/Features/Verify.cs index 4b2c3694..9f1cc4bf 100644 --- a/SabreTools/Features/Verify.cs +++ b/SabreTools/Features/Verify.cs @@ -94,6 +94,7 @@ namespace SabreTools.Features } Verification.VerifyGeneric(datdata, hashOnly); + 
//Verification.VerifyGenericDB(datdata, hashOnly); } // Now write out if there are any items left @@ -145,6 +146,7 @@ namespace SabreTools.Features } Verification.VerifyGeneric(datdata, hashOnly); + //Verification.VerifyGenericDB(datdata, hashOnly); } // Now write out if there are any items left