using System.Collections.Generic;
using System.IO;
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.FileTypes;
using SabreTools.FileTypes.Archives;
using SabreTools.Hashing;
using SabreTools.IO.Logging;

namespace SabreTools.DatTools
{
    /// <summary>
    /// Helper methods for verifying data from DatFiles
    /// </summary>
    public class Verification
    {
        #region Logging

        /// <summary>
        /// Logging object
        /// </summary>
        private static readonly Logger _staticLogger = new();

        #endregion
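
        // A minimal usage sketch, kept in comment form. The loader below is a
        // hypothetical placeholder for whatever the caller uses to parse a DAT;
        // only the Verify* methods are defined in this class:
        //
        //     DatFile datFile = LoadDat("input.dat");        // hypothetical loader
        //     List<string> depots = ["depot1", "depot2"];
        //     bool ok = Verification.VerifyDepot(datFile, depots);
        //     // datFile now holds only the missing entries, with its header
        //     // renamed to "fixDAT_...", ready to be written back out.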

        /// <summary>
        /// Verify a DatFile against a set of depots, leaving only missing files
        /// </summary>
        /// <param name="datFile">Current DatFile object to verify against</param>
        /// <param name="inputs">List of input directories to compare against</param>
        /// <returns>True if verification was a success, false otherwise</returns>
        public static bool VerifyDepot(DatFile datFile, List<string> inputs)
        {
            bool success = true;

            InternalStopwatch watch = new("Verifying all from supplied depots");

            // Now loop through and get only directories from the input paths
            List<string> directories = [];
            foreach (string input in inputs)
            {
                // Add to the list if the input is a directory
                if (Directory.Exists(input))
                {
                    _staticLogger.Verbose($"Adding depot: {input}");
                    directories.Add(input);
                }
            }

            // If we don't have any directories, we want to exit
            if (directories.Count == 0)
                return success;

            // Now that we have a list of depots, we want to bucket the input DAT by SHA-1
            datFile.BucketBy(ItemKey.SHA1);
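            // (Each bucket key is now a SHA-1 hex string, the same value the
            // depots use to name and place their stored files.)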

            // Then we want to loop through each of the hashes and see if we can rebuild
            foreach (string hash in datFile.Items.SortedKeys)
            {
                // Pre-empt any issues that could arise from string length
                if (hash.Length != Constants.SHA1Length)
                    continue;

                _staticLogger.User($"Checking hash '{hash}'");

                // Get the extension path for the hash
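                // (By romba convention the subpath nests hex pairs of the SHA-1
                // down to the configured depot depth and ends in "<sha1>.gz",
                // e.g. depth 4 yields "ab/cd/ef/12/<sha1>.gz"; the exact layout
                // is whatever Utilities.GetDepotPath produces.)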
                string? subpath = Utilities.GetDepotPath(hash, datFile.Modifiers.InputDepot?.Depth ?? 0);
                if (subpath == null)
                    continue;

                // Find the first depot that includes the hash
                string? foundpath = null;
                foreach (string directory in directories)
                {
                    if (System.IO.File.Exists(Path.Combine(directory, subpath)))
                    {
                        foundpath = Path.Combine(directory, subpath);
                        break;
                    }
                }

                // If we didn't find a path, then we continue
                if (foundpath == null)
                    continue;

                // If we have a path, we want to try to get the rom information
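                // (TorrentGZ archives keep the original file's size and hashes in
                // the gzip header, with the SHA-1 doubling as the file name, so
                // this lookup should not need to decompress the payload.)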
                GZipArchive tgz = new(foundpath);
                BaseFile? fileinfo = tgz.GetTorrentGZFileInfo();

                // If the file information is null, then we continue
                if (fileinfo == null)
                    continue;

                // Now we want to remove all duplicates from the DAT
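                // (The returned duplicate lists are discarded: the calls are made
                // for their side effect of flagging matched entries, which the
                // ClearMarked call below removes, leaving only the missing files.)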
                _ = datFile.GetDuplicates(fileinfo.ConvertToRom());
                _ = datFile.GetDuplicates(fileinfo.ConvertToDisk());
            }

            watch.Stop();

            // Set fixdat headers in case of writing out
            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
            datFile.ClearMarked();

            return success;
        }

        /// <summary>
        /// Verify a DatFile against a set of depots using the database-backed item set, leaving only missing files
        /// </summary>
        /// <param name="datFile">Current DatFile object to verify against</param>
        /// <param name="inputs">List of input directories to compare against</param>
        /// <returns>True if verification was a success, false otherwise</returns>
        public static bool VerifyDepotDB(DatFile datFile, List<string> inputs)
        {
            bool success = true;

            var watch = new InternalStopwatch("Verifying all from supplied depots");

            // Now loop through and get only directories from the input paths
            List<string> directories = [];
            foreach (string input in inputs)
            {
                // Add to the list if the input is a directory
                if (Directory.Exists(input))
                {
                    _staticLogger.Verbose($"Adding depot: {input}");
                    directories.Add(input);
                }
            }

            // If we don't have any directories, we want to exit
            if (directories.Count == 0)
                return success;

            // Now that we have a list of depots, we want to bucket the input DAT by SHA-1
            datFile.BucketBy(ItemKey.SHA1);

            // Then we want to loop through each of the hashes and see if we can rebuild
            List<string> keys = [.. datFile.ItemsDB.SortedKeys];
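            // (Keys are snapshotted into a list up front, presumably so the
            // enumeration stays stable even if the database-backed set changes
            // while hashes are processed.)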
            foreach (string hash in keys)
            {
                // Pre-empt any issues that could arise from string length
                if (hash.Length != Constants.SHA1Length)
                    continue;

                _staticLogger.User($"Checking hash '{hash}'");

                // Get the extension path for the hash
                string? subpath = Utilities.GetDepotPath(hash, datFile.Modifiers.InputDepot?.Depth ?? 0);
                if (subpath == null)
                    continue;

                // Find the first depot that includes the hash
                string? foundpath = null;
                foreach (string directory in directories)
                {
                    if (System.IO.File.Exists(Path.Combine(directory, subpath)))
                    {
                        foundpath = Path.Combine(directory, subpath);
                        break;
                    }
                }

                // If we didn't find a path, then we continue
                if (foundpath == null)
                    continue;

                // If we have a path, we want to try to get the rom information
                GZipArchive tgz = new(foundpath);
                BaseFile? fileinfo = tgz.GetTorrentGZFileInfo();

                // If the file information is null, then we continue
                if (fileinfo == null)
                    continue;

                // Now we want to remove all duplicates from the DAT
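                // (The -1 key marks the probe item as external to the item
                // database; it was built from the depot file rather than loaded
                // from a DAT, so it must not collide with a real item index.)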
                _ = datFile.GetDuplicatesDB(new KeyValuePair<long, DatItem>(-1, fileinfo.ConvertToRom()));
                _ = datFile.GetDuplicatesDB(new KeyValuePair<long, DatItem>(-1, fileinfo.ConvertToDisk()));
            }

            watch.Stop();

            // Set fixdat headers in case of writing out
            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
            datFile.ClearMarked();

            return success;
        }

        /// <summary>
        /// Verify a DatFile against a set of inputs, leaving only missing files
        /// </summary>
        /// <param name="datFile">Current DatFile object to verify against</param>
        /// <param name="hashOnly">True if only hashes should be checked, false for full file information</param>
        /// <returns>True if verification was a success, false otherwise</returns>
        public static bool VerifyGeneric(DatFile datFile, bool hashOnly)
        {
            bool success = true;

            InternalStopwatch watch = new("Verifying all from supplied paths");

            // Force bucketing according to the flags
            datFile.Items.SetBucketedBy(ItemKey.NULL);
            if (hashOnly)
            {
                datFile.BucketBy(ItemKey.CRC);
                datFile.Deduplicate();
            }
            else
            {
                datFile.BucketBy(ItemKey.Machine);
                datFile.Deduplicate();
            }
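            // (Hash-only verification buckets purely by CRC, so items are
            // matched by hash alone regardless of machine; full verification
            // buckets by machine, so items are compared within their machine
            // context. Deduplicate then collapses exact duplicates either way.)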

            // Then mark items for removal
            foreach (string key in datFile.Items.SortedKeys)
            {
                List<DatItem>? items = datFile.GetItemsForBucket(key);
                if (items == null)
                    continue;

                for (int i = 0; i < items.Count; i++)
                {
                    // Unmatched items will have a source ID of int.MaxValue, remove all others
                    if (items[i].GetFieldValue<Source?>(DatItem.SourceKey)?.Index != int.MaxValue)
                        items[i].SetFieldValue<bool?>(DatItem.RemoveKey, true);
                }
            }

            watch.Stop();

            // Set fixdat headers in case of writing out
            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
            datFile.ClearMarked();

            return success;
        }

        /// <summary>
        /// Verify a DatFile against a set of inputs using the database-backed item set, leaving only missing files
        /// </summary>
        /// <param name="datFile">Current DatFile object to verify against</param>
        /// <param name="hashOnly">True if only hashes should be checked, false for full file information</param>
        /// <returns>True if verification was a success, false otherwise</returns>
        public static bool VerifyGenericDB(DatFile datFile, bool hashOnly)
        {
            bool success = true;

            var watch = new InternalStopwatch("Verifying all from supplied paths");

            // Force bucketing according to the flags
            if (hashOnly)
            {
                datFile.BucketBy(ItemKey.CRC);
                datFile.Deduplicate();
            }
            else
            {
                datFile.BucketBy(ItemKey.Machine);
                datFile.Deduplicate();
            }

            // Then mark items for removal
            List<string> keys = [.. datFile.ItemsDB.SortedKeys];
            foreach (string key in keys)
            {
                var items = datFile.ItemsDB.GetItemsForBucket(key);
                if (items == null)
                    continue;

                foreach (var item in items)
                {
                    // Get the source associated with the item
                    var source = datFile.GetSourceForItemDB(item.Key);

                    // Unmatched items will have a source ID of int.MaxValue, remove all others
                    if (source.Value?.Index != int.MaxValue)
                        item.Value.SetFieldValue<bool?>(DatItem.RemoveKey, true);
                }
            }

            watch.Stop();

            // Set fixdat headers in case of writing out
            datFile.Header.SetFieldValue<string?>(DatHeader.FileNameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(DatHeader.FileNameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.NameKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.NameKey)}");
            datFile.Header.SetFieldValue<string?>(Models.Metadata.Header.DescriptionKey, $"fixDAT_{datFile.Header.GetStringFieldValue(Models.Metadata.Header.DescriptionKey)}");
            datFile.ClearMarked();

            return success;
        }
    }
}