// SabreTools/SabreTools.DatTools/Rebuilder.cs
using System.Collections.Generic;
using System.IO;
#if NET40_OR_GREATER || NETCOREAPP
using System.Threading.Tasks;
#endif
using SabreTools.Core.Tools;
using SabreTools.DatFiles;
using SabreTools.DatItems;
using SabreTools.DatItems.Formats;
using SabreTools.FileTypes;
using SabreTools.FileTypes.Archives;
using SabreTools.Hashing;
using SabreTools.IO.Extensions;
using SabreTools.IO.Logging;
using SabreTools.Skippers;
namespace SabreTools.DatTools
{
/// <summary>
/// Helper methods for rebuilding from DatFiles
/// </summary>
public class Rebuilder
{
#region Logging

/// <summary>
/// Logging object used by all static rebuild helpers in this class
/// </summary>
private static readonly Logger logger = new();

#endregion
/// <summary>
/// Process the DAT and find all matches in input files and folders assuming they're a depot
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="inputs">List of input files/folders to check</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="delete">True if input files should be deleted, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <returns>True if rebuilding was a success, false otherwise</returns>
public static bool RebuildDepot(
    DatFile datFile,
    List<string> inputs,
    string outDir,
    bool date = false,
    bool delete = false,
    bool inverse = false,
    OutputFormat outputFormat = OutputFormat.Folder)
{
    #region Perform setup

    // If the DAT is not populated and inverse is not set, inform the user and quit
    if (datFile.Items.DatStatistics.TotalCount == 0 && !inverse)
    {
        logger.User("No entries were found to rebuild, exiting...");
        return false;
    }

    // Check that the output directory exists
    outDir = outDir.Ensure(create: true);

    // Now we want to get forcepack flag if it's not overridden
    PackingFlag forcePacking = datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>();
    if (outputFormat == OutputFormat.Folder && forcePacking != PackingFlag.None)
        outputFormat = GetOutputFormat(forcePacking);

    #endregion

    bool success = true;

    #region Rebuild from depots in order

    string format = FromOutputFormat(outputFormat) ?? string.Empty;
    InternalStopwatch watch = new($"Rebuilding all files to {format}");

    // Now loop through and get only directories from the input paths
    List<string> directories = [];
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(inputs, Core.Globals.ParallelOptions, input =>
#elif NET40_OR_GREATER
    Parallel.ForEach(inputs, input =>
#else
    foreach (var input in inputs)
#endif
    {
        // Add to the list if the input is a directory
        if (Directory.Exists(input))
        {
            logger.Verbose($"Adding depot: {input}");
            // Parallel branches write concurrently, so guard the shared list
            lock (directories)
            {
                directories.Add(input);
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // If we don't have any directories, we want to exit
    if (directories.Count == 0)
        return success;

    // Now that we have a list of depots, we want to bucket the input DAT by SHA-1
    datFile.Items.BucketBy(ItemKey.SHA1, DedupeType.None);

    // Then we want to loop through each of the hashes and see if we can rebuild
    List<string> keys = [.. datFile.Items.SortedKeys];
    foreach (string hash in keys)
    {
        // Pre-empt any issues that could arise from string length
        if (hash.Length != Constants.SHA1Length)
            continue;

        logger.User($"Checking hash '{hash}'");

        // Get the extension path for the hash
        string? subpath = Utilities.GetDepotPath(hash, datFile.Header.GetFieldValue<DepotInformation?>(DatHeader.InputDepotKey)?.Depth ?? 0);
        if (subpath == null)
            continue;

        // Find the first depot that includes the hash
        string? foundpath = null;
        foreach (string directory in directories)
        {
            if (System.IO.File.Exists(Path.Combine(directory, subpath)))
            {
                foundpath = Path.Combine(directory, subpath);
                break;
            }
        }

        // If we didn't find a path, then we continue
        if (foundpath == null)
            continue;

        // If we have a path, we want to try to get the rom information
        GZipArchive archive = new(foundpath);
        BaseFile? fileinfo = archive.GetTorrentGZFileInfo();

        // If the file information is null, then we continue
        if (fileinfo == null)
            continue;

        // Ensure we are sorted correctly (some other calls can change this)
        //datFile.Items.BucketBy(ItemKey.SHA1, DedupeType.None);

        // If there are no items in the hash, we continue
        var items = datFile.Items[hash];
        if (items == null || items.Count == 0)
            continue;

        // Otherwise, we rebuild that file to all locations that we need to,
        // picking the DatItem wrapper based on the type of the first match
        ItemType firstItemType = items[0].GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>();
        bool usedInternally;
        if (firstItemType == ItemType.Disk)
            usedInternally = RebuildIndividualFile(datFile, new Disk(fileinfo), foundpath, outDir, date, inverse, outputFormat, isZip: false);
        else if (firstItemType == ItemType.File)
            usedInternally = RebuildIndividualFile(datFile, new DatItems.Formats.File(fileinfo), foundpath, outDir, date, inverse, outputFormat, isZip: false);
        else if (firstItemType == ItemType.Media)
            usedInternally = RebuildIndividualFile(datFile, new Media(fileinfo), foundpath, outDir, date, inverse, outputFormat, isZip: false);
        else
            usedInternally = RebuildIndividualFile(datFile, new Rom(fileinfo), foundpath, outDir, date, inverse, outputFormat, isZip: false);

        // If we are supposed to delete the depot file, do so
        if (delete && usedInternally)
            System.IO.File.Delete(foundpath);
    }

    watch.Stop();

    #endregion

    return success;
}
/// <summary>
/// Process the DAT and find all matches in input files and folders
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="inputs">List of input files/folders to check</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="quickScan">True to enable external scanning of archives, false otherwise</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="delete">True if input files should be deleted, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="asFile">TreatAsFile representing special format scanning</param>
/// <returns>True if rebuilding was a success, false otherwise</returns>
public static bool RebuildGeneric(
    DatFile datFile,
    List<string> inputs,
    string outDir,
    bool quickScan = false,
    bool date = false,
    bool delete = false,
    bool inverse = false,
    OutputFormat outputFormat = OutputFormat.Folder,
    TreatAsFile asFile = 0x00)
{
    #region Perform setup

    // If the DAT is not populated and inverse is not set, inform the user and quit
    if (datFile.Items.DatStatistics.TotalCount == 0 && !inverse)
    {
        logger.User("No entries were found to rebuild, exiting...");
        return false;
    }

    // Check that the output directory exists
    if (!Directory.Exists(outDir))
    {
        Directory.CreateDirectory(outDir);
        outDir = Path.GetFullPath(outDir);
    }

    // Now we want to get forcepack flag if it's not overridden
    PackingFlag forcePacking = datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>();
    if (outputFormat == OutputFormat.Folder && forcePacking != PackingFlag.None)
        outputFormat = GetOutputFormat(forcePacking);

    #endregion

    bool success = true;

    #region Rebuild from sources in order

    string format = FromOutputFormat(outputFormat) ?? string.Empty;
    InternalStopwatch watch = new($"Rebuilding all files to {format}");

    // Now loop through all of the files in all of the inputs
    foreach (string input in inputs)
    {
        // If the input is a file
        if (System.IO.File.Exists(input))
        {
            logger.User($"Checking file: {input}");
            bool rebuilt = RebuildGenericHelper(datFile, input, outDir, quickScan, date, inverse, outputFormat, asFile);

            // If we are supposed to delete the file, do so
            if (delete && rebuilt)
                System.IO.File.Delete(input);
        }

        // If the input is a directory
        else if (Directory.Exists(input))
        {
            logger.Verbose($"Checking directory: {input}");
#if NET20 || NET35
            // GetFiles with SearchOption is available since .NET Framework 2.0;
            // use it so the legacy branch also recurses into subdirectories
            foreach (string file in Directory.GetFiles(input, "*", SearchOption.AllDirectories))
#else
            foreach (string file in Directory.EnumerateFiles(input, "*", SearchOption.AllDirectories))
#endif
            {
                logger.User($"Checking file: {file}");
                bool rebuilt = RebuildGenericHelper(datFile, file, outDir, quickScan, date, inverse, outputFormat, asFile);

                // If we are supposed to delete the file, do so
                if (delete && rebuilt)
                    System.IO.File.Delete(file);
            }
        }
    }

    watch.Stop();

    #endregion

    return success;
}
/// <summary>
/// Attempt to add a file to the output if it matches
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="quickScan">True to enable external scanning of archives, false otherwise</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="asFile">TreatAsFile representing special format scanning</param>
/// <returns>True if the file was used to rebuild, false otherwise</returns>
private static bool RebuildGenericHelper(
    DatFile datFile,
    string file,
    string outDir,
    bool quickScan,
    bool date,
    bool inverse,
    OutputFormat outputFormat,
    TreatAsFile asFile)
{
    // If we somehow have a null filename, return
    if (file == null)
        return false;

    // Set the deletion variables
    bool usedExternally = false, usedInternally = false;

    // Create an empty list of BaseFile for archive entries
    List<BaseFile>? entries = null;

    // Get the TGZ and TXZ status for later
    GZipArchive tgz = new(file);
    XZArchive txz = new(file);
    bool isSingleTorrent = tgz.IsStandardized() || txz.IsStandardized();

    // Get the base archive first
    BaseArchive? archive = FileTypeTool.CreateArchiveType(file);

    // Now get all extracted items from the archive; quick scan only needs CRC32
    HashType[] hashTypes = quickScan ? [HashType.CRC32] : [HashType.CRC32, HashType.MD5, HashType.SHA1];
    if (archive != null)
    {
        archive.SetHashTypes(hashTypes);
        entries = archive.GetChildren();
    }

    // If the entries list is null, we encountered an error or have a file and should scan externally
    if (entries == null && System.IO.File.Exists(file))
    {
        BaseFile? internalFileInfo = FileTypeTool.GetInfo(file, hashTypes);

        // Create the correct DatItem; AaruFormat and CHD inputs map to Media and
        // Disk respectively unless the corresponding TreatAsFile flag is set
        DatItem? internalDatItem;
        if (internalFileInfo == null)
            internalDatItem = null;
#if NET20 || NET35
        else if (internalFileInfo is FileTypes.Aaru.AaruFormat && (asFile & TreatAsFile.AaruFormat) == 0)
#else
        else if (internalFileInfo is FileTypes.Aaru.AaruFormat && !asFile.HasFlag(TreatAsFile.AaruFormat))
#endif
            internalDatItem = new Media(internalFileInfo);
#if NET20 || NET35
        else if (internalFileInfo is FileTypes.CHD.CHDFile && (asFile & TreatAsFile.CHD) == 0)
#else
        else if (internalFileInfo is FileTypes.CHD.CHDFile && !asFile.HasFlag(TreatAsFile.CHD))
#endif
            internalDatItem = new Disk(internalFileInfo);
        else
            internalDatItem = new Rom(internalFileInfo);

        if (internalDatItem != null)
            usedExternally = RebuildIndividualFile(datFile, internalDatItem, file, outDir, date, inverse, outputFormat);
    }
    // Otherwise, loop through the entries and try to match
    else if (entries != null)
    {
        foreach (BaseFile entry in entries)
        {
            DatItem? internalDatItem = DatItemTool.CreateDatItem(entry);
            if (internalDatItem == null)
                continue;

            usedInternally |= RebuildIndividualFile(datFile, internalDatItem, file, outDir, date, inverse, outputFormat, !isSingleTorrent /* isZip */);
        }
    }

    return usedExternally || usedInternally;
}
/// <summary>
/// Find duplicates and rebuild individual files to output
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TGZ/TXZ, null otherwise</param>
/// <returns>True if the file was able to be rebuilt, false otherwise</returns>
private static bool RebuildIndividualFile(
    DatFile datFile,
    DatItem datItem,
    string file,
    string outDir,
    bool date,
    bool inverse,
    OutputFormat outputFormat,
    bool? isZip = null)
{
    // Set the initial output value
    bool rebuilt = false;

    // If the DatItem is a Disk or Media, force rebuilding to a folder except if TGZ or TXZ
    if ((datItem is Disk || datItem is Media)
        && !(outputFormat == OutputFormat.TorrentGzip || outputFormat == OutputFormat.TorrentGzipRomba)
        && !(outputFormat == OutputFormat.TorrentXZ || outputFormat == OutputFormat.TorrentXZRomba))
    {
        outputFormat = OutputFormat.Folder;
    }

    // If we have a Disk, File, or Media, change it into a Rom for later use
    if (datItem is Disk disk)
        datItem = disk.ConvertToRom();
    else if (datItem is DatItems.Formats.File fileItem)
        datItem = fileItem.ConvertToRom();
    else if (datItem is Media media)
        datItem = media.ConvertToRom();

    // Prepopluate a key string
    string crc = (datItem as Rom)!.GetStringFieldValue(Models.Metadata.Rom.CRCKey) ?? string.Empty;

    // Try to get the stream for the file
    if (!GetFileStream(datItem, file, isZip, out Stream? fileStream) || fileStream == null)
        return false;

    // If either we have duplicates or we're filtering
    if (ShouldRebuild(datFile, datItem, fileStream, inverse, out List<DatItem> dupes))
    //if (ShouldRebuildDB(datFile, datItem, fileStream, inverse, out List<DatItem> dupes))
    {
        // If we have a very specific TGZ->TGZ case, just copy it accordingly
        if (RebuildTorrentGzip(datFile, datItem, file, outDir, outputFormat, isZip))
            return true;

        // If we have a very specific TXZ->TXZ case, just copy it accordingly
        if (RebuildTorrentXz(datFile, datItem, file, outDir, outputFormat, isZip))
            return true;

        // Create a temp file if we're compressing the data after or if there are multiple dupes
        string? tempFile = null;
        if (outputFormat != OutputFormat.Folder || dupes.Count > 1)
        {
            tempFile = Path.Combine(outDir, $"tmp{System.Guid.NewGuid()}");
            Stream tempStream = System.IO.File.Open(tempFile, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite);
            byte[] tempBuffer = new byte[4096 * 128];
            int zlen;
            while ((zlen = fileStream.Read(tempBuffer, 0, tempBuffer.Length)) > 0)
            {
                tempStream.Write(tempBuffer, 0, zlen);
                tempStream.Flush();
            }

            // From here on, the temp copy stands in for the original input stream
            fileStream.Dispose();
            fileStream = tempStream;
            fileStream.Seek(0, SeekOrigin.Begin);
        }

        logger.User($"{(inverse ? "No matches" : $"{dupes.Count} Matches")} found for '{Path.GetFileName(datItem.GetName() ?? datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>().AsStringValue())}', rebuilding accordingly...");
        rebuilt = true;

        // Special case for partial packing mode
        bool shouldCheck = false;
        if (outputFormat == OutputFormat.Folder && datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>() == PackingFlag.Partial)
        {
            shouldCheck = true;
            datFile.Items.BucketBy(ItemKey.Machine, DedupeType.None, lower: false);
        }

        // Now loop through the list and rebuild accordingly
        foreach (DatItem item in dupes)
        {
            // If we don't have a proper machine
            var machine = item.GetFieldValue<Machine>(DatItem.MachineKey);
            if (machine?.GetStringFieldValue(Models.Metadata.Machine.NameKey) == null)
                continue;

            // If we should check for the items in the machine
            var items = datFile.Items[machine.GetStringFieldValue(Models.Metadata.Machine.NameKey)!];
            if (shouldCheck && items!.Count > 1)
                outputFormat = OutputFormat.Folder;
            else if (shouldCheck && items!.Count == 1)
                outputFormat = OutputFormat.ParentFolder;

            // Get the output archive, if possible
            IParent? outputArchive = GetPreconfiguredFolder(datFile, date, outputFormat);

            // Now rebuild to the output file
            outputArchive!.Write(fileStream, outDir, (item as Rom)!.ConvertToBaseFile());
        }

        // Close the input stream
        fileStream.Dispose();

        // Delete the file if a temp file was created
        if (tempFile != null && System.IO.File.Exists(tempFile))
            System.IO.File.Delete(tempFile);
    }

    // Now we want to take care of headers, if applicable
    // NOTE(review): if the rebuild branch above ran, fileStream was already
    // disposed (and any temp file deleted) before this point — verify the
    // skipper path is only expected to work when no rebuild occurred
    if (datFile.Header.GetStringFieldValue(Models.Metadata.Header.HeaderKey) != null)
    {
        // Check to see if we have a matching header first
        SkipperMatch.Init();
        Rule rule = SkipperMatch.GetMatchingRule(fileStream, Path.GetFileNameWithoutExtension(datFile.Header.GetStringFieldValue(Models.Metadata.Header.HeaderKey)!));

        // If there's a match, create the new file to write
        if (rule.Tests != null && rule.Tests.Length != 0)
        {
            // If the file could be transformed correctly
            MemoryStream transformStream = new();
            if (rule.TransformStream(fileStream, transformStream, keepReadOpen: true, keepWriteOpen: true))
            {
                // Get the file informations that we will be using
                HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
                Rom headerless = new(FileTypeTool.GetInfo(transformStream, hashes));

                // If we have duplicates and we're not filtering
                if (ShouldRebuild(datFile, headerless, transformStream, false, out dupes))
                //if (ShouldRebuildDB(datFile, headerless, transformStream, false, out dupes))
                {
                    logger.User($"Headerless matches found for '{Path.GetFileName(datItem.GetName() ?? datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>().AsStringValue())}', rebuilding accordingly...");
                    rebuilt = true;

                    // Now loop through the list and rebuild accordingly
                    foreach (DatItem item in dupes)
                    {
                        // Create a headered item to use as well
                        datItem.CopyMachineInformation(item);
                        datItem.SetName($"{datItem.GetName()}_{crc}");

                        // Get the output archive, if possible
                        IParent? outputArchive = GetPreconfiguredFolder(datFile, date, outputFormat);

                        // Now rebuild to the output file
                        bool eitherSuccess = false;
                        eitherSuccess |= outputArchive!.Write(transformStream, outDir, (item as Rom)!.ConvertToBaseFile());
                        eitherSuccess |= outputArchive.Write(fileStream, outDir, (datItem as Rom)!.ConvertToBaseFile());

                        // Now add the success of either rebuild
                        rebuilt &= eitherSuccess;
                    }
                }
            }

            // Dispose of the stream
            transformStream?.Dispose();
        }

        // Dispose of the stream
        fileStream?.Dispose();
    }

    return rebuilt;
}
/// <summary>
/// Get the rebuild state for a given item
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="stream">Stream representing the input file</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="dupes">Output list of duplicate items to rebuild to</param>
/// <returns>True if the item should be rebuilt, false otherwise</returns>
private static bool ShouldRebuild(DatFile datFile, DatItem datItem, Stream? stream, bool inverse, out List<DatItem> dupes)
{
    // Find if the file has duplicates in the DAT
    dupes = datFile.Items.GetDuplicates(datItem);
    bool hasDuplicates = dupes.Count > 0;

    // If we have duplicates but we're filtering
    if (hasDuplicates && inverse)
    {
        return false;
    }

    // If we have duplicates without filtering
    else if (hasDuplicates && !inverse)
    {
        return true;
    }

    // If we have no duplicates and we're filtering
    else if (!hasDuplicates && inverse)
    {
        // NOTE(review): machinename is never assigned a non-null value in this
        // method, so the archive-name branch below is currently dead code
        string? machinename = null;

        // Get the item from the current file
        HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
        Rom item = new(FileTypeTool.GetInfo(stream, hashes));
        item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, Path.GetFileNameWithoutExtension(item.GetName()));
        item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, Path.GetFileNameWithoutExtension(item.GetName()));

        // If we are coming from an archive, set the correct machine name
        if (machinename != null)
        {
            item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, machinename);
            item.GetFieldValue<Machine>(DatItem.MachineKey)!.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, machinename);
        }

        dupes.Add(item);
        return true;
    }

    // If we have no duplicates and we're not filtering
    else
    {
        return false;
    }
}
/// <summary>
/// Get the rebuild state for a given item (database-backed item store variant)
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="stream">Stream representing the input file</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="dupes">Output mapping of item index to duplicate items to rebuild to</param>
/// <returns>True if the item should be rebuilt, false otherwise</returns>
private static bool ShouldRebuildDB(DatFile datFile, KeyValuePair<long, DatItem> datItem, Stream? stream, bool inverse, out Dictionary<long, DatItem> dupes)
{
    // Find if the file has duplicates in the DAT
    dupes = datFile.ItemsDB.GetDuplicates(datItem);
    bool hasDuplicates = dupes.Count > 0;

    // If we have duplicates but we're filtering
    if (hasDuplicates && inverse)
    {
        return false;
    }

    // If we have duplicates without filtering
    else if (hasDuplicates && !inverse)
    {
        return true;
    }

    // TODO: Figure out how getting a set of duplicates works with IDDB
    // If we have no duplicates and we're filtering
    else if (!hasDuplicates && inverse)
    {
        // NOTE(review): machinename is never assigned a non-null value here,
        // so the rename branch below is currently dead code
        string? machinename = null;

        // Get the item from the current file
        HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
        var item = new Rom(FileTypeTool.GetInfo(stream, hashes));

        // Create a machine for the current item
        var machine = new Machine();
        machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, Path.GetFileNameWithoutExtension(item.GetName()));
        machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, Path.GetFileNameWithoutExtension(item.GetName()));
        long machineIndex = datFile.ItemsDB.AddMachine(machine);

        // If we are coming from an archive, set the correct machine name
        if (machinename != null)
        {
            machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, machinename);
            machine.SetFieldValue<string?>(Models.Metadata.Machine.NameKey, machinename);
        }

        long index = datFile.ItemsDB.AddItem(item, machineIndex, -1, false);
        dupes[index] = item;
        return true;
    }

    // If we have no duplicates and we're not filtering
    else
    {
        return false;
    }
}
/// <summary>
/// Rebuild from TorrentGzip to TorrentGzip
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TGZ, null otherwise</param>
/// <returns>True if rebuilt properly, false otherwise</returns>
private static bool RebuildTorrentGzip(DatFile datFile, DatItem datItem, string file, string outDir, OutputFormat outputFormat, bool? isZip)
{
    // If we have a very specific TGZ->TGZ case, just copy it accordingly
    GZipArchive tgz = new(file);
    BaseFile? tgzRom = tgz.GetTorrentGZFileInfo();
    if (isZip == false && tgzRom != null && (outputFormat == OutputFormat.TorrentGzip || outputFormat == OutputFormat.TorrentGzipRomba))
    {
        logger.User($"Matches found for '{Path.GetFileName(datItem.GetName() ?? string.Empty)}', rebuilding accordingly...");

        // Get the proper output path, keyed by SHA-1 (Romba mode uses the depot layout)
        string sha1 = (datItem as Rom)!.GetStringFieldValue(Models.Metadata.Rom.SHA1Key) ?? string.Empty;
        if (outputFormat == OutputFormat.TorrentGzipRomba)
            outDir = Path.Combine(outDir, Utilities.GetDepotPath(sha1, datFile.Header.GetFieldValue<DepotInformation?>(DatHeader.OutputDepotKey)?.Depth ?? 0) ?? string.Empty);
        else
            outDir = Path.Combine(outDir, sha1 + ".gz");

        // Make sure the output folder is created
        string? dir = Path.GetDirectoryName(outDir);
        if (dir != null)
            Directory.CreateDirectory(dir);

        // Now copy the file over; a failed copy (e.g. destination exists) is not fatal
        try
        {
            System.IO.File.Copy(file, outDir);
            return true;
        }
        catch
        {
            return false;
        }
    }

    return false;
}
/// <summary>
/// Rebuild from TorrentXz to TorrentXz
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TXZ, null otherwise</param>
/// <returns>True if rebuilt properly, false otherwise</returns>
private static bool RebuildTorrentXz(DatFile datFile, DatItem datItem, string file, string outDir, OutputFormat outputFormat, bool? isZip)
{
    // If we have a very specific TXZ->TXZ case, just copy it accordingly
    XZArchive txz = new(file);
    BaseFile? txzRom = txz.GetTorrentXZFileInfo();
    if (isZip == false && txzRom != null && (outputFormat == OutputFormat.TorrentXZ || outputFormat == OutputFormat.TorrentXZRomba))
    {
        logger.User($"Matches found for '{Path.GetFileName(datItem.GetName() ?? string.Empty)}', rebuilding accordingly...");

        // Get the proper output path, keyed by SHA-1 (Romba mode uses the depot
        // layout, swapping the .gz depot extension for .xz)
        string sha1 = (datItem as Rom)!.GetStringFieldValue(Models.Metadata.Rom.SHA1Key) ?? string.Empty;
        if (outputFormat == OutputFormat.TorrentXZRomba)
            outDir = Path.Combine(outDir, Utilities.GetDepotPath(sha1, datFile.Header.GetFieldValue<DepotInformation?>(DatHeader.OutputDepotKey)?.Depth ?? 0) ?? string.Empty).Replace(".gz", ".xz");
        else
            outDir = Path.Combine(outDir, sha1 + ".xz");

        // Make sure the output folder is created
        string? dir = Path.GetDirectoryName(outDir);
        if (dir != null)
            Directory.CreateDirectory(dir);

        // Now copy the file over; a failed copy (e.g. destination exists) is not fatal
        try
        {
            System.IO.File.Copy(file, outDir);
            return true;
        }
        catch
        {
            return false;
        }
    }

    return false;
}
/// <summary>
/// Get the Stream related to a file
/// </summary>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="isZip">Non-null if the input file is an archive</param>
/// <param name="stream">Output stream representing the opened file, null on failure</param>
/// <returns>True if the stream opening succeeded, false otherwise</returns>
private static bool GetFileStream(DatItem datItem, string file, bool? isZip, out Stream? stream)
{
    // Get a generic stream for the file
    stream = null;

    // If we have a zipfile, extract the stream to memory
    if (isZip != null)
    {
        BaseArchive? archive = FileTypeTool.CreateArchiveType(file);
        if (archive == null)
            return false;

        try
        {
            ItemType itemType = datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>();
            (stream, _) = archive.GetEntryStream(datItem.GetName() ?? itemType.AsStringValue() ?? string.Empty);
        }
        catch
        {
            // Ignore the exception for now -- usually an over-large file
            stream = null;
            return false;
        }
    }

    // Otherwise, just open the filestream
    else
    {
        try
        {
            stream = System.IO.File.Open(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        }
        catch
        {
            // Mirror the archive path: a missing or locked file is reported as
            // a failed open instead of letting the exception propagate
            stream = null;
            return false;
        }
    }

    // If the stream is null, then continue
    if (stream == null)
        return false;

    // Seek to the beginning of the stream
    if (stream.CanSeek)
        stream.Seek(0, SeekOrigin.Begin);

    return true;
}
/// <summary>
/// Get the default OutputFormat associated with each PackingFlag
/// </summary>
/// <param name="packing">Packing flag to map</param>
/// <returns>OutputFormat matching the flag; Folder for anything unrecognized</returns>
private static OutputFormat GetOutputFormat(PackingFlag packing)
{
    // Only two flags map to something other than a plain folder
    if (packing == PackingFlag.Zip)
        return OutputFormat.TorrentZip;
    if (packing == PackingFlag.Flat)
        return OutputFormat.ParentFolder;

    // Unzip, Partial, FileOnly, None, and any unknown value all rebuild to a folder
    return OutputFormat.Folder;
}
/// <summary>
/// Get preconfigured Folder for rebuilding
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <returns>Folder configured with proper flags</returns>
private static IParent? GetPreconfiguredFolder(DatFile datFile, bool date, OutputFormat outputFormat)
{
    IParent? parent = FileTypeTool.CreateFolderType(outputFormat);

    // Archives optionally carry real dates through from the DAT
    if (date && parent is BaseArchive baseArchive)
        baseArchive.SetRealDates(date);

    // Depot-style archives need the configured depot depth
    int depotDepth = datFile.Header.GetFieldValue<DepotInformation?>(DatHeader.OutputDepotKey)?.Depth ?? 0;
    switch (parent)
    {
        case GZipArchive gzipArchive:
            gzipArchive.Depth = depotDepth;
            break;
        case XZArchive xzArchive:
            xzArchive.Depth = depotDepth;
            break;
    }

    return parent;
}
2024-02-28 22:54:56 -05:00
2020-12-10 11:07:36 -08:00
/// <summary>
/// Get string value from input OutputFormat
/// </summary>
/// <param name="itemType">OutputFormat to get value from</param>
/// <returns>String value corresponding to the OutputFormat, null if unrecognized</returns>
private static string? FromOutputFormat(OutputFormat itemType)
{
    // Collapse formats that share a display name into single arms
    return itemType switch
    {
        OutputFormat.Folder or OutputFormat.ParentFolder => "directory",
        OutputFormat.TapeArchive => "TAR",
        OutputFormat.Torrent7Zip => "Torrent7Z",
        OutputFormat.TorrentGzip or OutputFormat.TorrentGzipRomba => "TorrentGZ",
        OutputFormat.TorrentRar => "TorrentRAR",
        OutputFormat.TorrentXZ or OutputFormat.TorrentXZRomba => "TorrentXZ",
        OutputFormat.TorrentZip => "TorrentZip",
        _ => null,
    };
}
}
}