2020-12-10 11:07:36 -08:00
using System.Collections.Generic ;
using System.IO ;
2024-03-05 03:04:47 -05:00
#if NET40_OR_GREATER | | NETCOREAPP
2020-12-10 11:07:36 -08:00
using System.Threading.Tasks ;
2024-03-05 03:04:47 -05:00
#endif
2020-12-10 22:16:53 -08:00
using SabreTools.Core.Tools ;
2020-12-10 23:24:09 -08:00
using SabreTools.DatFiles ;
2020-12-10 11:07:36 -08:00
using SabreTools.DatItems ;
2021-02-02 10:23:43 -08:00
using SabreTools.DatItems.Formats ;
2020-12-10 11:07:36 -08:00
using SabreTools.FileTypes ;
2020-12-10 22:31:23 -08:00
using SabreTools.FileTypes.Archives ;
2024-03-04 23:56:05 -05:00
using SabreTools.Hashing ;
2024-04-24 13:45:38 -04:00
using SabreTools.IO.Extensions ;
2024-10-24 00:36:44 -04:00
using SabreTools.IO.Logging ;
2020-12-10 11:07:36 -08:00
using SabreTools.Skippers ;
2020-12-10 23:24:09 -08:00
namespace SabreTools.DatTools
2020-12-10 11:07:36 -08:00
{
2020-12-21 11:38:56 -08:00
/// <summary>
/// Helper methods for rebuilding from DatFiles
/// </summary>
2020-12-10 14:31:00 -08:00
public class Rebuilder
2020-12-10 11:07:36 -08:00
{
2020-12-10 14:31:00 -08:00
#region Logging
/// <summary>
/// Logging object
/// </summary>
2025-01-08 16:59:44 -05:00
private static readonly Logger _staticLogger = new ( ) ;
2020-12-10 14:31:00 -08:00
#endregion
2020-12-10 11:07:36 -08:00
/// <summary>
/// Process the DAT and find all matches in input files and folders assuming they're a depot
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="inputs">List of input files/folders to check</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="delete">True if input files should be deleted, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <returns>True if rebuilding was a success, false otherwise</returns>
public static bool RebuildDepot(
    DatFile datFile,
    List<string> inputs,
    string outDir,
    bool date = false,
    bool delete = false,
    bool inverse = false,
    OutputFormat outputFormat = OutputFormat.Folder)
{
    #region Perform setup

    // If the DAT is not populated and inverse is not set, inform the user and quit
    if (datFile.DatStatistics.TotalCount == 0 && !inverse)
    {
        _staticLogger.User("No entries were found to rebuild, exiting...");
        return false;
    }

    // Check that the output directory exists, creating it if necessary
    outDir = outDir.Ensure(create: true);

    // Now we want to get forcepack flag if it's not overridden
    PackingFlag forcePacking = datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>();
    if (outputFormat == OutputFormat.Folder && forcePacking != PackingFlag.None)
        outputFormat = GetOutputFormat(forcePacking);

    #endregion

    bool success = true;

    #region Rebuild from depots in order

    string format = FromOutputFormat(outputFormat) ?? string.Empty;
    InternalStopwatch watch = new($"Rebuilding all files to {format}");

    // Now loop through and get only directories from the input paths
    List<string> directories = [];
#if NET452_OR_GREATER || NETCOREAPP
    Parallel.ForEach(inputs, Core.Globals.ParallelOptions, input =>
#elif NET40_OR_GREATER
    Parallel.ForEach(inputs, input =>
#else
    foreach (var input in inputs)
#endif
    {
        // Add to the list if the input is a directory
        if (Directory.Exists(input))
        {
            _staticLogger.Verbose($"Adding depot: {input}");
            // Lock required: the Parallel.ForEach branches mutate the shared list
            lock (directories)
            {
                directories.Add(input);
            }
        }
#if NET40_OR_GREATER || NETCOREAPP
    });
#else
    }
#endif

    // If we don't have any directories, we want to exit
    if (directories.Count == 0)
        return success;

    // Now that we have a list of depots, we want to bucket the input DAT by SHA-1
    datFile.BucketBy(ItemKey.SHA1);

    // Then we want to loop through each of the hashes and see if we can rebuild
    foreach (string hash in datFile.Items.SortedKeys)
    {
        // Pre-empt any issues that could arise from string length
        if (hash.Length != Constants.SHA1Length)
            continue;

        _staticLogger.User($"Checking hash '{hash}'");

        // Get the extension path for the hash
        string? subpath = Utilities.GetDepotPath(hash, datFile.Modifiers.InputDepot?.Depth ?? 0);
        if (subpath == null)
            continue;

        // Find the first depot that includes the hash
        string? foundpath = null;
        foreach (string directory in directories)
        {
            if (System.IO.File.Exists(Path.Combine(directory, subpath)))
            {
                foundpath = Path.Combine(directory, subpath);
                break;
            }
        }

        // If we didn't find a path, then we continue
        if (foundpath == null)
            continue;

        // If we have a path, we want to try to get the rom information
        GZipArchive archive = new(foundpath);
        BaseFile? fileinfo = archive.GetTorrentGZFileInfo();

        // If the file information is null, then we continue
        if (fileinfo == null)
            continue;

        // Ensure we are sorted correctly (some other calls can change this)
        //datFile.BucketBy(ItemKey.SHA1, DedupeType.None);

        // If there are no items in the hash, we continue
        var items = datFile.GetItemsForBucket(hash);
        if (items == null || items.Count == 0)
            continue;

        // Otherwise, we rebuild that file to all locations that we need to.
        // Hoist the item-type lookup so it is computed once per hash instead of
        // once per comparison, then dispatch on it.
        ItemType itemType = items[0].GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>();
        bool usedInternally = itemType switch
        {
            ItemType.Disk => RebuildIndividualFile(datFile, fileinfo.ConvertToDisk(), foundpath, outDir, date, inverse, outputFormat, isZip: false),
            ItemType.File => RebuildIndividualFile(datFile, fileinfo.ConvertToFile(), foundpath, outDir, date, inverse, outputFormat, isZip: false),
            ItemType.Media => RebuildIndividualFile(datFile, fileinfo.ConvertToMedia(), foundpath, outDir, date, inverse, outputFormat, isZip: false),
            _ => RebuildIndividualFile(datFile, fileinfo.ConvertToRom(), foundpath, outDir, date, inverse, outputFormat, isZip: false),
        };

        // If we are supposed to delete the depot file, do so
        if (delete && usedInternally)
            System.IO.File.Delete(foundpath);
    }

    watch.Stop();

    #endregion

    return success;
}
/// <summary>
/// Process the DAT and find all matches in input files and folders
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="inputs">List of input files/folders to check</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="quickScan">True to enable external scanning of archives, false otherwise</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="delete">True if input files should be deleted, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="asFile">TreatAsFile representing special format scanning</param>
/// <returns>True if rebuilding was a success, false otherwise</returns>
public static bool RebuildGeneric(
    DatFile datFile,
    List<string> inputs,
    string outDir,
    bool quickScan = false,
    bool date = false,
    bool delete = false,
    bool inverse = false,
    OutputFormat outputFormat = OutputFormat.Folder,
    TreatAsFile asFile = 0x00)
{
    #region Perform setup

    // If the DAT is not populated and inverse is not set, inform the user and quit
    if (datFile.DatStatistics.TotalCount == 0 && !inverse)
    {
        _staticLogger.User("No entries were found to rebuild, exiting...");
        return false;
    }

    // Check that the output directory exists
    if (!Directory.Exists(outDir))
    {
        Directory.CreateDirectory(outDir);
        outDir = Path.GetFullPath(outDir);
    }

    // Now we want to get forcepack flag if it's not overridden
    PackingFlag forcePacking = datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>();
    if (outputFormat == OutputFormat.Folder && forcePacking != PackingFlag.None)
        outputFormat = GetOutputFormat(forcePacking);

    #endregion

    bool success = true;

    #region Rebuild from sources in order

    string format = FromOutputFormat(outputFormat) ?? string.Empty;
    InternalStopwatch watch = new($"Rebuilding all files to {format}");

    // Now loop through all of the files in all of the inputs
    foreach (string input in inputs)
    {
        // If the input is a file
        if (System.IO.File.Exists(input))
        {
            _staticLogger.User($"Checking file: {input}");
            bool rebuilt = RebuildGenericHelper(datFile, input, outDir, quickScan, date, inverse, outputFormat, asFile);

            // If we are supposed to delete the file, do so
            if (delete && rebuilt)
                System.IO.File.Delete(input);
        }

        // If the input is a directory
        else if (Directory.Exists(input))
        {
            _staticLogger.Verbose($"Checking directory: {input}");
#if NET20 || NET35
            // BUGFIX: previously GetFiles(input, "*") was used here, which is
            // non-recursive and silently skipped subdirectories on old frameworks.
            // The SearchOption overload exists since .NET 2.0 and matches the
            // EnumerateFiles branch below.
            foreach (string file in Directory.GetFiles(input, "*", SearchOption.AllDirectories))
#else
            foreach (string file in Directory.EnumerateFiles(input, "*", SearchOption.AllDirectories))
#endif
            {
                _staticLogger.User($"Checking file: {file}");
                bool rebuilt = RebuildGenericHelper(datFile, file, outDir, quickScan, date, inverse, outputFormat, asFile);

                // If we are supposed to delete the file, do so
                if (delete && rebuilt)
                    System.IO.File.Delete(file);
            }
        }
    }

    watch.Stop();

    #endregion

    return success;
}
/// <summary>
/// Attempt to add a file to the output if it matches
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="quickScan">True to enable external scanning of archives, false otherwise</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="asFile">TreatAsFile representing special format scanning</param>
/// <returns>True if the file was used to rebuild, false otherwise</returns>
private static bool RebuildGenericHelper(
    DatFile datFile,
    string file,
    string outDir,
    bool quickScan,
    bool date,
    bool inverse,
    OutputFormat outputFormat,
    TreatAsFile asFile)
{
    // If we somehow have a null filename, return
    if (file == null)
        return false;

    // Set the deletion variables: external = scanned as a loose file,
    // internal = one or more archive entries were rebuilt
    bool usedExternally = false, usedInternally = false;

    // Create an empty list of BaseFile for archive entries
    List<BaseFile>? entries = null;

    // Get the TGZ and TXZ status for later; a standardized torrent archive is
    // treated as a single file rather than as a container
    GZipArchive tgz = new(file);
    XZArchive txz = new(file);
    bool isSingleTorrent = tgz.IsStandardized() || txz.IsStandardized();

    // Get the base archive first
    BaseArchive? archive = FileTypeTool.CreateArchiveType(file);

    // Now get all extracted items from the archive.
    // quickScan limits hashing to CRC-32 only; otherwise CRC-32/MD5/SHA-1 are computed.
    HashType[] hashTypes = quickScan ? [HashType.CRC32] : [HashType.CRC32, HashType.MD5, HashType.SHA1];
    if (archive != null)
    {
        archive.SetHashTypes(hashTypes);
        entries = archive.GetChildren();
    }

    // If the entries list is null, we encountered an error or have a file and should scan externally
    if (entries == null && System.IO.File.Exists(file))
    {
        BaseFile? internalFileInfo = FileTypeTool.GetInfo(file, hashTypes);

        // Create the correct DatItem based on the detected file type, unless the
        // asFile flags say to treat that format as a plain file
        DatItem? internalDatItem;
        if (internalFileInfo == null)
            internalDatItem = null;
#if NET20 || NET35
        // Enum.HasFlag is unavailable before .NET 4, so use a manual mask check
        else if (internalFileInfo is FileTypes.Aaru.AaruFormat && (asFile & TreatAsFile.AaruFormat) == 0)
#else
        else if (internalFileInfo is FileTypes.Aaru.AaruFormat && !asFile.HasFlag(TreatAsFile.AaruFormat))
#endif
            internalDatItem = internalFileInfo.ConvertToMedia();
#if NET20 || NET35
        else if (internalFileInfo is FileTypes.CHD.CHDFile && (asFile & TreatAsFile.CHD) == 0)
#else
        else if (internalFileInfo is FileTypes.CHD.CHDFile && !asFile.HasFlag(TreatAsFile.CHD))
#endif
            internalDatItem = internalFileInfo.ConvertToDisk();
        else
            internalDatItem = internalFileInfo.ConvertToRom();

        if (internalDatItem != null)
            usedExternally = RebuildIndividualFile(datFile, internalDatItem, file, outDir, date, inverse, outputFormat);
    }
    // Otherwise, loop through the entries and try to match
    else if (entries != null)
    {
        foreach (BaseFile entry in entries)
        {
            DatItem? internalDatItem = DatItemTool.CreateDatItem(entry);
            if (internalDatItem == null)
                continue;

            // Standardized TGZ/TXZ inputs are passed as isZip: false so the
            // TGZ->TGZ / TXZ->TXZ fast paths can apply
            usedInternally |= RebuildIndividualFile(datFile, internalDatItem, file, outDir, date, inverse, outputFormat, !isSingleTorrent /* isZip */);
        }
    }

    return usedExternally || usedInternally;
}
/// <summary>
/// Find duplicates and rebuild individual files to output
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TGZ/TXZ, null otherwise</param>
/// <returns>True if the file was able to be rebuilt, false otherwise</returns>
private static bool RebuildIndividualFile(
    DatFile datFile,
    DatItem datItem,
    string file,
    string outDir,
    bool date,
    bool inverse,
    OutputFormat outputFormat,
    bool? isZip = null)
{
    // Set the initial output value
    bool rebuilt = false;

    // If the DatItem is a Disk or Media, force rebuilding to a folder except if TGZ or TXZ
    if ((datItem is Disk || datItem is Media)
        && !(outputFormat == OutputFormat.TorrentGzip || outputFormat == OutputFormat.TorrentGzipRomba)
        && !(outputFormat == OutputFormat.TorrentXZ || outputFormat == OutputFormat.TorrentXZRomba))
    {
        outputFormat = OutputFormat.Folder;
    }

    // If we have a Disk, File, or Media, change it into a Rom for later use
    if (datItem is Disk disk)
        datItem = disk.ConvertToRom();
    else if (datItem is DatItems.Formats.File fileItem)
        datItem = fileItem.ConvertToRom();
    else if (datItem is Media media)
        datItem = media.ConvertToRom();

    // Prepopluate a key string (CRC of the item, used for the headered-name suffix below)
    string crc = (datItem as Rom)!.GetStringFieldValue(Models.Metadata.Rom.CRCKey) ?? string.Empty;

    // Try to get the stream for the file
    if (!GetFileStream(datItem, file, isZip, out Stream? fileStream) || fileStream == null)
        return false;

    // If either we have duplicates or we're filtering
    if (ShouldRebuild(datFile, datItem, fileStream, inverse, out List<DatItem> dupes))
    //if (ShouldRebuildDB(datFile, datItem, fileStream, inverse, out List<DatItem> dupes))
    {
        // If we have a very specific TGZ->TGZ case, just copy it accordingly
        if (RebuildTorrentGzip(datFile, datItem, file, outDir, outputFormat, isZip))
            return true;

        // If we have a very specific TXZ->TXZ case, just copy it accordingly
        if (RebuildTorrentXz(datFile, datItem, file, outDir, outputFormat, isZip))
            return true;

        // Create a temp file if we're compressing the data after or if there are multiple dupes.
        // The input stream is copied to the temp file and fileStream is re-pointed at it so the
        // data can be re-read once per duplicate.
        string? tempFile = null;
        if (outputFormat != OutputFormat.Folder || dupes.Count > 1)
        {
            tempFile = Path.Combine(outDir, $"tmp{System.Guid.NewGuid()}");
            Stream tempStream = System.IO.File.Open(tempFile, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite);
            byte[] tempBuffer = new byte[4096 * 128];
            int zlen;
            while ((zlen = fileStream.Read(tempBuffer, 0, tempBuffer.Length)) > 0)
            {
                tempStream.Write(tempBuffer, 0, zlen);
                tempStream.Flush();
            }

            fileStream.Dispose();
            fileStream = tempStream;
            fileStream.Seek(0, SeekOrigin.Begin);
        }

        _staticLogger.User($"{(inverse ? " No matches " : $" { dupes . Count } Matches ")} found for '{Path.GetFileName(datItem.GetName() ?? datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>().AsStringValue())}', rebuilding accordingly..." ) ;
        rebuilt = true;

        // Special case for partial packing mode: re-bucket by machine so the
        // per-machine item counts below are meaningful
        bool shouldCheck = false;
        if (outputFormat == OutputFormat.Folder && datFile.Header.GetStringFieldValue(Models.Metadata.Header.ForcePackingKey).AsEnumValue<PackingFlag>() == PackingFlag.Partial)
        {
            shouldCheck = true;
            datFile.BucketBy(ItemKey.Machine, lower: false);
        }

        // Now loop through the list and rebuild accordingly
        foreach (DatItem item in dupes)
        {
            // If we don't have a proper machine
            var machine = item.GetMachine();
            if (machine?.GetName() == null)
                continue;

            // If we should check for the items in the machine:
            // multi-item machines get a subfolder, single-item machines go to the parent
            var items = datFile.GetItemsForBucket(machine.GetName());
            if (shouldCheck && items!.Count > 1)
                outputFormat = OutputFormat.Folder;
            else if (shouldCheck && items!.Count == 1)
                outputFormat = OutputFormat.ParentFolder;

            // Get the output archive, if possible
            IParent? outputArchive = GetPreconfiguredFolder(datFile, date, outputFormat);

            // Now rebuild to the output file
            outputArchive!.Write(fileStream, outDir, (item as Rom)!.ConvertToBaseFile());
        }

        // Close the input stream
        fileStream.Dispose();

        // Delete the file if a temp file was created
        if (tempFile != null && System.IO.File.Exists(tempFile))
            System.IO.File.Delete(tempFile);
    }

    // Now we want to take care of headers, if applicable
    // NOTE(review): if the rebuild branch above ran, fileStream was already disposed
    // before this point; GetMatchingRule below would then receive a disposed stream.
    // Confirm whether SkipperMatch tolerates this or the dispose should be deferred.
    if (datFile.Header.GetStringFieldValue(Models.Metadata.Header.HeaderKey) != null)
    {
        // Check to see if we have a matching header first
        SkipperMatch.Init();
        Rule rule = SkipperMatch.GetMatchingRule(fileStream, Path.GetFileNameWithoutExtension(datFile.Header.GetStringFieldValue(Models.Metadata.Header.HeaderKey)!));

        // If there's a match, create the new file to write
        if (rule.Tests != null && rule.Tests.Length != 0)
        {
            // If the file could be transformed correctly
            MemoryStream transformStream = new();
            if (rule.TransformStream(fileStream, transformStream, keepReadOpen: true, keepWriteOpen: true))
            {
                // Get the file informations that we will be using
                HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
                Rom headerless = FileTypeTool.GetInfo(transformStream, hashes).ConvertToRom();

                // If we have duplicates and we're not filtering
                if (ShouldRebuild(datFile, headerless, transformStream, false, out dupes))
                //if (ShouldRebuildDB(datFile, headerless, transformStream, false, out dupes))
                {
                    _staticLogger.User($"Headerless matches found for '{Path.GetFileName(datItem.GetName() ?? datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>().AsStringValue())}', rebuilding accordingly..." ) ;
                    rebuilt = true;

                    // Now loop through the list and rebuild accordingly
                    foreach (DatItem item in dupes)
                    {
                        // Create a headered item to use as well; the headered copy
                        // is written alongside the headerless one with a CRC suffix
                        datItem.CopyMachineInformation(item);
                        datItem.SetName($"{datItem.GetName()}_{crc}");

                        // Get the output archive, if possible
                        IParent? outputArchive = GetPreconfiguredFolder(datFile, date, outputFormat);

                        // Now rebuild to the output file
                        bool eitherSuccess = false;
                        eitherSuccess |= outputArchive!.Write(transformStream, outDir, (item as Rom)!.ConvertToBaseFile());
                        eitherSuccess |= outputArchive.Write(fileStream, outDir, (datItem as Rom)!.ConvertToBaseFile());

                        // Now add the success of either rebuild
                        rebuilt &= eitherSuccess;
                    }
                }
            }

            // Dispose of the stream
            transformStream?.Dispose();
        }

        // Dispose of the stream
        fileStream?.Dispose();
    }

    return rebuilt;
}
/// <summary>
/// Get the rebuild state for a given item
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="stream">Stream representing the input file</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="dupes">Output list of duplicate items to rebuild to</param>
/// <returns>True if the item should be rebuilt, false otherwise</returns>
private static bool ShouldRebuild(DatFile datFile, DatItem datItem, Stream? stream, bool inverse, out List<DatItem> dupes)
{
    // Find if the file has duplicates in the DAT
    dupes = datFile.GetDuplicates(datItem);
    bool hasDuplicates = dupes.Count > 0;

    // If we have duplicates but we're filtering, the item is excluded
    if (hasDuplicates && inverse)
        return false;

    // If we have duplicates without filtering, rebuild to all matches
    if (hasDuplicates && !inverse)
        return true;

    // If we have no duplicates and we're filtering, synthesize an item from the stream
    if (!hasDuplicates && inverse)
    {
        // Get the item from the current file
        HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
        Rom item = FileTypeTool.GetInfo(stream, hashes).ConvertToRom();

        // Name the machine after the file itself.
        // (A dead `machinename` branch that was never reachable — the local was
        // always null — has been removed here.)
        item.GetMachine()!.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, Path.GetFileNameWithoutExtension(item.GetName()));
        item.GetMachine()!.SetName(Path.GetFileNameWithoutExtension(item.GetName()));

        dupes.Add(item);
        return true;
    }

    // If we have no duplicates and we're not filtering
    return false;
}
/// <summary>
/// Get the rebuild state for a given item
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="stream">Stream representing the input file</param>
/// <param name="inverse">True if the DAT should be used as a filter instead of a template, false otherwise</param>
/// <param name="dupes">Output list of duplicate items to rebuild to</param>
/// <returns>True if the item should be rebuilt, false otherwise</returns>
private static bool ShouldRebuildDB(DatFile datFile, KeyValuePair<long, DatItem> datItem, Stream? stream, bool inverse, out Dictionary<long, DatItem> dupes)
{
    // Find if the file has duplicates in the DAT
    dupes = datFile.GetDuplicatesDB(datItem);
    bool hasDuplicates = dupes.Count > 0;

    // If we have duplicates but we're filtering, the item is excluded
    if (hasDuplicates && inverse)
        return false;

    // If we have duplicates without filtering, rebuild to all matches
    if (hasDuplicates && !inverse)
        return true;

    // TODO: Figure out how getting a set of duplicates works with IDDB
    // If we have no duplicates and we're filtering, synthesize an item from the stream
    if (!hasDuplicates && inverse)
    {
        // Get the item from the current file
        HashType[] hashes = [HashType.CRC32, HashType.MD5, HashType.SHA1];
        Rom item = FileTypeTool.GetInfo(stream, hashes).ConvertToRom();

        // Create a machine for the current item, named after the file itself.
        // (A dead `machinename` branch that was never reachable — the local was
        // always null, and the rename came after AddMachineDB anyway — has been removed.)
        var machine = new Machine();
        machine.SetFieldValue<string?>(Models.Metadata.Machine.DescriptionKey, Path.GetFileNameWithoutExtension(item.GetName()));
        machine.SetName(Path.GetFileNameWithoutExtension(item.GetName()));
        long machineIndex = datFile.AddMachineDB(machine);

        // Register the item against the new machine and report it as the single dupe
        long index = datFile.AddItemDB(item, machineIndex, -1, false);
        dupes[index] = item;
        return true;
    }

    // If we have no duplicates and we're not filtering
    return false;
}
/// <summary>
/// Rebuild from TorrentGzip to TorrentGzip
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TGZ, null otherwise</param>
/// <returns>True if rebuilt properly, false otherwise</returns>
private static bool RebuildTorrentGzip(DatFile datFile, DatItem datItem, string file, string outDir, OutputFormat outputFormat, bool? isZip)
{
    // Probe the input for TorrentGzip information
    GZipArchive inputArchive = new(file);
    BaseFile? tgzInfo = inputArchive.GetTorrentGZFileInfo();

    // The fast path only applies to a non-archive input with valid TGZ info
    if (isZip != false || tgzInfo == null)
        return false;

    // ...and only when the output is also a TorrentGzip variant
    if (outputFormat != OutputFormat.TorrentGzip && outputFormat != OutputFormat.TorrentGzipRomba)
        return false;

    _staticLogger.User($"Matches found for '{Path.GetFileName(datItem.GetName() ?? string.Empty)}', rebuilding accordingly...");

    // Derive the destination path from the item's SHA-1
    string sha1 = (datItem as Rom)!.GetStringFieldValue(Models.Metadata.Rom.SHA1Key) ?? string.Empty;
    string destination = outputFormat == OutputFormat.TorrentGzipRomba
        ? Path.Combine(outDir, Utilities.GetDepotPath(sha1, datFile.Modifiers.OutputDepot?.Depth ?? 0) ?? string.Empty)
        : Path.Combine(outDir, sha1 + ".gz");

    // Make sure the output folder is created
    string? destinationDir = Path.GetDirectoryName(destination);
    if (destinationDir != null)
        Directory.CreateDirectory(destinationDir);

    // Now copy the file over; any I/O failure is reported as a non-rebuild
    try
    {
        System.IO.File.Copy(file, destination);
        return true;
    }
    catch
    {
        return false;
    }
}
/// <summary>
/// Rebuild from TorrentXz to TorrentXz
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="outDir">Output directory to use to build to</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <param name="isZip">True if the input file is an archive, false if the file is TXZ, null otherwise</param>
/// <returns>True if rebuilt properly, false otherwise</returns>
private static bool RebuildTorrentXz(DatFile datFile, DatItem datItem, string file, string outDir, OutputFormat outputFormat, bool? isZip)
{
    // If we have a very specific TXZ->TXZ case, just copy it accordingly
    XZArchive txz = new(file);
    BaseFile? txzRom = txz.GetTorrentXZFileInfo();
    if (isZip == false && txzRom != null && (outputFormat == OutputFormat.TorrentXZ || outputFormat == OutputFormat.TorrentXZRomba))
    {
        // Only Rom items carry the SHA-1 needed to name the output file;
        // guard instead of null-forgiving cast so non-Rom items fail cleanly
        if (datItem is not Rom rom)
            return false;

        _staticLogger.User($"Matches found for '{Path.GetFileName(datItem.GetName() ?? string.Empty)}', rebuilding accordingly...");

        // Get the proper output path
        string sha1 = rom.GetStringFieldValue(Models.Metadata.Rom.SHA1Key) ?? string.Empty;
        if (outputFormat == OutputFormat.TorrentXZRomba)
            // Depot paths are built with a ".gz" suffix; swap it for ".xz". The rest of
            // the path is hex digits plus separators, so ".gz" can only be the extension.
            outDir = Path.Combine(outDir, Utilities.GetDepotPath(sha1, datFile.Modifiers.OutputDepot?.Depth ?? 0) ?? string.Empty).Replace(".gz", ".xz");
        else
            outDir = Path.Combine(outDir, sha1 + ".xz");

        // Make sure the output folder is created
        string? dir = Path.GetDirectoryName(outDir);
        if (dir != null)
            Directory.CreateDirectory(dir);

        // Now copy the file over
        try
        {
            System.IO.File.Copy(file, outDir);
            return true;
        }
        catch
        {
            // Copy failures (e.g. destination already exists) are reported as a failed rebuild
            return false;
        }
    }

    return false;
}
/// <summary>
/// Get the Stream related to a file
/// </summary>
/// <param name="datItem">Information for the current file to rebuild from</param>
/// <param name="file">Name of the file to process</param>
/// <param name="isZip">Non-null if the input file is an archive</param>
/// <param name="stream">Output stream representing the opened file</param>
/// <returns>True if the stream opening succeeded, false otherwise</returns>
private static bool GetFileStream(DatItem datItem, string file, bool? isZip, out Stream? stream)
{
    stream = null;

    if (isZip == null)
    {
        // Plain file on disk: open read-only, tolerating concurrent readers and writers
        stream = System.IO.File.Open(file, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
    }
    else
    {
        // Archive input: locate the matching entry and extract it as a stream
        BaseArchive? sourceArchive = FileTypeTool.CreateArchiveType(file);
        if (sourceArchive == null)
            return false;

        try
        {
            ItemType itemType = datItem.GetStringFieldValue(Models.Metadata.DatItem.TypeKey).AsEnumValue<ItemType>();
            string entryName = datItem.GetName() ?? itemType.AsStringValue() ?? string.Empty;
            (stream, _) = sourceArchive.GetEntryStream(entryName);
        }
        catch
        {
            // Ignore the exception for now -- usually an over-large file
            stream = null;
            return false;
        }
    }

    // If the stream is null, then continue
    if (stream == null)
        return false;

    // Rewind so callers always start reading from the beginning
    if (stream.CanSeek)
        stream.Seek(0, SeekOrigin.Begin);

    return true;
}
/// <summary>
/// Get the default OutputFormat associated with each PackingFlag
/// </summary>
private static OutputFormat GetOutputFormat(PackingFlag packing)
{
    // Zipped sets rebuild to TorrentZip, flat sets go to a parent folder,
    // and every other flag (Unzip, Partial, FileOnly, None) maps to a plain folder
    switch (packing)
    {
        case PackingFlag.Zip:
            return OutputFormat.TorrentZip;
        case PackingFlag.Flat:
            return OutputFormat.ParentFolder;
        default:
            return OutputFormat.Folder;
    }
}
/// <summary>
/// Get preconfigured Folder for rebuilding
/// </summary>
/// <param name="datFile">Current DatFile object to rebuild from</param>
/// <param name="date">True if the date from the DAT should be used if available, false otherwise</param>
/// <param name="outputFormat">Output format that files should be written to</param>
/// <returns>Folder configured with proper flags</returns>
private static IParent? GetPreconfiguredFolder(DatFile datFile, bool date, OutputFormat outputFormat)
{
    IParent? folder = FileTypeTool.CreateFolderType(outputFormat);

    // Only archive types support real-date stamping; pass the flag through when requested
    if (date && folder is BaseArchive archive)
        archive.SetRealDates(date);

    // Set the depot depth fields where appropriate
    if (folder is GZipArchive gzip)
        gzip.Depth = datFile.Modifiers.OutputDepot?.Depth ?? 0;
    else if (folder is XZArchive xz)
        xz.Depth = datFile.Modifiers.OutputDepot?.Depth ?? 0;

    return folder;
}
2024-02-28 22:54:56 -05:00
2020-12-10 11:07:36 -08:00
/// <summary>
/// Get string value from input OutputFormat
/// </summary>
/// <param name="itemType">OutputFormat to get value from</param>
/// <returns>String value corresponding to the OutputFormat, or null if unrecognized</returns>
private static string? FromOutputFormat(OutputFormat itemType)
{
    // Romba variants share a name with their base format; both folder kinds read as "directory"
    return itemType switch
    {
        OutputFormat.Folder or OutputFormat.ParentFolder => "directory",
        OutputFormat.TapeArchive => "TAR",
        OutputFormat.Torrent7Zip => "Torrent7Z",
        OutputFormat.TorrentGzip or OutputFormat.TorrentGzipRomba => "TorrentGZ",
        OutputFormat.TorrentRar => "TorrentRAR",
        OutputFormat.TorrentXZ or OutputFormat.TorrentXZRomba => "TorrentXZ",
        OutputFormat.TorrentZip => "TorrentZip",
        _ => null,
    };
}
}
}