2024-10-19 21:41:08 -04:00
using System ;
using System.IO ;
2020-12-10 22:16:53 -08:00
using SabreTools.Core.Tools ;
2020-12-10 22:31:23 -08:00
using SabreTools.FileTypes.Aaru ;
using SabreTools.FileTypes.CHD ;
2024-03-04 21:20:39 -05:00
using SabreTools.Hashing ;
2024-04-24 13:45:38 -04:00
using SabreTools.IO.Extensions ;
2024-02-29 21:20:44 -05:00
using SabreTools.Matching ;
2020-12-08 14:53:49 -08:00
using SabreTools.Skippers ;
2018-02-15 22:06:20 -08:00
2020-12-08 14:53:49 -08:00
namespace SabreTools.FileTypes
2018-02-15 22:06:20 -08:00
{
2019-02-08 20:51:44 -08:00
public class BaseFile
{
2020-12-09 14:33:47 -08:00
#region Constants

/// <summary>7-Zip signature: ASCII "7z" followed by BC AF 27 1C</summary>
protected static readonly byte[] SevenZipSignature = [0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C];

/// <summary>AaruFormat signature: ASCII "AARUFRMT"</summary>
protected static readonly byte[] AaruFormatSignature = [0x41, 0x41, 0x52, 0x55, 0x46, 0x52, 0x4D, 0x54];

/// <summary>BZip2 signature: ASCII "BZh"</summary>
protected static readonly byte[] BZ2Signature = [0x42, 0x5A, 0x68];

/// <summary>Cabinet signature: ASCII "MSCF"</summary>
protected static readonly byte[] CabinetSignature = [0x4D, 0x53, 0x43, 0x46];

/// <summary>CHD signature: ASCII "MComprHD"</summary>
protected static readonly byte[] CHDSignature = [0x4D, 0x43, 0x6F, 0x6D, 0x70, 0x72, 0x48, 0x44];

/// <summary>ELF signature: 7F followed by ASCII "ELF"</summary>
protected static readonly byte[] ELFSignature = [0x7F, 0x45, 0x4C, 0x46];

/// <summary>FreeArc signature: ASCII "ArC" followed by 01</summary>
protected static readonly byte[] FreeArcSignature = [0x41, 0x72, 0x43, 0x01];

/// <summary>GZip signature: 1F 8B 08</summary>
protected static readonly byte[] GzSignature = [0x1F, 0x8B, 0x08];

/// <summary>PE (DOS stub) signature: ASCII "MZ"</summary>
protected static readonly byte[] PESignature = [0x4D, 0x5A];

/// <summary>RAR signature: ASCII "Rar!" followed by 1A 07 00</summary>
protected static readonly byte[] RarSignature = [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x00];

/// <summary>RAR5 signature: ASCII "Rar!" followed by 1A 07 01 00</summary>
protected static readonly byte[] RarFiveSignature = [0x52, 0x61, 0x72, 0x21, 0x1A, 0x07, 0x01, 0x00];

/// <summary>Tar signature: ASCII "ustar", two spaces, then a NUL byte</summary>
protected static readonly byte[] TarSignature = [0x75, 0x73, 0x74, 0x61, 0x72, 0x20, 0x20, 0x00];

/// <summary>Tar signature: ASCII "ustar", a NUL byte, then ASCII "00"</summary>
protected static readonly byte[] TarZeroSignature = [0x75, 0x73, 0x74, 0x61, 0x72, 0x00, 0x30, 0x30];

/// <summary>XZ signature: FD, ASCII "7zXZ", then 00 00</summary>
protected static readonly byte[] XZSignature = [0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00, 0x00];

/// <summary>Zip signature: ASCII "PK" followed by 03 04</summary>
protected static readonly byte[] ZipSignature = [0x50, 0x4B, 0x03, 0x04];

/// <summary>Zip signature variant: ASCII "PK" followed by 05 06</summary>
protected static readonly byte[] ZipSignatureEmpty = [0x50, 0x4B, 0x05, 0x06];

/// <summary>Zip signature variant: ASCII "PK" followed by 07 08</summary>
protected static readonly byte[] ZipSignatureSpanned = [0x50, 0x4B, 0x07, 0x08];

#endregion
2019-02-08 20:51:44 -08:00
// TODO: Get all of these values automatically so there is no public "set"
2020-10-05 17:43:44 -07:00
#region Fields

/// <summary>
/// Name of, or path to, the file described by this instance
/// </summary>
public string? Filename { get; set; }

/// <summary>
/// Direct parent of the file
/// </summary>
public string? Parent { get; set; }

/// <summary>
/// Date stamp of the file, stored as text
/// </summary>
public string? Date { get; set; }

/// <summary>
/// Size of the file; null when no size has been recorded
/// </summary>
public long? Size { get; set; }

/// <summary>
/// Hash types available for the file; CRC32, MD5 and SHA1 unless replaced
/// </summary>
public HashType[] AvailableHashTypes { get; set; } = [HashType.CRC32, HashType.MD5, HashType.SHA1];

/// <summary>
/// CRC32 hash of the file; null when not set
/// </summary>
public byte[]? CRC { get; set; }

/// <summary>
/// MD5 hash of the file; null when not set
/// </summary>
public byte[]? MD5 { get; set; }

/// <summary>
/// SHA-1 hash of the file; null when not set
/// </summary>
public byte[]? SHA1 { get; set; }

/// <summary>
/// SHA-256 hash of the file; null when not set
/// </summary>
public byte[]? SHA256 { get; set; }

/// <summary>
/// SHA-384 hash of the file; null when not set
/// </summary>
public byte[]? SHA384 { get; set; }

/// <summary>
/// SHA-512 hash of the file; null when not set
/// </summary>
public byte[]? SHA512 { get; set; }

/// <summary>
/// SpamSum fuzzy hash of the file; null when not set
/// </summary>
public byte[]? SpamSum { get; set; }

#endregion
#region Constructors

/// <summary>
/// Create a new BaseFile with no base file
/// </summary>
public BaseFile()
{
}

/// <summary>
/// Create a new BaseFile from the given file
/// </summary>
/// <param name="filename">Name of the file to use</param>
/// <param name="getHashes">True if hashes for this file should be calculated (default), false otherwise</param>
public BaseFile(string filename, bool getHashes = true)
{
    Filename = filename;
    if (!getHashes)
        return;

    // The path-based GetInfo yields null when the file does not exist;
    // in that case only Filename ends up populated
    CopyInfoFrom(GetInfo(filename, hashes: AvailableHashTypes));
}

/// <summary>
/// Create a new BaseFile from the given file
/// </summary>
/// <param name="filename">Name of the file to use</param>
/// <param name="stream">Stream to populate information from</param>
/// <param name="getHashes">True if hashes for this file should be calculated (default), false otherwise</param>
public BaseFile(string filename, Stream stream, bool getHashes = true)
{
    Filename = filename;
    if (!getHashes)
        return;

    // The stream-based GetInfo is called with its default keepReadOpen (false),
    // so the supplied stream is closed once it has been hashed
    CopyInfoFrom(GetInfo(stream, hashes: AvailableHashTypes));
}

/// <summary>
/// Copy the gathered metadata and hashes from another BaseFile into this one
/// </summary>
/// <param name="source">Instance to copy from; ignored when null</param>
/// <remarks>Filename is deliberately left untouched so the value passed to the constructor is kept</remarks>
private void CopyInfoFrom(BaseFile? source)
{
    if (source == null)
        return;

    Parent = source.Parent;
    Date = source.Date;

    // Size is computed together with the hashes, so carry it over as well
    Size = source.Size;

    CRC = source.CRC;
    MD5 = source.MD5;
    SHA1 = source.SHA1;
    SHA256 = source.SHA256;
    SHA384 = source.SHA384;
    SHA512 = source.SHA512;
    SpamSum = source.SpamSum;
}

#endregion
2020-12-07 22:32:37 -08:00
#region Static Methods
2020-12-08 00:13:22 -08:00
/// <summary>
/// Returns the file type of an input file
/// </summary>
/// <param name="input">Input file to check</param>
/// <returns>
/// FileType matching the file's leading bytes; null when the path is null, the
/// extension is not a recognised archive extension, or no signature matches
/// </returns>
public static FileType? GetFileType(string input)
{
    // If the file is null, then we have no archive type
    if (input == null)
        return null;

    // First line of defense is going to be the extension, for better or worse
    if (!HasValidArchiveExtension(input))
        return null;

    // Read the first bytes of the file and get the magic number.
    // A using statement releases the file handle on every target framework
    // and also when the read throws.
    byte[] magic;
    using (BinaryReader br = new(File.OpenRead(input)))
    {
        magic = br.ReadBytes(8);
    }

    // Now try to match it to a known signature
    if (magic.StartsWith(SevenZipSignature))
        return FileType.SevenZipArchive;

    if (magic.StartsWith(AaruFormatSignature))
        return FileType.AaruFormat;

    if (magic.StartsWith(CHDSignature))
        return FileType.CHD;

    if (magic.StartsWith(GzSignature))
        return FileType.GZipArchive;

    if (magic.StartsWith(RarSignature) || magic.StartsWith(RarFiveSignature))
        return FileType.RarArchive;

    // NOTE(review): the "ustar" magic normally sits at offset 257 of a tar header,
    // not at the start of the file, so this check is unlikely to match real tar
    // files from the first 8 bytes - confirm the intended behavior
    if (magic.StartsWith(TarSignature) || magic.StartsWith(TarZeroSignature))
        return FileType.TapeArchive;

    if (magic.StartsWith(XZSignature))
        return FileType.XZArchive;

    if (magic.StartsWith(ZipSignature)
        || magic.StartsWith(ZipSignatureEmpty)
        || magic.StartsWith(ZipSignatureSpanned))
    {
        return FileType.ZipArchive;
    }

    // Nothing matched
    return null;
}
/// <summary>
/// Retrieve file information for a single file
/// </summary>
/// <param name="input">Filename to get information from</param>
/// <param name="header">Populated string representing the name of the skipper to use, a blank string to use the first available checker, null otherwise</param>
/// <param name="hashes">Hashes to include in the information; null selects CRC32, MD5 and SHA1</param>
/// <param name="asFiles">TreatAsFiles representing special format scanning</param>
/// <returns>
/// Populated BaseFile object on success; null if the file does not exist or the
/// format-specific reader produced no result
/// </returns>
public static BaseFile? GetInfo(string input, string? header = null, HashType[]? hashes = null, TreatAsFile asFiles = 0x00)
{
    // Add safeguard if file doesn't exist
    if (!File.Exists(input))
        return null;

    // If no hashes are set, use the standard array
    hashes ??= [HashType.CRC32, HashType.MD5, HashType.SHA1];

    // Get input information
    var fileType = GetFileType(input);
    Stream inputStream = File.OpenRead(input);

    BaseFile? baseFile;
    try
    {
        // Try to match the supplied header skipper
        if (header != null)
        {
            SkipperMatch.Init();
            var rule = SkipperMatch.GetMatchingRule(input, Path.GetFileNameWithoutExtension(header));

            // If there's a match, transform the stream before getting info
            if (rule.Tests != null && rule.Tests.Length != 0)
            {
                // Create the output stream
                MemoryStream outputStream = new();

                // Transform the stream; from here on the transformed copy is the input
                rule.TransformStream(inputStream, outputStream, keepReadOpen: false, keepWriteOpen: true);
                inputStream = outputStream;
            }
        }

        // Bitwise flag tests compile on every target framework, so no
        // conditional compilation is needed here
        bool aaruAsFile = (asFiles & TreatAsFile.AaruFormat) != 0;
        bool chdAsFile = (asFiles & TreatAsFile.CHD) != 0;

        // Get the info in the proper manner
        if (fileType == FileType.AaruFormat && !aaruAsFile)
            baseFile = AaruFormat.Create(inputStream);
        else if (fileType == FileType.CHD && !chdAsFile)
            baseFile = CHDFile.Create(inputStream);
        else
            baseFile = GetInfo(inputStream, hashes: hashes, keepReadOpen: false);
    }
    finally
    {
        // Dispose of whichever stream is current, even if a step above threw
        inputStream.Dispose();
    }

    // A format-specific reader may not be able to produce a result
    if (baseFile == null)
        return null;

    // Add unique data from the file
    baseFile.Filename = Path.GetFileName(input);

    // The invariant culture keeps the "/" and ":" separators literal regardless of
    // the current culture's date and time separators
    baseFile.Date = new FileInfo(input).LastWriteTime.ToString("yyyy/MM/dd HH:mm:ss", System.Globalization.CultureInfo.InvariantCulture);

    return baseFile;
}
2020-12-07 22:32:37 -08:00
/// <summary>
/// Retrieve file information for a single stream
/// </summary>
/// <param name="input">Stream to get information from</param>
/// <param name="size">Size of the input stream; -1 (default) uses the stream's own length</param>
/// <param name="hashes">Hashes to include in the information; null selects CRC32, MD5 and SHA1</param>
/// <param name="keepReadOpen">True if the underlying read stream should be kept open, false otherwise</param>
/// <returns>Populated BaseFile object if success, empty one on error</returns>
public static BaseFile GetInfo(Stream? input, long size = -1, HashType[]? hashes = null, bool keepReadOpen = false)
{
    // If we have no stream
    if (input == null)
        return new BaseFile();

    // If no hashes are set, use the standard array
    hashes ??= [HashType.CRC32, HashType.MD5, HashType.SHA1];

    // If we want to automatically set the size
    if (size == -1)
        size = input.Length;

    // Run the hashing on the input stream
    var hashDict = HashTool.GetStreamHashes(input, hashes);

    // Deal with the input stream before any return, so that a failed hashing
    // run honors keepReadOpen exactly like a successful one
    if (!keepReadOpen)
        input.Dispose();
    else
        input.SeekIfPossible();

    // Hashing failed, so there is nothing to populate
    if (hashDict == null)
        return new BaseFile();

    // Convert the hash string for one hash type, or null if it was not computed.
    // NOTE(review): SpamSum values are not plain hex strings - confirm that
    // StringToByteArray handles them as intended
    byte[]? ToBytes(HashType hashType)
        => hashDict.TryGetValue(hashType, out var text)
            ? ByteArrayExtensions.StringToByteArray(text)
            : null;

    // Create a base file with the resulting hashes
    return new BaseFile()
    {
        Size = size,
        CRC = ToBytes(HashType.CRC32),
        MD5 = ToBytes(HashType.MD5),
        SHA1 = ToBytes(HashType.SHA1),
        SHA256 = ToBytes(HashType.SHA256),
        SHA384 = ToBytes(HashType.SHA384),
        SHA512 = ToBytes(HashType.SHA512),
        SpamSum = ToBytes(HashType.SpamSum),
    };
}
2020-12-10 22:16:53 -08:00
/// <summary>
/// Get if the given path has an extension recognised as an AaruFormat image,
/// an archive, or a CHD
/// </summary>
/// <param name="path">Path to check</param>
/// <returns>True if the extension is in the known list, false otherwise</returns>
/// <remarks>
/// NOTE(review): "xz" is not in this list although GetFileType tests for
/// XZSignature - confirm whether that is intentional
/// </remarks>
private static bool HasValidArchiveExtension(string path)
{
    // Get the extension from the path, if possible
    string? extension = path.GetNormalizedExtension();

    // A null extension matches none of the constants and so yields false
    return extension is
        // Aaruformat
        "aaru" or "aaruf" or "aaruformat" or "aif" or "dicf"

        // Archive
        or "7z" or "gz" or "lzma"
        or "rar" or "rev" or "r00" or "r01"
        or "tar" or "tgz" or "tlz"
        or "zip" or "zipx"

        // CHD
        or "chd";
}
2020-12-07 22:32:37 -08:00
#endregion
2019-02-08 20:51:44 -08:00
}
2018-02-15 22:06:20 -08:00
}