Files
SabreTools/SabreTools.FileTypes/Archives/GZipArchive.cs

509 lines
17 KiB
C#
Raw Normal View History

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
2017-11-02 01:03:36 -07:00
using System.Text.RegularExpressions;
using Compress;
using Compress.gZip;
using Compress.Support.Compression.Deflate;
2024-03-04 23:56:05 -05:00
using SabreTools.Core.Tools;
using SabreTools.Hashing;
2024-04-24 13:45:38 -04:00
using SabreTools.IO.Extensions;
2020-12-10 22:31:23 -08:00
namespace SabreTools.FileTypes.Archives
{
/// <summary>
/// Represents a TorrentGZip archive for reading and writing
/// </summary>
public class GZipArchive : BaseArchive
{
2020-12-09 14:45:11 -08:00
#region Constants
/* (Torrent)GZ Header Format
https://tools.ietf.org/html/rfc1952
00-01 Identification (0x1F, 0x8B) GzSignature
02 Compression Method (0-7 reserved, 8 deflate; 0x08)
03 Flags (0 FTEXT, 1 FHCRC, 2 FEXTRA, 3 FNAME, 4 FCOMMENT, 5 reserved, 6 reserved, 7 reserved; 0x04)
04-07 Modification time (Unix format; 0x00, 0x00, 0x00, 0x00)
08 Extra Flags (2 maximum compression, 4 fastest algorithm; 0x00)
09 OS (See list on https://tools.ietf.org/html/rfc1952; 0x00)
0A-0B Length of extra field (mirrored; 0x1C, 0x00)
0C-27 Extra field
0C-1B MD5 Hash
1C-1F CRC hash
20-27 Int64 size (mirrored)
*/
2024-02-28 19:19:50 -05:00
private readonly static byte[] TorrentGZHeader = [0x1f, 0x8b, 0x08, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00];
2020-12-09 14:45:11 -08:00
#endregion
#region Fields
/// <summary>
/// Positive value for depth of the output depot, defaults to 4
/// </summary>
public int Depth { get; set; } = 4;
#endregion
#region Constructors
/// <summary>
/// Create a new TorrentGZipArchive with no base file
/// </summary>
public GZipArchive()
: base()
{
}
/// <summary>
/// Create a new TorrentGZipArchive from the given file
/// </summary>
/// <param name="filename">Name of the file to use as an archive</param>
/// <param name="read">True for opening file as read, false for opening file as write</param>
/// <param name="getHashes">True if hashes for this file should be calculated, false otherwise (default)</param>
public GZipArchive(string filename, bool getHashes = false)
: base(filename, getHashes)
{
}
#endregion
#region Extraction
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
public override bool CopyAll(string outDir)
{
bool encounteredErrors = true;
2024-02-28 19:19:50 -05:00
// If we have an invalid file
if (this.Filename == null)
return true;
try
{
// Create the temp directory
Directory.CreateDirectory(outDir);
// Decompress the _filename stream
2020-12-08 00:13:22 -08:00
FileStream outstream = File.Create(Path.Combine(outDir, Path.GetFileNameWithoutExtension(this.Filename)));
var gz = new gZip();
ZipReturn ret = gz.ZipFileOpen(this.Filename);
2024-04-24 16:08:17 -04:00
ret = gz.ZipFileOpenReadStream(0, out Stream? gzstream, out ulong streamSize);
2024-02-28 21:59:13 -05:00
byte[] buffer = new byte[32768];
int read;
2024-04-24 16:08:17 -04:00
while ((read = gzstream!.Read(buffer, 0, buffer.Length)) > 0)
2024-02-28 21:59:13 -05:00
{
outstream.Write(buffer, 0, read);
}
// Dispose of the streams
outstream.Dispose();
ret = gz.ZipFileCloseReadStream();
gz.ZipFileClose();
encounteredErrors = false;
}
2020-09-15 12:12:13 -07:00
catch (EndOfStreamException ex)
{
// Catch this but don't count it as an error because SharpCompress is unsafe
logger.Verbose(ex);
}
2020-09-15 12:12:13 -07:00
catch (InvalidOperationException ex)
{
logger.Warning(ex);
encounteredErrors = true;
}
catch (Exception ex)
{
logger.Error(ex);
encounteredErrors = true;
}
2024-02-28 19:19:50 -05:00
return encounteredErrors;
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-02-28 19:19:50 -05:00
public override string? CopyToFile(string entryName, string outDir)
{
// Try to extract a stream using the given information
2024-07-17 14:46:14 -04:00
(Stream? stream, string? realEntry) = GetEntryStream(entryName);
2024-07-17 15:19:15 -04:00
if (stream == null || realEntry == null)
return null;
2024-07-15 21:34:17 -04:00
// If the stream and the entry name are both non-null, we write to file
2024-07-17 15:19:15 -04:00
realEntry = Path.Combine(outDir, realEntry);
2024-07-17 15:19:15 -04:00
// Create the output subfolder now
string? dir = Path.GetDirectoryName(realEntry);
if (dir != null)
Directory.CreateDirectory(dir);
2024-07-17 15:19:15 -04:00
// Now open and write the file if possible
FileStream fs = File.Create(realEntry);
if (fs != null)
{
if (stream.CanSeek)
stream.Seek(0, SeekOrigin.Begin);
byte[] zbuffer = new byte[_bufferSize];
int zlen;
while ((zlen = stream.Read(zbuffer, 0, _bufferSize)) > 0)
{
2024-07-17 15:19:15 -04:00
fs.Write(zbuffer, 0, zlen);
fs.Flush();
}
2024-07-17 15:19:15 -04:00
stream?.Dispose();
fs?.Dispose();
}
else
{
stream?.Dispose();
fs?.Dispose();
realEntry = null;
}
return realEntry;
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-07-15 21:37:38 -04:00
public override (Stream?, string?) GetEntryStream(string entryName)
{
2024-02-28 19:19:50 -05:00
// If we have an invalid file
if (this.Filename == null)
return (null, null);
try
{
2024-07-16 14:58:04 -04:00
// Open the entry stream
string realEntry = Path.GetFileNameWithoutExtension(this.Filename);
var gz = new gZip();
ZipReturn ret = gz.ZipFileOpen(this.Filename);
2024-07-16 14:58:04 -04:00
ret = gz.ZipFileOpenReadStream(0, out Stream? stream, out ulong streamSize);
2024-07-16 14:58:04 -04:00
// Return the stream
return (stream, realEntry);
}
catch (Exception ex)
{
logger.Error(ex);
2024-07-16 14:58:04 -04:00
return (null, null);
}
}
#endregion
#region Information
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-02-28 19:19:50 -05:00
public override List<BaseFile>? GetChildren()
{
2024-02-28 19:19:50 -05:00
// If we have an invalid file
if (this.Filename == null)
return null;
2024-07-17 15:19:15 -04:00
// If we have children cached already
if (_children != null && _children.Count > 0)
return _children;
_children = [];
2024-07-17 15:19:15 -04:00
string gamename = Path.GetFileNameWithoutExtension(this.Filename);
2024-07-17 15:19:15 -04:00
BaseFile? possibleTgz = GetTorrentGZFileInfo();
2024-07-17 15:19:15 -04:00
// If it was, then add it to the outputs and continue
if (possibleTgz != null && possibleTgz.Filename != null)
{
_children.Add(possibleTgz);
return _children;
}
try
{
// Create a blank item for the entry
BaseFile gzipEntryRom = new();
// Perform a quickscan, if flagged to
if (this.AvailableHashTypes.Length == 1 && this.AvailableHashTypes[0] == HashType.CRC32)
{
2024-07-17 15:19:15 -04:00
gzipEntryRom.Filename = gamename;
using BinaryReader br = new(File.OpenRead(this.Filename));
br.BaseStream.Seek(-8, SeekOrigin.End);
gzipEntryRom.CRC = br.ReadBytesBigEndian(4);
gzipEntryRom.Size = br.ReadInt32BigEndian();
}
2024-07-17 15:19:15 -04:00
// Otherwise, use the stream directly
else
{
2024-07-17 15:19:15 -04:00
var gz = new gZip();
ZipReturn ret = gz.ZipFileOpen(this.Filename);
ret = gz.ZipFileOpenReadStream(0, out Stream? gzstream, out ulong streamSize);
gzipEntryRom = GetInfo(gzstream, hashes: this.AvailableHashTypes);
gzipEntryRom.Filename = gz.GetLocalFile(0).Filename;
gzipEntryRom.Parent = gamename;
gzipEntryRom.Date = (gz.TimeStamp > 0 ? gz.TimeStamp.ToString() : null);
gzstream!.Dispose();
}
2024-07-17 15:19:15 -04:00
2024-07-19 15:14:30 -04:00
// Fill in common details and add to the list
2024-07-17 15:19:15 -04:00
gzipEntryRom.Parent = gamename;
_children.Add(gzipEntryRom);
}
catch (Exception ex)
{
logger.Error(ex);
return null;
}
return _children;
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
public override List<string> GetEmptyFolders()
{
// GZip files don't contain directories
2024-02-28 19:19:50 -05:00
return [];
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
public override bool IsTorrent()
{
// Check for the file existing first
2024-04-24 13:45:38 -04:00
if (this.Filename == null || !File.Exists(this.Filename))
return false;
string datum = Path.GetFileName(this.Filename).ToLowerInvariant();
long filesize = new FileInfo(this.Filename).Length;
// If we have the romba depot files, just skip them gracefully
if (datum == ".romba_size" || datum == ".romba_size.backup")
{
logger.Verbose($"Romba depot file found, skipping: {this.Filename}");
return false;
}
// Check if the name is the right length
if (!Regex.IsMatch(datum, @"^[0-9a-f]{" + Constants.SHA1Length + @"}\.gz"))
{
logger.Warning($"Non SHA-1 filename found, skipping: '{Path.GetFullPath(this.Filename)}'");
return false;
}
// Check if the file is at least the minimum length
if (filesize < 40 /* bytes */)
{
2020-12-08 11:09:05 -08:00
logger.Warning($"Possibly corrupt file '{Path.GetFullPath(this.Filename)}' with size {filesize}");
return false;
}
// Get the Romba-specific header data
BinaryReader br = new(File.OpenRead(this.Filename));
byte[] header = br.ReadBytes(12); // Get preamble header for checking
br.ReadBytes(16); // headermd5
br.ReadBytes(4); // headercrc
br.ReadUInt64(); // headersz
2024-02-28 21:59:13 -05:00
#if NET40_OR_GREATER
br.Dispose();
2024-02-28 21:59:13 -05:00
#endif
// If the header is not correct, return a blank rom
bool correct = true;
for (int i = 0; i < header.Length; i++)
{
// This is a temp fix to ignore the modification time and OS until romba can be fixed
if (i == 4 || i == 5 || i == 6 || i == 7 || i == 9)
continue;
2020-12-09 14:45:11 -08:00
correct &= (header[i] == TorrentGZHeader[i]);
}
if (!correct)
return false;
return true;
}
/// <summary>
/// Retrieve file information for a single torrent GZ file
/// </summary>
/// <returns>Populated DatItem object if success, empty one on error</returns>
2024-02-28 19:19:50 -05:00
public BaseFile? GetTorrentGZFileInfo()
{
// Check for the file existing first
2024-04-24 13:45:38 -04:00
if (this.Filename == null || !File.Exists(this.Filename))
return null;
string datum = Path.GetFileName(this.Filename).ToLowerInvariant();
long filesize = new FileInfo(this.Filename).Length;
// If we have the romba depot files, just skip them gracefully
if (datum == ".romba_size" || datum == ".romba_size.backup")
{
logger.Verbose($"Romba depot file found, skipping: {this.Filename}");
return null;
}
// Check if the name is the right length
if (!Regex.IsMatch(datum, @"^[0-9a-f]{" + Constants.SHA1Length + @"}\.gz"))
{
logger.Warning($"Non SHA-1 filename found, skipping: '{Path.GetFullPath(this.Filename)}'");
return null;
}
// Check if the file is at least the minimum length
if (filesize < 40 /* bytes */)
{
2020-12-08 11:09:05 -08:00
logger.Warning($"Possibly corrupt file '{Path.GetFullPath(this.Filename)}' with size {filesize}");
return null;
}
// Get the Romba-specific header data
byte[] header; // Get preamble header for checking
byte[] headermd5; // MD5
byte[] headercrc; // CRC
ulong headersz; // Int64 size
BinaryReader br = new(File.OpenRead(this.Filename));
header = br.ReadBytes(12);
headermd5 = br.ReadBytes(16);
headercrc = br.ReadBytes(4);
headersz = br.ReadUInt64();
2024-02-28 21:59:13 -05:00
#if NET40_OR_GREATER
br.Dispose();
2024-02-28 21:59:13 -05:00
#endif
// If the header is not correct, return a blank rom
bool correct = true;
for (int i = 0; i < header.Length; i++)
{
// This is a temp fix to ignore the modification time and OS until romba can be fixed
if (i == 4 || i == 5 || i == 6 || i == 7 || i == 9)
{
continue;
}
2020-12-09 14:45:11 -08:00
correct &= (header[i] == TorrentGZHeader[i]);
}
2024-02-28 19:19:50 -05:00
if (!correct)
return null;
// Now convert the data and get the right position
long extractedsize = (long)headersz;
BaseFile baseFile = new()
{
Filename = Path.GetFileNameWithoutExtension(this.Filename).ToLowerInvariant(),
Size = extractedsize,
CRC = headercrc,
MD5 = headermd5,
2023-08-14 13:36:37 -04:00
SHA1 = TextHelper.StringToByteArray(Path.GetFileNameWithoutExtension(this.Filename)),
Parent = Path.GetFileNameWithoutExtension(this.Filename).ToLowerInvariant(),
};
return baseFile;
}
#endregion
#region Writing
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-02-28 19:19:50 -05:00
public override bool Write(string inputFile, string outDir, BaseFile? baseFile = null)
{
// Check that the input file exists
if (!File.Exists(inputFile))
{
logger.Warning($"File '{inputFile}' does not exist!");
return false;
}
inputFile = Path.GetFullPath(inputFile);
// Get the file stream for the file and write out
2020-12-08 11:09:05 -08:00
return Write(File.OpenRead(inputFile), outDir, baseFile);
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-02-28 19:19:50 -05:00
public override bool Write(Stream? inputStream, string outDir, BaseFile? baseFile = null)
{
bool success = false;
// If the stream is not readable, return
2024-02-28 19:19:50 -05:00
if (inputStream == null || !inputStream.CanRead)
return success;
// Make sure the output directory exists
if (!Directory.Exists(outDir))
Directory.CreateDirectory(outDir);
outDir = Path.GetFullPath(outDir);
// Now get the Rom info for the file so we have hashes and size
2020-12-08 11:09:05 -08:00
baseFile = GetInfo(inputStream, keepReadOpen: true);
// Get the output file name
2024-02-28 19:19:50 -05:00
string outfile = Path.Combine(outDir, Utilities.GetDepotPath(TextHelper.ByteArrayToString(baseFile.SHA1), Depth) ?? string.Empty);
2020-08-18 23:39:13 -07:00
// Check to see if the folder needs to be created
if (!Directory.Exists(Path.GetDirectoryName(outfile)))
2024-02-28 19:19:50 -05:00
Directory.CreateDirectory(Path.GetDirectoryName(outfile)!);
// If the output file exists, don't try to write again
if (!File.Exists(outfile))
{
// Compress the input stream
2020-12-08 00:13:22 -08:00
FileStream outputStream = File.Create(outfile);
// Open the output file for writing
BinaryWriter sw = new(outputStream);
// Write standard header and TGZ info
2020-12-09 14:45:11 -08:00
byte[] data = TorrentGZHeader
2024-02-28 19:19:50 -05:00
.Concat(baseFile.MD5!) // MD5
.Concat(baseFile.CRC!) // CRC
.ToArray();
sw.Write(data);
2020-12-08 11:09:05 -08:00
sw.Write((ulong)(baseFile.Size ?? 0)); // Long size (Unsigned, Mirrored)
// Now create a deflatestream from the input file
ZlibBaseStream ds = new(outputStream, CompressionMode.Compress, CompressionLevel.BestCompression, ZlibStreamFlavor.DEFLATE, true);
// Copy the input stream to the output
byte[] ibuffer = new byte[_bufferSize];
int ilen;
while ((ilen = inputStream.Read(ibuffer, 0, _bufferSize)) > 0)
{
ds.Write(ibuffer, 0, ilen);
ds.Flush();
}
2020-08-28 21:38:27 -07:00
ds.Dispose();
// Now write the standard footer
sw.Write([.. baseFile.CRC!.Reverse()]);
2020-12-08 11:09:05 -08:00
sw.Write((uint)(baseFile.Size ?? 0));
// Dispose of everything
2024-02-28 21:59:13 -05:00
#if NET40_OR_GREATER
sw.Dispose();
2024-02-28 21:59:13 -05:00
#endif
outputStream.Dispose();
}
return true;
}
2020-12-08 11:09:05 -08:00
/// <inheritdoc/>
2024-02-28 19:19:50 -05:00
public override bool Write(List<string> inputFiles, string outDir, List<BaseFile>? baseFile)
{
throw new NotImplementedException();
}
#endregion
}
}