using System;
using System.IO;
using System.Text;
using SabreTools.Hashing;
using SabreTools.IO.Extensions;
using SabreTools.Numerics.Extensions;
namespace SabreTools.IO.Compression.Deflate
{
/// <summary>
/// Wrapper to handle DEFLATE decompression with data verification
/// </summary>
public class InflateWrapper
{
#region Constants
/// <summary>
/// Buffer size for decompression, in bytes (1024 * 1024 = 1 MiB)
/// </summary>
/// <remarks>Size of the block buffer that Inflate reads decompressed data into</remarks>
private const int BufferSize = 1024 * 1024;
/// <summary>
/// Local file header signature
/// </summary>
/// <remarks>Compared against the first 32-bit little-endian value read by ParseLocalFileHeader</remarks>
private const uint LocalFileHeaderSignature = 0x04034B50;
#endregion
#region Private Classes
/// <summary>
/// Minimal PKZIP local file header information
/// </summary>
/// <remarks>
/// Holds only the header fields that ParseLocalFileHeader keeps; every other
/// field of the header is read from the stream and discarded.
/// </remarks>
private class MinLocalFileHeader
{
/// <summary>
/// Signature (0x04034B50)
/// </summary>
/// <remarks>Read as a 32-bit little-endian value and compared against LocalFileHeaderSignature</remarks>
public uint Signature { get; set; }
/// <summary>
/// CRC-32
/// </summary>
/// <remarks>When a header is parsed, this value replaces the caller-supplied expected CRC-32</remarks>
public uint CRC32 { get; set; }
/// <summary>
/// Compressed size
/// </summary>
/// <remarks>Within this file the value is only written to debug output</remarks>
public uint CompressedSize { get; set; }
/// <summary>
/// Uncompressed size
/// </summary>
/// <remarks>Within this file the value is only written to debug output</remarks>
public uint UncompressedSize { get; set; }
/// <summary>
/// File name (variable size)
/// </summary>
/// <remarks>
/// Decoded as ASCII. Left null when the header declares a zero-length name or
/// when the declared name would extend past the end of the stream.
/// </remarks>
public string? FileName { get; set; }
}
#endregion
#region Extraction
/// <summary>
/// Attempt to extract a file defined by a filename
/// </summary>
/// <param name="source">Stream representing the deflated data</param>
/// <param name="filename">Output filename, null to auto-generate</param>
/// <param name="outputDirectory">Output directory to write to</param>
/// <param name="expected">Expected DEFLATE stream information</param>
/// <param name="pkzip">Indicates if PKZIP containers are used</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <returns>Extraction status representing the final state</returns>
/// <remarks>
/// Assumes that the current stream position is where the compressed data lives.
/// The data is inflated into memory first, so nothing is written to disk unless
/// extraction and verification succeed. When <paramref name="filename"/> is null
/// the name found in the data is used, falling back to a generated name. A name
/// found in the data that resolves outside of <paramref name="outputDirectory"/>
/// is rejected with <see cref="ExtractionStatus.FAIL"/>; caller-supplied names
/// are used as given.
/// </remarks>
public static ExtractionStatus ExtractFile(Stream source,
    string? filename,
    string outputDirectory,
    DeflateInfo expected,
    bool pkzip,
    bool includeDebug)
{
    // Debug output
    if (includeDebug) Console.WriteLine($"Attempting to extract {filename}");

    // Extract the file into memory
    using var destination = new MemoryStream();
    ExtractionStatus status = ExtractStream(source,
        destination,
        expected,
        pkzip,
        includeDebug,
        out var foundFilename);

    // If the extracted data is invalid
    if (status != ExtractionStatus.GOOD)
        return status;

    // Prefer the caller's name, then the name found in the data, then a generated one
    bool nameFromData = filename is null && foundFilename is not null;
    filename ??= foundFilename ?? $"FILE_[{expected.InputSize}, {expected.OutputSize}, {expected.Crc32}]";

    // Ensure directory separators are consistent
    if (Path.DirectorySeparatorChar == '\\')
        filename = filename.Replace('/', '\\');
    else if (Path.DirectorySeparatorChar == '/')
        filename = filename.Replace('\\', '/');

    // Build the full output path
    filename = Path.Combine(outputDirectory, filename);

    // A name read from the data is untrusted: a rooted name or one containing ".."
    // segments would otherwise be written outside of the output directory
    if (nameFromData)
    {
        string root = Path.GetFullPath(outputDirectory.Length == 0 ? "." : outputDirectory);
        string separator = Path.DirectorySeparatorChar.ToString();
        if (!root.EndsWith(separator, StringComparison.Ordinal))
            root += separator;

        if (!Path.GetFullPath(filename).StartsWith(root, StringComparison.Ordinal))
        {
            if (includeDebug) Console.Error.WriteLine($"Not writing {filename}, it resolves outside of {outputDirectory}");
            return ExtractionStatus.FAIL;
        }
    }

    // Ensure the full output directory exists
    var directoryName = Path.GetDirectoryName(filename);
    if (directoryName is not null && !Directory.Exists(directoryName))
        Directory.CreateDirectory(directoryName);

    // Write the output file straight from the in-memory buffer, without copying it first
    using (FileStream output = File.Create(filename))
    {
        destination.WriteTo(output);
    }

    return status;
}
/// <summary>
/// Inflate data from one stream into another, choosing the handler by container type
/// </summary>
/// <param name="source">Stream representing the deflated data</param>
/// <param name="destination">Stream where the inflated data will be written</param>
/// <param name="expected">Expected DEFLATE stream information</param>
/// <param name="pkzip">Indicates if PKZIP containers are used</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <param name="filename">Output filename if extracted from the data, null otherwise</param>
/// <returns>Extraction status representing the final state</returns>
/// <remarks>Assumes that the current stream position is where the compressed data lives</remarks>
public static ExtractionStatus ExtractStream(Stream source,
    Stream destination,
    DeflateInfo expected,
    bool pkzip,
    bool includeDebug,
    out string? filename)
{
    // Data followed by a trailing checksum never supplies a filename
    if (!pkzip)
    {
        filename = null;
        return ExtractStreamWithChecksum(source, destination, expected, includeDebug);
    }

    // The PKZIP path reports whatever filename it finds through the out parameter
    return ExtractStreamWithContainer(source, destination, expected, includeDebug, out filename);
}
/// <summary>
/// Extract source data in a PKZIP container
/// </summary>
/// <param name="source">Stream representing the deflated data</param>
/// <param name="destination">Stream where the inflated data will be written</param>
/// <param name="expected">Expected DEFLATE stream information; its CRC-32 is overwritten when a header is parsed</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <param name="filename">Filename from the PKZIP header, if it exists</param>
/// <returns>Extraction status representing the final state</returns>
public static ExtractionStatus ExtractStreamWithContainer(Stream source,
    Stream destination,
    DeflateInfo expected,
    bool includeDebug,
    out string? filename)
{
    // No filename is known until a header supplies one
    filename = null;

    if (includeDebug)
    {
        Console.WriteLine($"Offset: {source.Position:X8}, Expected Read: {expected.InputSize}, Expected Write: {expected.OutputSize}, Expected CRC-32: {expected.Crc32:X8}");
    }

    // Nothing to do when no input is expected
    if (expected.InputSize == 0)
    {
        if (includeDebug)
            Console.Error.WriteLine($"Not attempting to extract, expected to read 0 bytes");

        return ExtractionStatus.INVALID;
    }

    // The stream has to hold at least as much data as is expected to be read
    long remaining = source.Length - source.Position;
    if (expected.InputSize > remaining)
    {
        if (includeDebug)
            Console.Error.WriteLine($"Not attempting to extract, expected to read {expected.InputSize} bytes but only {remaining} bytes remain");

        return ExtractionStatus.INVALID;
    }

    // Remember where the entry starts and measure how much the header parse consumes
    long startOffset = source.Position;
    MinLocalFileHeader? localHeader = ParseLocalFileHeader(source);
    long headerLength = source.Position - startOffset;

    if (localHeader is not null)
    {
        // The CRC-32 stored in the header wins over the supplied one
        expected.Crc32 = localHeader.CRC32;

        // Report the stored filename to the caller
        if (localHeader.FileName is not null)
        {
            filename = localHeader.FileName;
            if (includeDebug)
                Console.WriteLine($"Filename from PKZIP header: {filename}");
        }
    }

    if (includeDebug)
    {
        Console.WriteLine($"PKZIP Filename: {localHeader?.FileName}, PKZIP Expected Read: {localHeader?.CompressedSize}, PKZIP Expected Write: {localHeader?.UncompressedSize}, PKZIP Expected CRC-32: {localHeader?.CRC32:X4}");
    }

    // Inflate the data that follows the header
    DeflateInfo? inflated = Inflate(source, destination);
    if (inflated is null)
    {
        if (includeDebug)
            Console.Error.WriteLine($"Could not extract {filename}");

        return ExtractionStatus.FAIL;
    }

    // Count the header as input, then place the stream directly after the consumed bytes
    inflated.InputSize += headerLength;
    source.Seek(startOffset + inflated.InputSize, SeekOrigin.Begin);

    return VerifyExtractedData(source, startOffset, expected, inflated, includeDebug);
}
/// <summary>
/// Extract source data with a trailing CRC-32 checksum
/// </summary>
/// <param name="source">Stream representing the deflated data</param>
/// <param name="destination">Stream where the inflated data will be written</param>
/// <param name="expected">Expected DEFLATE stream information; its CRC-32 is filled from the trailing checksum when it is 0</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <returns>Extraction status representing the final state</returns>
/// <remarks>
/// The expected input size is compared against the deflated data plus the trailing
/// checksum bytes (and one extra byte, when present between them).
/// </remarks>
public static ExtractionStatus ExtractStreamWithChecksum(Stream source,
    Stream destination,
    DeflateInfo expected,
    bool includeDebug)
{
    // Debug output
    if (includeDebug) Console.WriteLine($"Offset: {source.Position:X8}, Expected Read: {expected.InputSize}, Expected Write: {expected.OutputSize}, Expected CRC-32: {expected.Crc32:X8}");

    // Check the validity of the inputs
    if (expected.InputSize == 0)
    {
        if (includeDebug) Console.Error.WriteLine($"Not attempting to extract, expected to read 0 bytes");
        return ExtractionStatus.INVALID;
    }
    else if (expected.InputSize > (source.Length - source.Position))
    {
        if (includeDebug) Console.Error.WriteLine($"Not attempting to extract, expected to read {expected.InputSize} bytes but only {source.Length - source.Position} bytes remain");
        return ExtractionStatus.INVALID;
    }

    // Cache the current offset
    long current = source.Position;

    // Extract the file
    var actual = Inflate(source, destination);
    if (actual is null)
    {
        if (includeDebug) Console.Error.WriteLine($"Could not extract");
        return ExtractionStatus.FAIL;
    }

    // Seek to the true end of the data
    source.Seek(current + actual.InputSize, SeekOrigin.Begin);

    // If the expected size is one byte more than the data plus the 4-byte checksum,
    // a single extra byte sits between the data and the checksum
    if (actual.InputSize == expected.InputSize - 5)
    {
        // Only consume the extra byte when it and the full checksum both fit,
        // including when exactly those 5 bytes are all that remain
        if (source.Length - source.Position >= 5)
        {
            // TODO: What does this byte represent?
            byte padding = source.ReadByteValue();
            actual.InputSize += 1;

            // Debug output
            if (includeDebug) Console.WriteLine($"Off-by-one padding byte detected: 0x{padding:X2}");
        }
        else
        {
            // Debug output
            if (includeDebug) Console.WriteLine($"Not enough data to adjust offset");
        }
    }

    // If there is enough data to read the full CRC, including when exactly 4 bytes remain
    uint deflateCrc;
    long crcBytesAvailable = source.Length - source.Position;
    if (crcBytesAvailable >= 4)
    {
        deflateCrc = source.ReadUInt32LittleEndian();
        actual.InputSize += 4;
    }

    // Otherwise, read what is possible and pad with 0x00
    else
    {
        byte[] deflateCrcBytes = new byte[4];
        int realCrcLength = source.Read(deflateCrcBytes, 0, (int)crcBytesAvailable);

        // Parse as a little-endian 32-bit value
        deflateCrc = (uint)(deflateCrcBytes[0]
            | (deflateCrcBytes[1] << 8)
            | (deflateCrcBytes[2] << 16)
            | (deflateCrcBytes[3] << 24));
        actual.InputSize += realCrcLength;
    }

    // If the CRC to check isn't set, verify against the one stored after the data
    if (expected.Crc32 == 0)
        expected.Crc32 = deflateCrc;

    // Debug output
    if (includeDebug) Console.WriteLine($"DeflateStream CRC-32: {deflateCrc:X8}");

    // Verify the extracted data
    return VerifyExtractedData(source, current, expected, actual, includeDebug);
}
/// <summary>
/// Parse a Stream into a minimal local file header
/// </summary>
/// <param name="data">Stream to parse; must support Length, Position and seeking</param>
/// <returns>Filled minimal local file header on success, null on error</returns>
/// <remarks>
/// Partial mirror of method in Serialization.
/// When null is returned the stream is left at the position it had on entry, so
/// the caller can treat the bytes as headerless data.
/// </remarks>
private static MinLocalFileHeader? ParseLocalFileHeader(Stream data)
{
    // Fixed-size portion of the header as read below: one 4-byte signature,
    // five 2-byte fields, three 4-byte fields and two 2-byte lengths
    const int fixedHeaderSize = 4 + (5 * 2) + (3 * 4) + (2 * 2);

    // Too little data to hold a header; nothing has been consumed yet
    long start = data.Position;
    if (data.Length - start < fixedHeaderSize)
        return null;

    var header = new MinLocalFileHeader();

    header.Signature = data.ReadUInt32LittleEndian();
    if (header.Signature != LocalFileHeaderSignature)
    {
        // Not a header: give the 4 signature bytes back to the caller
        data.Seek(start, SeekOrigin.Begin);
        return null;
    }

    _ = data.ReadUInt16LittleEndian(); // Version
    _ = data.ReadUInt16LittleEndian(); // Flags
    _ = data.ReadUInt16LittleEndian(); // CompressionMethod
    _ = data.ReadUInt16LittleEndian(); // LastModifedFileTime
    _ = data.ReadUInt16LittleEndian(); // LastModifiedFileDate
    header.CRC32 = data.ReadUInt32LittleEndian();
    header.CompressedSize = data.ReadUInt32LittleEndian();
    header.UncompressedSize = data.ReadUInt32LittleEndian();
    ushort fileNameLength = data.ReadUInt16LittleEndian();
    ushort extraFieldLength = data.ReadUInt16LittleEndian();

    // Only read the name when all of it is present in the stream
    if (fileNameLength > 0 && data.Position + fileNameLength <= data.Length)
    {
        byte[] filenameBytes = data.ReadBytes(fileNameLength);
        header.FileName = Encoding.ASCII.GetString(filenameBytes);
    }

    // Parsing extras is skipped here, unlike in Serialization
    if (extraFieldLength > 0 && data.Position + extraFieldLength <= data.Length)
        _ = data.ReadBytes(extraFieldLength);

    return header;
}
/// <summary>
/// Verify the extracted stream data, seeking to the original location on failure
/// </summary>
/// <param name="source">Stream representing the deflated data</param>
/// <param name="start">Position representing the start of the deflated data</param>
/// <param name="expected">Expected deflation info; a negative size is not verified and a CRC-32 of 0 is not verified</param>
/// <param name="actual">Actual deflation info</param>
/// <param name="includeDebug">True to include debug data, false otherwise</param>
/// <returns>Extraction status representing the final state</returns>
/// <remarks>
/// Size rules, in order:
/// a read-size mismatch is tolerated only when the write size is verified and matches;
/// a write-size mismatch is logged but ignored when the read size matches;
/// when the read size is not verified, a write-size mismatch is an error.
/// </remarks>
private static ExtractionStatus VerifyExtractedData(Stream source,
    long start,
    DeflateInfo expected,
    DeflateInfo actual,
    bool includeDebug)
{
    // Debug output
    if (includeDebug) Console.WriteLine($"Actual Read: {actual.InputSize}, Actual Write: {actual.OutputSize}, Actual CRC-32: {actual.Crc32:X8}");

    bool verifyRead = expected.InputSize >= 0;
    bool verifyWrite = expected.OutputSize >= 0;
    bool readMatches = expected.InputSize == actual.InputSize;
    bool writeMatches = expected.OutputSize == actual.OutputSize;

    // If there's a mismatch during reading
    if (verifyRead && !readMatches)
    {
        // This in/out check helps catch false positives, such as
        // files that have an off-by-one mismatch for read values
        // but properly match the output written values.
        // The mismatch is only forgiven when the written bytes are
        // verified and correct.
        if (!verifyWrite || !writeMatches)
        {
            if (includeDebug) Console.Error.WriteLine($"Mismatched read/write values!");
            source.Seek(start, SeekOrigin.Begin);
            return ExtractionStatus.WRONG_SIZE;
        }
    }

    // If there's just a mismatch during only writing
    else if (verifyRead)
    {
        // We want to log this but ignore the error
        if (verifyWrite && !writeMatches)
        {
            if (includeDebug) Console.WriteLine($"Ignoring mismatched write values because read values match!");
        }
    }

    // Otherwise the read size is not being verified, so the write size is
    // checked normally. (This branch previously required an expected read size
    // of exactly 0, which made it unreachable.)
    else if (verifyWrite && !writeMatches)
    {
        if (includeDebug) Console.Error.WriteLine($"Mismatched write values!");
        source.Seek(start, SeekOrigin.Begin);
        return ExtractionStatus.WRONG_SIZE;
    }

    // If there's a mismatch with the CRC-32
    if (expected.Crc32 != 0 && expected.Crc32 != actual.Crc32)
    {
        if (includeDebug) Console.Error.WriteLine($"Mismatched CRC-32 values!");
        source.Seek(start, SeekOrigin.Begin);
        return ExtractionStatus.BAD_CRC;
    }

    return ExtractionStatus.GOOD;
}
#endregion
#region Inflation
/// <summary>
/// Inflate an input stream to an output stream
/// </summary>
/// <param name="source">Stream representing the deflated data; left open afterwards</param>
/// <param name="destination">Stream where the inflated data will be written</param>
/// <returns>Deflate information representing the processed data on success, null on error</returns>
/// <remarks>
/// Any exception raised while reading, hashing or writing is swallowed and reported
/// as null. Data written to <paramref name="destination"/> before the failure is
/// not removed.
/// </remarks>
public static DeflateInfo? Inflate(Stream source, Stream destination)
{
    try
    {
        // Setup the hasher for CRC-32 calculation
        using var hasher = new HashWrapper(HashType.CRC32);

        // Create a DeflateStream from the input
        using var ds = new DeflateStream(source, CompressionMode.Decompress, leaveOpen: true);

        // One buffer is reused for every block instead of allocating per iteration
        byte[] buf = new byte[BufferSize];

        // Decompress in blocks
        while (true)
        {
            int read = ds.Read(buf, 0, buf.Length);
            if (read == 0)
                break;

            // Hash and forward only the bytes produced by this read
            hasher.Process(buf, 0, read);
            destination.Write(buf, 0, read);
        }

        // Finalize the hash
        hasher.Terminate();
        byte[] hashBytes = hasher.CurrentHashBytes!;

        // Save the deflate values
        return new DeflateInfo
        {
            InputSize = ds.TotalIn,
            OutputSize = ds.TotalOut,
            Crc32 = BitConverter.ToUInt32(hashBytes, 0),
        };
    }
    catch
    {
        return null;
    }
}
#endregion
}
}