[ext2/3/4] Add support for compressed files.

This commit is contained in:
2026-02-28 19:06:45 +00:00
parent 6a1403f47c
commit c01325ee47
8 changed files with 562 additions and 13 deletions

View File

@@ -54,6 +54,8 @@
<Compile Include="LZIP.cs"/> <Compile Include="LZIP.cs"/>
<Compile Include="LZMA.cs"/> <Compile Include="LZMA.cs"/>
<Compile Include="LZO.cs"/> <Compile Include="LZO.cs"/>
<Compile Include="LZRW3A.cs"/>
<Compile Include="LZV1.cs"/>
<Compile Include="LZVN.cs"/> <Compile Include="LZVN.cs"/>
<Compile Include="Native.cs"/> <Compile Include="Native.cs"/>
<Compile Include="Pak\CrushStream.cs"/> <Compile Include="Pak\CrushStream.cs"/>

View File

@@ -0,0 +1,92 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : LZRW3A.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Compression algorithms.
//
// --[ Description ] ----------------------------------------------------------
//
// Implements the LZRW3A decompression algorithm by Ross Williams.
// Used by the e2compr ext2 compression patches.
//
// --[ License ] --------------------------------------------------------------
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2026 Natalia Portillo
// ****************************************************************************/
namespace Aaru.Compression;
/// <summary>Implements the LZRW3A decompression algorithm (Ross Williams, used by e2compr)</summary>
public static class LZRW3A
{
    /// <summary>Set to <c>true</c> if this algorithm is supported, <c>false</c> otherwise.</summary>
    public static bool IsSupported => true;

    /// <summary>Decodes a buffer compressed with LZRW3A</summary>
    /// <param name="source">Compressed buffer</param>
    /// <param name="destination">Buffer where to write the decoded data</param>
    /// <returns>The number of decoded bytes, or -1 on error</returns>
    public static int DecodeBuffer(byte[] source, byte[] destination)
    {
        if(source is null || destination is null) return -1;

        int inPos  = 0;
        int outPos = 0;

        while(inPos < source.Length && outPos < destination.Length)
        {
            // One flag byte controls the next eight items, LSB first:
            // bit clear = literal byte, bit set = two-byte match reference.
            byte control = source[inPos++];

            for(var mask = 1; mask != 0x100 && inPos < source.Length && outPos < destination.Length; mask <<= 1)
            {
                if((control & mask) == 0)
                {
                    // Literal: copy one byte straight through
                    destination[outPos++] = source[inPos++];

                    continue;
                }

                // Match item: 16-bit little-endian word.
                // High 12 bits encode offset - 1, low 4 bits encode length - 3.
                if(inPos + 1 >= source.Length) return outPos;

                int word = source[inPos] | source[inPos + 1] << 8;
                inPos += 2;

                int distance = (word >> 4) + 1;
                int count    = (word & 0x0F) + 3;

                // A reference pointing before the start of the output is invalid
                if(distance > outPos) return -1;

                int from = outPos - distance;

                // Byte-by-byte forward copy so overlapping matches replicate correctly
                for(var i = 0; i < count && outPos < destination.Length; i++)
                    destination[outPos++] = destination[from + i];
            }
        }

        return outPos;
    }
}

89
Aaru.Compression/LZV1.cs Normal file
View File

@@ -0,0 +1,89 @@
// /***************************************************************************
// Aaru Data Preservation Suite
// ----------------------------------------------------------------------------
//
// Filename : LZV1.cs
// Author(s) : Natalia Portillo <claunia@claunia.com>
//
// Component : Compression algorithms.
//
// --[ Description ] ----------------------------------------------------------
//
// Implements the LZV1 decompression algorithm by Hermann Vogt.
// Used by the e2compr ext2 compression patches.
//
// --[ License ] --------------------------------------------------------------
//
// This library is free software; you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as
// published by the Free Software Foundation; either version 2.1 of the
// License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
//
// ----------------------------------------------------------------------------
// Copyright © 2011-2026 Natalia Portillo
// ****************************************************************************/
namespace Aaru.Compression;
/// <summary>Implements the LZV1 decompression algorithm (Hermann Vogt, used by e2compr)</summary>
public static class LZV1
{
    /// <summary>Set to <c>true</c> if this algorithm is supported, <c>false</c> otherwise.</summary>
    public static bool IsSupported => true;

    /// <summary>Decodes a buffer compressed with LZV1</summary>
    /// <param name="source">Compressed buffer</param>
    /// <param name="destination">Buffer where to write the decoded data</param>
    /// <returns>The number of decoded bytes, or -1 on error</returns>
    public static int DecodeBuffer(byte[] source, byte[] destination)
    {
        if(source is null || destination is null) return -1;

        int inPos  = 0;
        int outPos = 0;

        while(inPos < source.Length && outPos < destination.Length)
        {
            byte ctrl = source[inPos++];

            if(ctrl >> 4 == 0)
            {
                // High nibble zero: a literal run of (ctrl + 1) bytes follows
                int remaining = ctrl + 1;

                while(remaining-- > 0 && inPos < source.Length && outPos < destination.Length)
                    destination[outPos++] = source[inPos++];
            }
            else
            {
                // Back-reference: length = high nibble + 1,
                // offset = ((low nibble << 8) | next byte) + 1
                if(inPos >= source.Length) return outPos;

                int distance = ((ctrl & 0x0F) << 8 | source[inPos++]) + 1;
                int count    = (ctrl >> 4) + 1;

                // A reference pointing before the start of the output is invalid
                if(distance > outPos) return -1;

                int from = outPos - distance;

                // Byte-by-byte forward copy so overlapping matches replicate correctly
                for(var i = 0; i < count && outPos < destination.Length; i++)
                    destination[outPos++] = destination[from + i];
            }
        }

        return outPos;
    }
}

View File

@@ -28,7 +28,12 @@
using System; using System;
using System.Collections.Generic; using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using Aaru.CommonTypes.Enums; using Aaru.CommonTypes.Enums;
using Aaru.Compression;
using Aaru.Logging;
using Marshal = Aaru.Helpers.Marshal;
namespace Aaru.Filesystems; namespace Aaru.Filesystems;
@@ -49,8 +54,9 @@ public sealed partial class ext2FS
// Direct blocks (0-11) // Direct blocks (0-11)
for(uint i = 0; i < 12 && blockIndex < blocksUsed; i++, blockIndex++) for(uint i = 0; i < 12 && blockIndex < blocksUsed; i++, blockIndex++)
if(inode.block[i] != 0) {
blockList.Add((inode.block[i], 1)); if(inode.block[i] != 0) blockList.Add((inode.block[i], 1));
}
// Single indirect (block[12]) // Single indirect (block[12])
if(blockIndex < blocksUsed && inode.block[12] != 0) if(blockIndex < blocksUsed && inode.block[12] != 0)
@@ -226,4 +232,273 @@ public sealed partial class ext2FS
return ErrorNumber.NoError; return ErrorNumber.NoError;
} }
/// <summary>Reads a logical block from a compressed file, decompressing the cluster as needed</summary>
/// <param name="fileNode">The file node with compression state and cluster cache</param>
/// <param name="logicalBlock">The logical block index to read</param>
/// <param name="blockData">The decompressed block data</param>
/// <returns>Error number indicating success or failure</returns>
ErrorNumber ReadCompressedLogicalBlock(Ext2FileNode fileNode, ulong logicalBlock, out byte[] blockData)
{
    blockData = null;
    uint clusterNBlocks = fileNode.ClusterNBlocks;
    var clusterIndex   = (long)(logicalBlock / clusterNBlocks);
    var blockInCluster = (int)(logicalBlock % clusterNBlocks);
    var blockOffset    = (int)((ulong)blockInCluster * _blockSize);

    // On cache miss, read and decompress the whole cluster, then remember it so
    // subsequent reads of sibling blocks don't pay the decompression cost again.
    if(!fileNode.DecompressedClusterCache.TryGetValue(clusterIndex, out byte[] clusterData))
    {
        ErrorNumber errno = ReadAndDecompressCluster(fileNode, clusterIndex, out clusterData);

        if(errno != ErrorNumber.NoError) return errno;

        fileNode.DecompressedClusterCache[clusterIndex] = clusterData;
    }

    // Extract the requested block. The output is always a full, zero-initialized
    // block; when the cluster's decompressed data ends inside (partial last block)
    // or before (hole past end) the requested block, the tail stays zeroed.
    blockData = new byte[_blockSize];

    if(blockOffset < clusterData.Length)
    {
        int available = Math.Min((int)_blockSize, clusterData.Length - blockOffset);
        Array.Copy(clusterData, blockOffset, blockData, 0, available);
    }

    return ErrorNumber.NoError;
}
/// <summary>Reads all physical blocks of a cluster and decompresses them</summary>
/// <param name="fileNode">The file node</param>
/// <param name="clusterIndex">The cluster index within the file</param>
/// <param name="decompressedData">The decompressed cluster data</param>
/// <returns>Error number indicating success or failure</returns>
ErrorNumber ReadAndDecompressCluster(Ext2FileNode fileNode, long clusterIndex, out byte[] decompressedData)
{
    decompressedData = null;
    uint  clusterNBlocks     = fileNode.ClusterNBlocks;
    ulong firstLogicalBlock  = (ulong)clusterIndex * clusterNBlocks;
    uint  clusterSizeInBytes = clusterNBlocks * _blockSize;

    AaruLogging.Debug(MODULE_NAME,
                      "ReadAndDecompressCluster: cluster={0}, firstBlock={1}, nblocks={2}",
                      clusterIndex,
                      firstLogicalBlock,
                      clusterNBlocks);

    // Read all blocks of the cluster
    var rawCluster = new byte[clusterSizeInBytes];
    var bytesRead  = 0;

    for(uint i = 0; i < clusterNBlocks; i++)
    {
        ulong       logBlock = firstLogicalBlock + i;
        ErrorNumber errno    = ReadLogicalBlock(fileNode.BlockList, logBlock, out byte[] blockData);

        if(errno != ErrorNumber.NoError)
        {
            AaruLogging.Debug(MODULE_NAME,
                              "ReadAndDecompressCluster: failed reading block {0}: {1}",
                              logBlock,
                              errno);

            return errno;
        }

        if(blockData != null && blockData.Length > 0)
        {
            int toCopy = Math.Min(blockData.Length, (int)_blockSize);
            Array.Copy(blockData, 0, rawCluster, bytesRead, toCopy);
            bytesRead += toCopy;
        }
        else
            bytesRead += (int)_blockSize;
    }

    // Check for cluster head magic in the first 2 bytes. Read explicitly as
    // little-endian: BitConverter.ToUInt16 reads in host byte order and would
    // misinterpret the on-disk value on a big-endian host.
    var magic = (ushort)(rawCluster[0] | rawCluster[1] << 8);

    if(magic != EXT2_COMPRESS_MAGIC_04X)
    {
        // Not compressed — return raw data
        AaruLogging.Debug(MODULE_NAME,
                          "ReadAndDecompressCluster: cluster {0} not compressed (magic=0x{1:X4})",
                          clusterIndex,
                          magic);

        decompressedData = rawCluster;

        return ErrorNumber.NoError;
    }

    // Parse the cluster head
    int headSize = Marshal.SizeOf<CompressedClusterHead>();

    CompressedClusterHead head =
        Marshal.ByteArrayToStructureLittleEndian<CompressedClusterHead>(rawCluster, 0, headSize);

    AaruLogging.Debug(MODULE_NAME,
                      "ReadAndDecompressCluster: method={0}, ulen={1}, clen={2}, holemap_nbytes={3}",
                      head.method,
                      head.ulen,
                      head.clen,
                      head.holemap_nbytes);

    // Calculate offset to the compressed data (after header + holemap)
    int compressedDataOffset = headSize + head.holemap_nbytes;

    // Use long arithmetic so a corrupt (huge) clen cannot overflow int and
    // slip past the bounds check.
    if(compressedDataOffset + (long)head.clen > rawCluster.Length)
    {
        AaruLogging.Debug(MODULE_NAME, "ReadAndDecompressCluster: compressed data extends beyond cluster");

        return ErrorNumber.InvalidArgument;
    }

    // Extract compressed data
    var compressedData = new byte[head.clen];
    Array.Copy(rawCluster, compressedDataOffset, compressedData, 0, (int)head.clen);

    // Decompress
    decompressedData = new byte[head.ulen];
    ErrorNumber decompResult = DecompressData(head.method, compressedData, decompressedData);

    if(decompResult != ErrorNumber.NoError)
    {
        AaruLogging.Debug(MODULE_NAME,
                          "ReadAndDecompressCluster: decompression failed for method {0}: {1}",
                          head.method,
                          decompResult);

        return decompResult;
    }

    AaruLogging.Debug(MODULE_NAME, "ReadAndDecompressCluster: decompressed {0} -> {1} bytes", head.clen, head.ulen);

    return ErrorNumber.NoError;
}
/// <summary>Decompresses data using the specified e2compr algorithm</summary>
/// <param name="method">e2compr algorithm id</param>
/// <param name="compressedData">The compressed data</param>
/// <param name="decompressedData">Pre-allocated buffer for decompressed output</param>
/// <returns>Error number indicating success or failure</returns>
static ErrorNumber DecompressData(byte method, byte[] compressedData, byte[] decompressedData)
{
    // Reads from a decompression stream until the destination is full or the
    // stream reports end-of-data. Shared by the zlib and raw-deflate paths.
    static void ReadAll(Stream stream, byte[] destination)
    {
        var pos = 0;

        while(pos < destination.Length)
        {
            int read = stream.Read(destination, pos, destination.Length - pos);
            if(read == 0) break;
            pos += read;
        }
    }

    switch(method)
    {
        case EXT2_GZIP_ALG:
        {
            try
            {
                using var ms   = new MemoryStream(compressedData);
                using var zlib = new ZLibStream(ms, CompressionMode.Decompress);
                ReadAll(zlib, decompressedData);

                return ErrorNumber.NoError;
            }
            catch(Exception)
            {
                // The e2compr gzip format uses raw deflate (no zlib header), try that
                try
                {
                    using var ms      = new MemoryStream(compressedData);
                    using var deflate = new DeflateStream(ms, CompressionMode.Decompress);
                    ReadAll(deflate, decompressedData);

                    return ErrorNumber.NoError;
                }
                catch(Exception)
                {
                    return ErrorNumber.InvalidArgument;
                }
            }
        }
        case EXT2_BZIP2_ALG:
        {
            int decoded = BZip2.DecodeBuffer(compressedData, decompressedData);

            return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument;
        }
        case EXT2_LZO_ALG:
        {
            int decoded = LZO.DecodeBuffer(compressedData, decompressedData, LZO.Algorithm.LZO1X);

            return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument;
        }
        case EXT2_NONE_ALG:
        {
            // Stored uncompressed: straight copy, clamped to the smaller buffer
            int toCopy = Math.Min(compressedData.Length, decompressedData.Length);
            Array.Copy(compressedData, 0, decompressedData, 0, toCopy);

            return ErrorNumber.NoError;
        }
        case EXT2_LZRW3A_ALG:
        {
            int decoded = LZRW3A.DecodeBuffer(compressedData, decompressedData);

            return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument;
        }
        case EXT2_LZV1_ALG:
        {
            int decoded = LZV1.DecodeBuffer(compressedData, decompressedData);

            return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument;
        }
        default:
            return ErrorNumber.NotSupported;
    }
}
} }

View File

@@ -198,7 +198,8 @@ public sealed partial class ext2FS
const string FS_TYPE_EXT4 = "ext4"; const string FS_TYPE_EXT4 = "ext4";
// Incompatible features supported by this read-only implementation // Incompatible features supported by this read-only implementation
const uint EXT2_SUPPORTED_INCOMPAT = EXT2_FEATURE_INCOMPAT_FILETYPE | const uint EXT2_SUPPORTED_INCOMPAT = EXT2_FEATURE_INCOMPAT_COMPRESSION |
EXT2_FEATURE_INCOMPAT_FILETYPE |
EXT3_FEATURE_INCOMPAT_RECOVER | EXT3_FEATURE_INCOMPAT_RECOVER |
EXT2_FEATURE_INCOMPAT_META_BG | EXT2_FEATURE_INCOMPAT_META_BG |
EXT4_FEATURE_INCOMPAT_EXTENTS | EXT4_FEATURE_INCOMPAT_EXTENTS |
@@ -256,6 +257,8 @@ public sealed partial class ext2FS
const uint EXT2_NOCOMPR_FL = 0x00000400; const uint EXT2_NOCOMPR_FL = 0x00000400;
/// <summary>Encrypted inode (also historically compression error)</summary> /// <summary>Encrypted inode (also historically compression error)</summary>
const uint EXT4_ENCRYPT_FL = 0x00000800; const uint EXT4_ENCRYPT_FL = 0x00000800;
/// <summary>Compression error (e2compr, same bit as EXT4_ENCRYPT_FL)</summary>
const uint EXT2_ECOMPR_FL = 0x00000800;
/// <summary>B-tree/hash-indexed directory</summary> /// <summary>B-tree/hash-indexed directory</summary>
const uint EXT2_INDEX_FL = 0x00001000; const uint EXT2_INDEX_FL = 0x00001000;
/// <summary>AFS directory</summary> /// <summary>AFS directory</summary>
@@ -328,4 +331,24 @@ public sealed partial class ext2FS
const byte EXT4_XATTR_INDEX_RICHACL = 8; const byte EXT4_XATTR_INDEX_RICHACL = 8;
const byte EXT4_XATTR_INDEX_ENCRYPTION = 9; const byte EXT4_XATTR_INDEX_ENCRYPTION = 9;
const byte EXT4_XATTR_INDEX_HURD = 10; const byte EXT4_XATTR_INDEX_HURD = 10;
// e2compr compressed cluster head magic
/// <summary>Magic number for e2compr v0.4.x compressed cluster heads</summary>
const ushort EXT2_COMPRESS_MAGIC_04X = 0x5EF2;
// e2compr compression algorithm IDs
/// <summary>No compression</summary>
const byte EXT2_NONE_ALG = 0;
/// <summary>gzip / zlib deflate</summary>
const byte EXT2_GZIP_ALG = 1;
/// <summary>bzip2</summary>
const byte EXT2_BZIP2_ALG = 2;
/// <summary>LZO</summary>
const byte EXT2_LZO_ALG = 3;
/// <summary>LZRW3-A</summary>
const byte EXT2_LZRW3A_ALG = 4;
/// <summary>LZV1</summary>
const byte EXT2_LZV1_ALG = 5;
/// <summary>Number of supported e2compr algorithms</summary>
const byte EXT2_N_ALGORITHMS = 6;
} }

View File

@@ -126,16 +126,43 @@ public sealed partial class ext2FS
ulong fileSize = (ulong)inode.size_high << 32 | inode.size_lo; ulong fileSize = (ulong)inode.size_high << 32 | inode.size_lo;
// Detect e2compr compressed file
bool isCompressed = (inode.i_flags & EXT2_COMPR_FL) != 0;
byte comprMethod = 0;
uint clusterNBlocks = 1;
if(isCompressed)
{
// Per-inode compression params are stored in the obsoleted fragment address field (obso_faddr)
// Bits 0-7: compression method, Bits 8-15: log2(cluster_nblocks)
comprMethod = (byte)(inode.obso_faddr & 0xFF);
var log2CluNBlocks = (byte)(inode.obso_faddr >> 8 & 0xFF);
// Default cluster size if not set
if(log2CluNBlocks == 0) log2CluNBlocks = 3; // 8 blocks per cluster
clusterNBlocks = 1u << log2CluNBlocks;
AaruLogging.Debug(MODULE_NAME,
"OpenFile: compressed file, method={0}, log2_clu_nblocks={1}, clu_nblocks={2}",
comprMethod,
log2CluNBlocks,
clusterNBlocks);
}
node = new Ext2FileNode node = new Ext2FileNode
{ {
Path = path, Path = path,
Length = (long)fileSize, Length = (long)fileSize,
Offset = 0, Offset = 0,
InodeNumber = inodeNumber, InodeNumber = inodeNumber,
Inode = inode, Inode = inode,
BlockList = blockList, BlockList = blockList,
CachedBlock = null, CachedBlock = null,
CachedBlockIndex = -1 CachedBlockIndex = -1,
IsCompressed = isCompressed,
CompressionMethod = comprMethod,
ClusterNBlocks = clusterNBlocks
}; };
AaruLogging.Debug(MODULE_NAME, "OpenFile: success, inode={0}, size={1}", inodeNumber, fileSize); AaruLogging.Debug(MODULE_NAME, "OpenFile: success, inode={0}, size={1}", inodeNumber, fileSize);
@@ -150,6 +177,7 @@ public sealed partial class ext2FS
fileNode.CachedBlock = null; fileNode.CachedBlock = null;
fileNode.CachedBlockIndex = -1; fileNode.CachedBlockIndex = -1;
fileNode.DecompressedClusterCache.Clear();
return ErrorNumber.NoError; return ErrorNumber.NoError;
} }
@@ -194,8 +222,13 @@ public sealed partial class ext2FS
blockData = fileNode.CachedBlock; blockData = fileNode.CachedBlock;
else else
{ {
// Find the physical block from the pre-computed block list ErrorNumber errno;
ErrorNumber errno = ReadLogicalBlock(fileNode.BlockList, (ulong)blockIndex, out blockData);
// For compressed files, use cluster-aware reading with decompression
if(fileNode.IsCompressed)
errno = ReadCompressedLogicalBlock(fileNode, (ulong)blockIndex, out blockData);
else
errno = ReadLogicalBlock(fileNode.BlockList, (ulong)blockIndex, out blockData);
if(errno != ErrorNumber.NoError) if(errno != ErrorNumber.NoError)
{ {

View File

@@ -59,6 +59,18 @@ public sealed partial class ext2FS
/// <summary>Logical block index of the cached block (-1 if none)</summary> /// <summary>Logical block index of the cached block (-1 if none)</summary>
internal long CachedBlockIndex { get; set; } = -1; internal long CachedBlockIndex { get; set; } = -1;
/// <summary>Whether this file uses e2compr compression</summary>
internal bool IsCompressed { get; init; }
/// <summary>Number of blocks per compression cluster (power of 2)</summary>
internal uint ClusterNBlocks { get; init; }
/// <summary>Compression algorithm id for this file</summary>
internal byte CompressionMethod { get; init; }
/// <summary>Cached decompressed cluster data, keyed by cluster index</summary>
internal Dictionary<long, byte[]> DecompressedClusterCache { get; } = [];
/// <inheritdoc /> /// <inheritdoc />
public string Path { get; init; } public string Path { get; init; }

View File

@@ -638,6 +638,29 @@ public sealed partial class ext2FS
#endregion #endregion
#region Nested type: CompressedClusterHead
/// <summary>e2compr v0.4.x compressed cluster head (stored at start of a compressed cluster)</summary>
/// <remarks>
///     16 bytes total with <c>Pack = 1</c> (2 + 1 + 1 + 4 + 4 + 4). The holemap
///     (<c>holemap_nbytes</c> bytes) follows the head on disk, then <c>clen</c> bytes of
///     compressed data.
/// </remarks>
[StructLayout(LayoutKind.Sequential, Pack = 1)]
[SuppressMessage("ReSharper", "InconsistentNaming")]
readonly struct CompressedClusterHead
{
    /// <summary>0x00, Magic number (EXT2_COMPRESS_MAGIC_04X = 0x5EF2)</summary>
    public readonly ushort magic;
    /// <summary>0x02, Compression method id</summary>
    public readonly byte method;
    /// <summary>0x03, Number of bytes in holemap</summary>
    public readonly byte holemap_nbytes;
    /// <summary>0x04, Adler32 checksum of uncompressed data</summary>
    public readonly uint checksum;
    /// <summary>0x08, Uncompressed data length in bytes</summary>
    public readonly uint ulen;
    /// <summary>0x0C, Compressed data length in bytes (after header + holemap)</summary>
    public readonly uint clen;
}
#endregion
#region Nested type: FastCommitTagLength #region Nested type: FastCommitTagLength
/// <summary>ext4 fast commit on-disk tag-length structure</summary> /// <summary>ext4 fast commit on-disk tag-length structure</summary>