namespace Aaru.Compression;

/// <summary>Implements the LZRW3A decompression algorithm (Ross Williams, used by e2compr)</summary>
public static class LZRW3A
{
    /// <summary>Set to <c>true</c> if this algorithm is supported, <c>false</c> otherwise.</summary>
    public static bool IsSupported => true;

    /// <summary>Decodes a buffer compressed with LZRW3A</summary>
    /// <param name="source">Compressed buffer</param>
    /// <param name="destination">Buffer where to write the decoded data</param>
    /// <returns>The number of decoded bytes, or -1 on error</returns>
    public static int DecodeBuffer(byte[] source, byte[] destination)
    {
        if(source is null || destination is null) return -1;

        int inLen  = source.Length;
        int outLen = destination.Length;
        var inPos  = 0;
        var outPos = 0;

        while(inPos < inLen && outPos < outLen)
        {
            // One control byte governs the next eight items, LSB first:
            // a clear bit is a literal byte, a set bit is a back-reference.
            byte control = source[inPos++];

            for(var mask = 0x01; mask <= 0x80 && inPos < inLen && outPos < outLen; mask <<= 1)
            {
                if((control & mask) == 0)
                {
                    // Literal: copy one input byte straight through
                    destination[outPos++] = source[inPos++];

                    continue;
                }

                // Back-reference: 16-bit little-endian word holding the distance
                // (high 12 bits, stored minus one) and the run length (low 4 bits,
                // stored minus three).
                if(inPos + 1 >= inLen) return outPos; // truncated item: best-effort stop

                int word = source[inPos] | source[inPos + 1] << 8;
                inPos += 2;

                int distance = (word >> 4)   + 1;
                int runLen   = (word & 0x0F) + 3;

                if(distance > outPos) return -1; // reference reaches before start of output

                int from = outPos - distance;

                // Copy one byte at a time so overlapping references replicate correctly
                for(var i = 0; i < runLen && outPos < outLen; i++) destination[outPos++] = destination[from + i];
            }
        }

        return outPos;
    }
}
namespace Aaru.Compression;

/// <summary>Implements the LZV1 decompression algorithm (Hermann Vogt, used by e2compr)</summary>
public static class LZV1
{
    /// <summary>Set to <c>true</c> if this algorithm is supported, <c>false</c> otherwise.</summary>
    public static bool IsSupported => true;

    /// <summary>Decodes a buffer compressed with LZV1</summary>
    /// <param name="source">Compressed buffer</param>
    /// <param name="destination">Buffer where to write the decoded data</param>
    /// <returns>The number of decoded bytes, or -1 on error</returns>
    public static int DecodeBuffer(byte[] source, byte[] destination)
    {
        if(source is null || destination is null) return -1;

        int inLen  = source.Length;
        int outLen = destination.Length;
        var inPos  = 0;
        var outPos = 0;

        while(inPos < inLen && outPos < outLen)
        {
            byte token = source[inPos++];

            if(token >> 4 == 0)
            {
                // High nibble zero: a literal run of (token + 1) bytes follows
                for(int remaining = token + 1; remaining > 0 && inPos < inLen && outPos < outLen; remaining--)
                    destination[outPos++] = source[inPos++];

                continue;
            }

            // Back-reference: run length is the high nibble plus one; the distance is
            // the low nibble concatenated with the next byte, stored minus one.
            if(inPos >= inLen) return outPos; // truncated reference: best-effort stop

            int distance = ((token & 0x0F) << 8 | source[inPos++]) + 1;
            int runLen   = (token >> 4) + 1;

            if(distance > outPos) return -1; // reference reaches before start of output

            int from = outPos - distance;

            // Copy one byte at a time so overlapping references replicate correctly
            for(var i = 0; i < runLen && outPos < outLen; i++) destination[outPos++] = destination[from + i];
        }

        return outPos;
    }
}
blocksUsed && inode.block[12] != 0) @@ -226,4 +232,273 @@ public sealed partial class ext2FS return ErrorNumber.NoError; } + + /// Reads a logical block from a compressed file, decompressing the cluster as needed + /// The file node with compression state and cluster cache + /// The logical block index to read + /// The decompressed block data + /// Error number indicating success or failure + ErrorNumber ReadCompressedLogicalBlock(Ext2FileNode fileNode, ulong logicalBlock, out byte[] blockData) + { + blockData = null; + + uint clusterNBlocks = fileNode.ClusterNBlocks; + var clusterIndex = (long)(logicalBlock / clusterNBlocks); + var blockInCluster = (int)(logicalBlock % clusterNBlocks); + var blockOffset = (int)((ulong)blockInCluster * _blockSize); + + // Check cluster cache + if(fileNode.DecompressedClusterCache.TryGetValue(clusterIndex, out byte[] clusterData)) + { + if(blockOffset + (int)_blockSize <= clusterData.Length) + { + blockData = new byte[_blockSize]; + Array.Copy(clusterData, blockOffset, blockData, 0, (int)_blockSize); + } + else if(blockOffset < clusterData.Length) + { + // Partial last block + int available = clusterData.Length - blockOffset; + blockData = new byte[_blockSize]; + Array.Copy(clusterData, blockOffset, blockData, 0, available); + } + else + blockData = new byte[_blockSize]; + + return ErrorNumber.NoError; + } + + // Read the cluster and decompress it + ErrorNumber errno = ReadAndDecompressCluster(fileNode, clusterIndex, out clusterData); + + if(errno != ErrorNumber.NoError) return errno; + + // Cache the decompressed cluster + fileNode.DecompressedClusterCache[clusterIndex] = clusterData; + + // Extract the requested block + if(blockOffset + (int)_blockSize <= clusterData.Length) + { + blockData = new byte[_blockSize]; + Array.Copy(clusterData, blockOffset, blockData, 0, (int)_blockSize); + } + else if(blockOffset < clusterData.Length) + { + int available = clusterData.Length - blockOffset; + blockData = new byte[_blockSize]; + 
Array.Copy(clusterData, blockOffset, blockData, 0, available); + } + else + blockData = new byte[_blockSize]; + + return ErrorNumber.NoError; + } + + /// Reads all physical blocks of a cluster and decompresses them + /// The file node + /// The cluster index within the file + /// The decompressed cluster data + /// Error number indicating success or failure + ErrorNumber ReadAndDecompressCluster(Ext2FileNode fileNode, long clusterIndex, out byte[] decompressedData) + { + decompressedData = null; + + uint clusterNBlocks = fileNode.ClusterNBlocks; + ulong firstLogicalBlock = (ulong)clusterIndex * clusterNBlocks; + uint clusterSizeInBytes = clusterNBlocks * _blockSize; + + AaruLogging.Debug(MODULE_NAME, + "ReadAndDecompressCluster: cluster={0}, firstBlock={1}, nblocks={2}", + clusterIndex, + firstLogicalBlock, + clusterNBlocks); + + // Read all blocks of the cluster + var rawCluster = new byte[clusterSizeInBytes]; + var bytesRead = 0; + + for(uint i = 0; i < clusterNBlocks; i++) + { + ulong logBlock = firstLogicalBlock + i; + + ErrorNumber errno = ReadLogicalBlock(fileNode.BlockList, logBlock, out byte[] blockData); + + if(errno != ErrorNumber.NoError) + { + AaruLogging.Debug(MODULE_NAME, + "ReadAndDecompressCluster: failed reading block {0}: {1}", + logBlock, + errno); + + return errno; + } + + if(blockData != null && blockData.Length > 0) + { + int toCopy = Math.Min(blockData.Length, (int)_blockSize); + Array.Copy(blockData, 0, rawCluster, bytesRead, toCopy); + bytesRead += toCopy; + } + else + bytesRead += (int)_blockSize; + } + + // Check for cluster head magic in the first 2 bytes + var magic = BitConverter.ToUInt16(rawCluster, 0); + + if(magic != EXT2_COMPRESS_MAGIC_04X) + { + // Not compressed — return raw data + AaruLogging.Debug(MODULE_NAME, + "ReadAndDecompressCluster: cluster {0} not compressed (magic=0x{1:X4})", + clusterIndex, + magic); + + decompressedData = rawCluster; + + return ErrorNumber.NoError; + } + + // Parse the cluster head + int headSize = 
Marshal.SizeOf(); + + CompressedClusterHead head = + Marshal.ByteArrayToStructureLittleEndian(rawCluster, 0, headSize); + + AaruLogging.Debug(MODULE_NAME, + "ReadAndDecompressCluster: method={0}, ulen={1}, clen={2}, holemap_nbytes={3}", + head.method, + head.ulen, + head.clen, + head.holemap_nbytes); + + // Calculate offset to the compressed data (after header + holemap) + int compressedDataOffset = headSize + head.holemap_nbytes; + + if(compressedDataOffset + (int)head.clen > rawCluster.Length) + { + AaruLogging.Debug(MODULE_NAME, "ReadAndDecompressCluster: compressed data extends beyond cluster"); + + return ErrorNumber.InvalidArgument; + } + + // Extract compressed data + var compressedData = new byte[head.clen]; + Array.Copy(rawCluster, compressedDataOffset, compressedData, 0, (int)head.clen); + + // Decompress + decompressedData = new byte[head.ulen]; + + ErrorNumber decompResult = DecompressData(head.method, compressedData, decompressedData); + + if(decompResult != ErrorNumber.NoError) + { + AaruLogging.Debug(MODULE_NAME, + "ReadAndDecompressCluster: decompression failed for method {0}: {1}", + head.method, + decompResult); + + return decompResult; + } + + AaruLogging.Debug(MODULE_NAME, "ReadAndDecompressCluster: decompressed {0} -> {1} bytes", head.clen, head.ulen); + + return ErrorNumber.NoError; + } + + /// Decompresses data using the specified e2compr algorithm + /// e2compr algorithm id + /// The compressed data + /// Pre-allocated buffer for decompressed output + /// Error number indicating success or failure + static ErrorNumber DecompressData(byte method, byte[] compressedData, byte[] decompressedData) + { + switch(method) + { + case EXT2_GZIP_ALG: + { + try + { + using var ms = new MemoryStream(compressedData); + using var zlib = new ZLibStream(ms, CompressionMode.Decompress); + var pos = 0; + + while(pos < decompressedData.Length) + { + int read = zlib.Read(decompressedData, pos, decompressedData.Length - pos); + + if(read == 0) break; + + pos += 
read; + } + + return ErrorNumber.NoError; + } + catch(Exception) + { + // The e2compr gzip format uses raw deflate (no zlib header), try that + try + { + using var ms = new MemoryStream(compressedData); + using var deflate = new DeflateStream(ms, CompressionMode.Decompress); + var pos = 0; + + while(pos < decompressedData.Length) + { + int read = deflate.Read(decompressedData, pos, decompressedData.Length - pos); + + if(read == 0) break; + + pos += read; + } + + return ErrorNumber.NoError; + } + catch(Exception) + { + return ErrorNumber.InvalidArgument; + } + } + } + + case EXT2_BZIP2_ALG: + { + int decoded = BZip2.DecodeBuffer(compressedData, decompressedData); + + return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument; + } + + case EXT2_LZO_ALG: + { + int decoded = LZO.DecodeBuffer(compressedData, decompressedData, LZO.Algorithm.LZO1X); + + return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument; + } + + case EXT2_NONE_ALG: + { + int toCopy = Math.Min(compressedData.Length, decompressedData.Length); + Array.Copy(compressedData, 0, decompressedData, 0, toCopy); + + return ErrorNumber.NoError; + } + + case EXT2_LZRW3A_ALG: + { + int decoded = LZRW3A.DecodeBuffer(compressedData, decompressedData); + + return decoded > 0 ? ErrorNumber.NoError : ErrorNumber.InvalidArgument; + } + + case EXT2_LZV1_ALG: + { + int decoded = LZV1.DecodeBuffer(compressedData, decompressedData); + + return decoded > 0 ? 
ErrorNumber.NoError : ErrorNumber.InvalidArgument; + } + + default: + return ErrorNumber.NotSupported; + } + } } \ No newline at end of file diff --git a/Aaru.Filesystems/ext2FS/Consts.cs b/Aaru.Filesystems/ext2FS/Consts.cs index 505ac7483..05da06f4c 100644 --- a/Aaru.Filesystems/ext2FS/Consts.cs +++ b/Aaru.Filesystems/ext2FS/Consts.cs @@ -198,7 +198,8 @@ public sealed partial class ext2FS const string FS_TYPE_EXT4 = "ext4"; // Incompatible features supported by this read-only implementation - const uint EXT2_SUPPORTED_INCOMPAT = EXT2_FEATURE_INCOMPAT_FILETYPE | + const uint EXT2_SUPPORTED_INCOMPAT = EXT2_FEATURE_INCOMPAT_COMPRESSION | + EXT2_FEATURE_INCOMPAT_FILETYPE | EXT3_FEATURE_INCOMPAT_RECOVER | EXT2_FEATURE_INCOMPAT_META_BG | EXT4_FEATURE_INCOMPAT_EXTENTS | @@ -256,6 +257,8 @@ public sealed partial class ext2FS const uint EXT2_NOCOMPR_FL = 0x00000400; /// Encrypted inode (also historically compression error) const uint EXT4_ENCRYPT_FL = 0x00000800; + /// Compression error (e2compr, same bit as EXT4_ENCRYPT_FL) + const uint EXT2_ECOMPR_FL = 0x00000800; /// B-tree/hash-indexed directory const uint EXT2_INDEX_FL = 0x00001000; /// AFS directory @@ -328,4 +331,24 @@ public sealed partial class ext2FS const byte EXT4_XATTR_INDEX_RICHACL = 8; const byte EXT4_XATTR_INDEX_ENCRYPTION = 9; const byte EXT4_XATTR_INDEX_HURD = 10; + + // e2compr compressed cluster head magic + /// Magic number for e2compr v0.4.x compressed cluster heads + const ushort EXT2_COMPRESS_MAGIC_04X = 0x5EF2; + + // e2compr compression algorithm IDs + /// No compression + const byte EXT2_NONE_ALG = 0; + /// gzip / zlib deflate + const byte EXT2_GZIP_ALG = 1; + /// bzip2 + const byte EXT2_BZIP2_ALG = 2; + /// LZO + const byte EXT2_LZO_ALG = 3; + /// LZRW3-A + const byte EXT2_LZRW3A_ALG = 4; + /// LZV1 + const byte EXT2_LZV1_ALG = 5; + /// Number of supported e2compr algorithms + const byte EXT2_N_ALGORITHMS = 6; } \ No newline at end of file diff --git a/Aaru.Filesystems/ext2FS/File.cs 
b/Aaru.Filesystems/ext2FS/File.cs index 6efd4a75c..caad6cb62 100644 --- a/Aaru.Filesystems/ext2FS/File.cs +++ b/Aaru.Filesystems/ext2FS/File.cs @@ -126,16 +126,43 @@ public sealed partial class ext2FS ulong fileSize = (ulong)inode.size_high << 32 | inode.size_lo; + // Detect e2compr compressed file + bool isCompressed = (inode.i_flags & EXT2_COMPR_FL) != 0; + byte comprMethod = 0; + uint clusterNBlocks = 1; + + if(isCompressed) + { + // Per-inode compression params are stored in the obsoleted fragment address field (obso_faddr) + // Bits 0-7: compression method, Bits 8-15: log2(cluster_nblocks) + comprMethod = (byte)(inode.obso_faddr & 0xFF); + var log2CluNBlocks = (byte)(inode.obso_faddr >> 8 & 0xFF); + + // Default cluster size if not set + if(log2CluNBlocks == 0) log2CluNBlocks = 3; // 8 blocks per cluster + + clusterNBlocks = 1u << log2CluNBlocks; + + AaruLogging.Debug(MODULE_NAME, + "OpenFile: compressed file, method={0}, log2_clu_nblocks={1}, clu_nblocks={2}", + comprMethod, + log2CluNBlocks, + clusterNBlocks); + } + node = new Ext2FileNode { - Path = path, - Length = (long)fileSize, - Offset = 0, - InodeNumber = inodeNumber, - Inode = inode, - BlockList = blockList, - CachedBlock = null, - CachedBlockIndex = -1 + Path = path, + Length = (long)fileSize, + Offset = 0, + InodeNumber = inodeNumber, + Inode = inode, + BlockList = blockList, + CachedBlock = null, + CachedBlockIndex = -1, + IsCompressed = isCompressed, + CompressionMethod = comprMethod, + ClusterNBlocks = clusterNBlocks }; AaruLogging.Debug(MODULE_NAME, "OpenFile: success, inode={0}, size={1}", inodeNumber, fileSize); @@ -150,6 +177,7 @@ public sealed partial class ext2FS fileNode.CachedBlock = null; fileNode.CachedBlockIndex = -1; + fileNode.DecompressedClusterCache.Clear(); return ErrorNumber.NoError; } @@ -194,8 +222,13 @@ public sealed partial class ext2FS blockData = fileNode.CachedBlock; else { - // Find the physical block from the pre-computed block list - ErrorNumber errno = 
ReadLogicalBlock(fileNode.BlockList, (ulong)blockIndex, out blockData); + ErrorNumber errno; + + // For compressed files, use cluster-aware reading with decompression + if(fileNode.IsCompressed) + errno = ReadCompressedLogicalBlock(fileNode, (ulong)blockIndex, out blockData); + else + errno = ReadLogicalBlock(fileNode.BlockList, (ulong)blockIndex, out blockData); if(errno != ErrorNumber.NoError) { diff --git a/Aaru.Filesystems/ext2FS/Internal.cs b/Aaru.Filesystems/ext2FS/Internal.cs index 041738c32..1bd249de5 100644 --- a/Aaru.Filesystems/ext2FS/Internal.cs +++ b/Aaru.Filesystems/ext2FS/Internal.cs @@ -59,6 +59,18 @@ public sealed partial class ext2FS /// Logical block index of the cached block (-1 if none) internal long CachedBlockIndex { get; set; } = -1; + /// Whether this file uses e2compr compression + internal bool IsCompressed { get; init; } + + /// Number of blocks per compression cluster (power of 2) + internal uint ClusterNBlocks { get; init; } + + /// Compression algorithm id for this file + internal byte CompressionMethod { get; init; } + + /// Cached decompressed cluster data, keyed by cluster index + internal Dictionary DecompressedClusterCache { get; } = []; + /// public string Path { get; init; } diff --git a/Aaru.Filesystems/ext2FS/Structs.cs b/Aaru.Filesystems/ext2FS/Structs.cs index 95b0a10d8..e67c550d0 100644 --- a/Aaru.Filesystems/ext2FS/Structs.cs +++ b/Aaru.Filesystems/ext2FS/Structs.cs @@ -638,6 +638,29 @@ public sealed partial class ext2FS #endregion +#region Nested type: CompressedClusterHead + + /// e2compr v0.4.x compressed cluster head (stored at start of a compressed cluster) + [StructLayout(LayoutKind.Sequential, Pack = 1)] + [SuppressMessage("ReSharper", "InconsistentNaming")] + readonly struct CompressedClusterHead + { + /// 0x00, Magic number (EXT2_COMPRESS_MAGIC_04X = 0x5EF2) + public readonly ushort magic; + /// 0x02, Compression method id + public readonly byte method; + /// 0x03, Number of bytes in holemap + public readonly 
byte holemap_nbytes; + /// 0x04, Adler32 checksum of uncompressed data + public readonly uint checksum; + /// 0x08, Uncompressed data length in bytes + public readonly uint ulen; + /// 0x0C, Compressed data length in bytes (after header + holemap) + public readonly uint clen; + } + +#endregion + #region Nested type: FastCommitTagLength /// ext4 fast commit on-disk tag-length structure