Arj's methods CompressedMost (1), Compressed (2) and CompressedFaster (3) implemented.

This commit is contained in:
Twan van Dongen
2025-11-14 15:39:58 +01:00
parent a9017d7c25
commit 4efb109da8
14 changed files with 786 additions and 29 deletions

View File

@@ -38,6 +38,11 @@ namespace SharpCompress.Common.Arj
Header.CompressedSize
);
break;
case CompressionMethod.CompressedMost:
case CompressionMethod.Compressed:
case CompressionMethod.CompressedFaster:
compressedStream = new LhaStream<Lh7DecoderCfg>(_stream, (int)Header.OriginalSize);
break;
case CompressionMethod.CompressedFastest:
compressedStream = new LHDecoderStream(_stream, (int)Header.OriginalSize);
break;

View File

@@ -4,56 +4,68 @@ using System.IO;
namespace SharpCompress.Compressors.Arj
{
[CLSCompliant(true)]
public sealed class BitReader
public class BitReader
{
private readonly Stream _stream;
private int _bitBuffer;
private int _bitsRemaining;
private bool _disposed;
private readonly Stream _input;
private int _bitBuffer; // currently buffered bits
private int _bitCount; // number of bits in buffer
public BitReader(Stream input)
{
_stream = input ?? throw new ArgumentNullException(nameof(input));
if (!input.CanRead)
throw new ArgumentException("Stream must be readable.", nameof(input));
_input = input ?? throw new ArgumentNullException(nameof(input));
_bitBuffer = 0;
_bitCount = 0;
}
/// <summary>
/// Reads a single bit from the stream. Returns 0 or 1.
/// </summary>
public int ReadBit()
{
if (_bitCount == 0)
{
int nextByte = _input.ReadByte();
if (nextByte < 0)
{
throw new EndOfStreamException("No more data available in BitReader.");
}
_bitBuffer = nextByte;
_bitCount = 8;
}
int bit = (_bitBuffer >> (_bitCount - 1)) & 1;
_bitCount--;
return bit;
}
/// <summary>
/// Reads n bits (up to 32) from the stream.
/// </summary>
public int ReadBits(int count)
{
if (_disposed)
throw new ObjectDisposedException(nameof(BitReader));
if (count <= 0 || count > 32)
if (count < 0 || count > 32)
{
throw new ArgumentOutOfRangeException(
nameof(count),
"Bit count must be between 1 and 32."
"Count must be between 0 and 32."
);
}
int result = 0;
for (int i = 0; i < count; i++)
{
if (_bitsRemaining == 0)
{
int nextByte = _stream.ReadByte();
if (nextByte == -1)
throw new EndOfStreamException();
_bitBuffer = nextByte;
_bitsRemaining = 8;
}
// hoogste bit eerst
result = (result << 1) | ((_bitBuffer >> 7) & 1);
_bitBuffer <<= 1;
_bitsRemaining--;
result = (result << 1) | ReadBit();
}
return result;
}
/// <summary>
/// Resets any buffered bits.
/// </summary>
public void AlignToByte()
{
_bitsRemaining = 0;
_bitCount = 0;
_bitBuffer = 0;
}
}

View File

@@ -0,0 +1,43 @@
using System;
using System.Collections;
using System.Collections.Generic;
namespace SharpCompress.Compressors.Arj
{
/// <summary>
/// Iterator that reads & pushes values back into the ring buffer.
/// </summary>
public class HistoryIterator : IEnumerator<byte>
{
private int _index;
private readonly IRingBuffer _ring;
public HistoryIterator(IRingBuffer ring, int startIndex)
{
_ring = ring;
_index = startIndex;
}
public bool MoveNext()
{
Current = _ring[_index];
_index = unchecked(_index + 1);
// Push value back into the ring buffer
_ring.Push(Current);
return true; // iterator is infinite
}
public void Reset()
{
throw new NotSupportedException();
}
public byte Current { get; private set; }
object IEnumerator.Current => Current;
public void Dispose() { }
}
}

View File

@@ -0,0 +1,218 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SharpCompress.Compressors.Arj
{
[CLSCompliant(true)]
public enum NodeType
{
Leaf,
Branch,
}
[CLSCompliant(true)]
public sealed class TreeEntry
{
public readonly NodeType Type;
public readonly int LeafValue;
public readonly int BranchIndex;
public const int MAX_INDEX = 4096;
private TreeEntry(NodeType type, int leafValue, int branchIndex)
{
Type = type;
LeafValue = leafValue;
BranchIndex = branchIndex;
}
public static TreeEntry Leaf(int value)
{
return new TreeEntry(NodeType.Leaf, value, -1);
}
public static TreeEntry Branch(int index)
{
if (index >= MAX_INDEX)
{
throw new ArgumentOutOfRangeException(
nameof(index),
"Branch index exceeds MAX_INDEX"
);
}
return new TreeEntry(NodeType.Branch, 0, index);
}
}
[CLSCompliant(true)]
public sealed class HuffTree
{
private readonly List<TreeEntry> _tree;
public HuffTree(int capacity = 0)
{
_tree = new List<TreeEntry>(capacity);
}
public void SetSingle(int value)
{
_tree.Clear();
_tree.Add(TreeEntry.Leaf(value));
}
public void BuildTree(byte[] lengths, int count)
{
if (lengths == null)
{
throw new ArgumentNullException(nameof(lengths));
}
if (count < 0 || count > lengths.Length)
{
throw new ArgumentOutOfRangeException(nameof(count));
}
if (count > TreeEntry.MAX_INDEX / 2)
{
throw new ArgumentException(
$"Count exceeds maximum allowed: {TreeEntry.MAX_INDEX / 2}"
);
}
byte[] slice = new byte[count];
Array.Copy(lengths, slice, count);
BuildTree(slice);
}
public void BuildTree(byte[] valueLengths)
{
if (valueLengths == null)
{
throw new ArgumentNullException(nameof(valueLengths));
}
if (valueLengths.Length > TreeEntry.MAX_INDEX / 2)
{
throw new InvalidOperationException("Too many code lengths");
}
_tree.Clear();
int maxAllocated = 1; // start with a single (root) node
for (byte currentLen = 1; ; currentLen++)
{
// add missing branches up to current limit
int maxLimit = maxAllocated;
for (int i = _tree.Count; i < maxLimit; i++)
{
// TreeEntry.Branch may throw if index too large
try
{
_tree.Add(TreeEntry.Branch(maxAllocated));
}
catch (ArgumentOutOfRangeException e)
{
_tree.Clear();
throw new InvalidOperationException("Branch index exceeds limit", e);
}
// each branch node allocates two children
maxAllocated += 2;
}
// fill tree with leaves found in the lengths table at the current length
bool moreLeaves = false;
for (int value = 0; value < valueLengths.Length; value++)
{
byte len = valueLengths[value];
if (len == currentLen)
{
_tree.Add(TreeEntry.Leaf(value));
}
else if (len > currentLen)
{
moreLeaves = true; // there are more leaves to process
}
}
// sanity check (too many leaves)
if (_tree.Count > maxAllocated)
{
throw new InvalidOperationException("Too many leaves");
}
// stop when no longer finding longer codes
if (!moreLeaves)
{
break;
}
}
// ensure tree is complete
if (_tree.Count != maxAllocated)
{
throw new InvalidOperationException(
$"Missing some leaves: tree count = {_tree.Count}, expected = {maxAllocated}"
);
}
}
public int ReadEntry(BitReader reader)
{
if (_tree.Count == 0)
{
throw new InvalidOperationException("Tree not initialized");
}
TreeEntry node = _tree[0];
while (true)
{
if (node.Type == NodeType.Leaf)
{
return node.LeafValue;
}
int bit = reader.ReadBit();
int index = node.BranchIndex + bit;
if (index >= _tree.Count)
{
throw new InvalidOperationException("Invalid branch index during read");
}
node = _tree[index];
}
}
public override string ToString()
{
var result = new StringBuilder();
void FormatStep(int index, string prefix)
{
var node = _tree[index];
if (node.Type == NodeType.Leaf)
{
result.AppendLine($"{prefix} -> {node.LeafValue}");
}
else
{
FormatStep(node.BranchIndex, prefix + "0");
FormatStep(node.BranchIndex + 1, prefix + "1");
}
}
if (_tree.Count > 0)
{
FormatStep(0, "");
}
return result.ToString();
}
}
}

View File

@@ -0,0 +1,9 @@
namespace SharpCompress.Compressors.Arj
{
public interface ILhaDecoderConfig
{
int HistoryBits { get; }
int OffsetBits { get; }
RingBuffer RingBuffer { get; }
}
}

View File

@@ -0,0 +1,17 @@
namespace SharpCompress.Compressors.Arj
{
public interface IRingBuffer
{
int BufferSize { get; }
int Cursor { get; }
void SetCursor(int pos);
void Push(byte value);
HistoryIterator IterFromOffset(int offset);
HistoryIterator IterFromPos(int pos);
byte this[int index] { get; }
}
}

View File

@@ -0,0 +1,9 @@
namespace SharpCompress.Compressors.Arj
{
public class Lh5DecoderCfg : ILhaDecoderConfig
{
public int HistoryBits => 14;
public int OffsetBits => 4;
public RingBuffer RingBuffer { get; } = new RingBuffer(1 << 14);
}
}

View File

@@ -0,0 +1,9 @@
namespace SharpCompress.Compressors.Arj
{
public class Lh7DecoderCfg : ILhaDecoderConfig
{
public int HistoryBits => 17;
public int OffsetBits => 5;
public RingBuffer RingBuffer { get; } = new RingBuffer(1 << 17);
}
}

View File

@@ -0,0 +1,359 @@
using System;
using System.Data;
using System.IO;
using System.Linq;
using SharpCompress.IO;
namespace SharpCompress.Compressors.Arj
{
[CLSCompliant(true)]
public sealed class LhaStream<C> : Stream, IStreamStack
where C : ILhaDecoderConfig, new()
{
private readonly BitReader _bitReader;
private readonly Stream _stream;
private readonly HuffTree _commandTree;
private readonly HuffTree _offsetTree;
private int _remainingCommands;
private (int offset, int count)? _copyProgress;
private readonly RingBuffer _ringBuffer;
private readonly C _config = new C();
private const int NUM_COMMANDS = 510;
private const int NUM_TEMP_CODELEN = 20;
private readonly int _originalSize;
private int _producedBytes = 0;
#if DEBUG_STREAMS
long IStreamStack.InstanceId { get; set; }
#endif
int IStreamStack.DefaultBufferSize { get; set; }
Stream IStreamStack.BaseStream() => _stream;
int IStreamStack.BufferSize
{
get => 0;
set { }
}
int IStreamStack.BufferPosition
{
get => 0;
set { }
}
void IStreamStack.SetPosition(long position) { }
public LhaStream(Stream compressedStream, int originalSize)
{
_stream = compressedStream ?? throw new ArgumentNullException(nameof(compressedStream));
_bitReader = new BitReader(compressedStream);
_ringBuffer = _config.RingBuffer;
_commandTree = new HuffTree(NUM_COMMANDS * 2);
_offsetTree = new HuffTree(NUM_TEMP_CODELEN * 2);
_remainingCommands = 0;
_copyProgress = null;
_originalSize = originalSize;
}
public override bool CanRead => true;
public override bool CanSeek => false;
public override bool CanWrite => false;
public override long Length => throw new NotSupportedException();
public override long Position
{
get => throw new NotSupportedException();
set => throw new NotSupportedException();
}
public override void Flush() { }
public override long Seek(long offset, SeekOrigin origin) =>
throw new NotSupportedException();
public override void SetLength(long value) => throw new NotSupportedException();
public override void Write(byte[] buffer, int offset, int count) =>
throw new NotSupportedException();
public override int Read(byte[] buffer, int offset, int count)
{
if (buffer == null)
{
throw new ArgumentNullException(nameof(buffer));
}
if (offset < 0 || count < 0 || (offset + count) > buffer.Length)
{
throw new ArgumentOutOfRangeException();
}
if (_producedBytes >= _originalSize)
{
return 0; // EOF
}
if (count == 0)
{
return 0;
}
byte[] temp = new byte[count];
FillBuffer(temp);
Array.Copy(temp, 0, buffer, offset, temp.Length);
return _producedBytes;
}
private byte ReadCodeLength()
{
byte len = (byte)_bitReader.ReadBits(3);
if (len == 7)
{
while (_bitReader.ReadBit() != 0)
{
len++;
if (len > 255)
{
throw new InvalidOperationException("Code length overflow");
}
}
}
return len;
}
private int ReadCodeSkip(int skipRange)
{
int bits;
int increment;
switch (skipRange)
{
case 0:
return 1;
case 1:
bits = 4;
increment = 3; // 3..=18
break;
default:
bits = 9;
increment = 20; // 20..=531
break;
}
int skip = _bitReader.ReadBits(bits);
return skip + increment;
}
private void ReadTempTree()
{
byte[] codeLengths = new byte[NUM_TEMP_CODELEN];
// number of codes to read (5 bits)
int numCodes = _bitReader.ReadBits(5);
// single code only
if (numCodes == 0)
{
int code = _bitReader.ReadBits(5);
_offsetTree.SetSingle((byte)code);
return;
}
if (numCodes > NUM_TEMP_CODELEN)
{
throw new Exception("temporary codelen table has invalid size");
}
// read actual lengths
int count = Math.Min(3, numCodes);
for (int i = 0; i < count; i++)
{
codeLengths[i] = (byte)ReadCodeLength();
}
// 2-bit skip value follows
int skip = _bitReader.ReadBits(2);
if (3 + skip > numCodes)
{
throw new Exception("temporary codelen table has invalid size");
}
for (int i = 3 + skip; i < numCodes; i++)
{
codeLengths[i] = (byte)ReadCodeLength();
}
_offsetTree.BuildTree(codeLengths, numCodes);
}
private void ReadCommandTree()
{
byte[] codeLengths = new byte[NUM_COMMANDS];
// number of codes to read (9 bits)
int numCodes = _bitReader.ReadBits(9);
// single code only
if (numCodes == 0)
{
int code = _bitReader.ReadBits(9);
_commandTree.SetSingle((ushort)code);
return;
}
if (numCodes > NUM_COMMANDS)
{
throw new Exception("commands codelen table has invalid size");
}
int index = 0;
while (index < numCodes)
{
for (int n = 0; n < numCodes - index; n++)
{
int code = _offsetTree.ReadEntry(_bitReader);
if (code >= 0 && code <= 2) // skip range
{
int skipCount = ReadCodeSkip(code);
index += n + skipCount;
goto outerLoop;
}
else
{
codeLengths[index + n] = (byte)(code - 2);
}
}
break;
outerLoop:
;
}
_commandTree.BuildTree(codeLengths, numCodes);
}
private void ReadOffsetTree()
{
int numCodes = _bitReader.ReadBits(_config.OffsetBits);
if (numCodes == 0)
{
int code = _bitReader.ReadBits(_config.OffsetBits);
_offsetTree.SetSingle(code);
return;
}
if (numCodes > _config.HistoryBits)
{
throw new InvalidDataException("Offset code table too large");
}
byte[] codeLengths = new byte[NUM_TEMP_CODELEN];
for (int i = 0; i < numCodes; i++)
{
codeLengths[i] = (byte)ReadCodeLength();
}
_offsetTree.BuildTree(codeLengths, numCodes);
}
private void BeginNewBlock()
{
_remainingCommands = _bitReader.ReadBits(16);
ReadTempTree();
ReadCommandTree();
ReadOffsetTree();
}
private int ReadCommand() => _commandTree.ReadEntry(_bitReader);
private int ReadOffset()
{
int bits = _offsetTree.ReadEntry(_bitReader);
if (bits <= 1)
{
return bits;
}
int res = _bitReader.ReadBits(bits - 1);
return res | (1 << (bits - 1));
}
private int CopyFromHistory(byte[] target, int targetIndex, int offset, int count)
{
var historyIter = _ringBuffer.IterFromOffset(offset);
int copied = 0;
while (
copied < count && historyIter.MoveNext() && (targetIndex + copied) < target.Length
)
{
target[targetIndex + copied] = historyIter.Current;
copied++;
}
if (copied < count)
{
_copyProgress = (offset, count - copied);
}
return copied;
}
public void FillBuffer(byte[] buffer)
{
int bufLen = buffer.Length;
int bufIndex = 0;
// stop when we reached original size
if (_producedBytes >= _originalSize)
return;
// calculate limit, so that we don't go over the original size
int remaining = (int)Math.Min(bufLen, _originalSize - _producedBytes);
while (bufIndex < remaining)
{
if (_copyProgress.HasValue)
{
var (offset, count) = _copyProgress.Value;
int copied = CopyFromHistory(
buffer,
bufIndex,
offset,
(int)Math.Min(count, remaining - bufIndex)
);
bufIndex += copied;
_copyProgress = null;
}
if (_remainingCommands == 0)
{
BeginNewBlock();
}
_remainingCommands--;
int command = ReadCommand();
if (command >= 0 && command <= 0xFF)
{
byte value = (byte)command;
buffer[bufIndex++] = value;
_ringBuffer.Push(value);
}
else
{
int count = command - 0x100 + 3;
int offset = ReadOffset();
int copyCount = (int)Math.Min(count, remaining - bufIndex);
bufIndex += CopyFromHistory(buffer, bufIndex, offset, copyCount);
}
}
_producedBytes += bufIndex;
}
}
}

View File

@@ -0,0 +1,67 @@
using System;
using System.Collections;
using System.Collections.Generic;
namespace SharpCompress.Compressors.Arj
{
/// <summary>
/// A fixed-size ring buffer where N must be a power of two.
/// </summary>
public class RingBuffer : IRingBuffer
{
private readonly byte[] _buffer;
private int _cursor;
public int BufferSize { get; }
public int Cursor => _cursor;
private readonly int _mask;
public RingBuffer(int size)
{
if ((size & (size - 1)) != 0)
{
throw new ArgumentException("RingArrayBuffer size must be a power of two");
}
BufferSize = size;
_buffer = new byte[size];
_cursor = 0;
_mask = size - 1;
// Fill with spaces
for (int i = 0; i < size; i++)
{
_buffer[i] = (byte)' ';
}
}
public void SetCursor(int pos)
{
_cursor = pos & _mask;
}
public void Push(byte value)
{
int index = _cursor;
_buffer[index & _mask] = value;
_cursor = (index + 1) & _mask;
}
public byte this[int index] => _buffer[index & _mask];
public HistoryIterator IterFromOffset(int offset)
{
int masked = (offset & _mask) + 1;
int startIndex = _cursor + BufferSize - masked;
return new HistoryIterator(this, startIndex);
}
public HistoryIterator IterFromPos(int pos)
{
int startIndex = pos & _mask;
return new HistoryIterator(this, startIndex);
}
}
}

View File

@@ -22,6 +22,15 @@ namespace SharpCompress.Test.Arj
[Fact]
public void Arj_Uncompressed_Read() => Read("Arj.store.arj", CompressionType.None);
[Fact]
public void Arj_Method1_Read() => Read("Arj.method1.arj");
[Fact]
public void Arj_Method2_Read() => Read("Arj.method2.arj");
[Fact]
public void Arj_Method3_Read() => Read("Arj.method3.arj");
[Fact]
public void Arj_Method4_Read() => Read("Arj.method4.arj");

Binary file not shown.

Binary file not shown.

Binary file not shown.