Merge pull request #1006 from TwanVanDongen/master

ARJ multi-part archive handling improved
This commit is contained in:
Adam Hathcock
2025-11-05 08:37:54 +00:00
committed by GitHub
15 changed files with 502 additions and 120 deletions

View File

@@ -39,16 +39,7 @@ namespace SharpCompress.Common.Arj
);
break;
case CompressionMethod.CompressedFastest:
byte[] compressedData = new byte[Header.CompressedSize];
_stream.Position = Header.DataStartPosition;
_stream.Read(compressedData, 0, compressedData.Length);
byte[] decompressedData = LHDecoder.DecodeFastest(
compressedData,
(int)Header.OriginalSize // ARJ can only handle files up to 2GB, so casting to int should not be an issue.
);
compressedStream = new MemoryStream(decompressedData);
compressedStream = new LHDecoderStream(_stream, (int)Header.OriginalSize);
break;
default:
throw new NotSupportedException(

View File

@@ -4,6 +4,8 @@ using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SharpCompress.Common.Rar;
using SharpCompress.Common.Rar.Headers;
using SharpCompress.Readers;
namespace SharpCompress.Common.Arj
@@ -12,5 +14,23 @@ namespace SharpCompress.Common.Arj
{
/// <summary>
/// Creates an ARJ volume; every argument is passed straight through to the base constructor.
/// </summary>
/// <param name="stream">Stream handed to the base volume.</param>
/// <param name="readerOptions">Reader options handed to the base volume.</param>
/// <param name="index">Volume index handed to the base volume; defaults to 0.</param>
public ArjVolume(Stream stream, ReaderOptions readerOptions, int index = 0)
: base(stream, readerOptions, index) { }
/// <summary>
/// Always reports this volume as the first volume.
/// </summary>
public override bool IsFirstVolume => true;
/// <summary>
/// Indicates whether this volume belongs to a multi-part archive.
/// This implementation always returns <c>false</c>.
/// </summary>
public override bool IsMultiVolume => false;
/// <summary>
/// Returns the file parts of this volume; this implementation always
/// hands back a new, empty list.
/// </summary>
internal IEnumerable<ArjFilePart> GetVolumeFileParts() => new List<ArjFilePart>();
}
}

View File

@@ -0,0 +1,60 @@
using System;
using System.IO;
namespace SharpCompress.Compressors.Arj
{
/// <summary>
/// Reads individual bits from a stream, taking the most significant bit of
/// each byte first. Bytes are pulled from the stream one at a time, only
/// when the previous byte has been fully consumed.
/// </summary>
[CLSCompliant(true)]
public sealed class BitReader
{
    private readonly Stream _stream;
    private int _bitBuffer;
    private int _bitsRemaining;
    private bool _disposed;

    /// <summary>
    /// Creates a bit reader on top of <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Readable stream supplying the bytes.</param>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="input"/> is not readable.</exception>
    public BitReader(Stream input)
    {
        if (input == null)
        {
            throw new ArgumentNullException(nameof(input));
        }

        if (!input.CanRead)
        {
            throw new ArgumentException("Stream must be readable.", nameof(input));
        }

        _stream = input;
    }

    /// <summary>
    /// Reads <paramref name="count"/> bits and returns them as an integer,
    /// with the first bit read ending up as the highest of the returned bits.
    /// </summary>
    /// <param name="count">Number of bits to read, from 1 to 32.</param>
    /// <returns>The bits read, packed into an <see cref="int"/>.</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is outside 1..32.</exception>
    /// <exception cref="EndOfStreamException">The stream ran out before all bits were read.</exception>
    public int ReadBits(int count)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(BitReader));
        }

        if (count < 1 || count > 32)
        {
            throw new ArgumentOutOfRangeException(
                nameof(count),
                "Bit count must be between 1 and 32."
            );
        }

        int value = 0;
        for (int pending = count; pending > 0; pending--)
        {
            if (_bitsRemaining == 0)
            {
                FillBuffer();
            }

            // Most significant bit first: bit 7 of the buffer is the next bit.
            int topBit = (_bitBuffer & 0x80) != 0 ? 1 : 0;
            value = (value << 1) | topBit;
            _bitBuffer <<= 1;
            _bitsRemaining--;
        }

        return value;
    }

    /// <summary>
    /// Discards any bits left over from the current byte so the next read
    /// starts with a fresh byte from the stream.
    /// </summary>
    public void AlignToByte()
    {
        _bitBuffer = 0;
        _bitsRemaining = 0;
    }

    // Loads the next byte from the stream into the bit buffer.
    private void FillBuffer()
    {
        int nextByte = _stream.ReadByte();
        if (nextByte == -1)
        {
            throw new EndOfStreamException();
        }

        _bitBuffer = nextByte;
        _bitsRemaining = 8;
    }
}
}

View File

@@ -1,99 +0,0 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
namespace SharpCompress.Compressors.Arj
{
    /// <summary>
    /// Reads individual bits from an in-memory byte array, taking the most
    /// significant bit of each byte first.
    /// </summary>
    [CLSCompliant(true)]
    public class BitReader
    {
        private readonly byte[] data;
        private int bytePos = 0;
        private int bitPos = 0;

        /// <summary>
        /// Creates a bit reader over <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Bytes to read bits from.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public BitReader(byte[] input)
        {
            // Fail at construction instead of with a NullReferenceException on the first read.
            data = input ?? throw new ArgumentNullException(nameof(input));
        }

        /// <summary>
        /// Reads <paramref name="count"/> bits and packs them into an integer,
        /// first bit read in the highest position.
        /// </summary>
        /// <param name="count">Number of bits to read.</param>
        /// <returns>The bits read, packed into an <see cref="int"/>.</returns>
        /// <exception cref="EndOfStreamException">The array ran out before all bits were read.</exception>
        public int ReadBits(int count)
        {
            int result = 0;
            for (int i = 0; i < count; i++)
            {
                if (bytePos >= data.Length)
                {
                    throw new EndOfStreamException();
                }

                int bit = (data[bytePos] >> (7 - bitPos)) & 1;
                result = (result << 1) | bit;

                bitPos++;
                if (bitPos == 8)
                {
                    bitPos = 0;
                    bytePos++;
                }
            }
            return result;
        }
    }

    /// <summary>
    /// Decoder for the bit stream produced by the "fastest" compression method:
    /// a sequence of literals and (length, back pointer) copy commands.
    /// </summary>
    [CLSCompliant(true)]
    public static class LHDecoder
    {
        private const int THRESHOLD = 3;

        // Reads a variable-length value: each leading 1 bit (at most to - from of them)
        // adds 1 << bit and widens the fixed part that follows by one bit.
        private static int DecodeVal(BitReader r, int from, int to)
        {
            int add = 0;
            int bit = from;
            while (bit < to && r.ReadBits(1) == 1)
            {
                add |= 1 << bit;
                bit++;
            }
            int res = bit > 0 ? r.ReadBits(bit) : 0;
            return res + add;
        }

        /// <summary>
        /// Decodes <paramref name="data"/> into exactly <paramref name="originalSize"/> bytes.
        /// </summary>
        /// <param name="data">Compressed bit stream.</param>
        /// <param name="originalSize">Number of bytes to produce.</param>
        /// <returns>An array of <paramref name="originalSize"/> decoded bytes.</returns>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="originalSize"/> is negative.</exception>
        /// <exception cref="InvalidDataException">A back pointer refers to data before the start of the output.</exception>
        /// <exception cref="EndOfStreamException">The input ended before the output was complete.</exception>
        public static byte[] DecodeFastest(byte[] data, int originalSize)
        {
            if (originalSize < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(originalSize));
            }

            var output = new byte[originalSize];
            if (originalSize == 0)
            {
                // Nothing to decode; the input is never touched in this case.
                return output;
            }

            var reader = new BitReader(data);
            int written = 0;
            while (written < originalSize)
            {
                int len = DecodeVal(reader, 0, 7);
                if (len == 0)
                {
                    output[written++] = (byte)reader.ReadBits(8);
                    continue;
                }

                int repCount = len + THRESHOLD - 1;
                int backPtr = DecodeVal(reader, 9, 13);
                if (backPtr >= written)
                {
                    throw new InvalidDataException("invalid back_ptr");
                }

                // Never copy past the declared size: a final match may be longer
                // than the bytes still missing from the output.
                int copyCount = Math.Min(repCount, originalSize - written);
                int src = written - 1 - backPtr;
                for (int j = 0; j < copyCount; j++)
                {
                    // Source and destination may overlap, so copy byte by byte.
                    output[written++] = output[src++];
                }
            }
            return output;
        }
    }
}

View File

@@ -0,0 +1,191 @@
using System;
using System.Collections.Generic;
using System.IO;
using SharpCompress.IO;
namespace SharpCompress.Compressors.Arj
{
/// <summary>
/// Read-only, non-seekable stream that decodes a literal / back-reference
/// bit stream on demand. Every decoded byte is kept in memory, because
/// back-references are resolved against the bytes decoded so far.
/// </summary>
[CLSCompliant(true)]
public sealed class LHDecoderStream : Stream, IStreamStack
{
#if DEBUG_STREAMS
    long IStreamStack.InstanceId { get; set; }
#endif
    int IStreamStack.DefaultBufferSize { get; set; }

    Stream IStreamStack.BaseStream() => _stream;

    int IStreamStack.BufferSize
    {
        get => 0;
        set { }
    }
    int IStreamStack.BufferPosition
    {
        get => 0;
        set { }
    }

    void IStreamStack.SetPosition(long position) { }

    private const int THRESHOLD = 3;

    private readonly BitReader _bitReader;
    private readonly Stream _stream;

    // Buffer containing *all* bytes decoded so far.
    private readonly List<byte> _buffer = new();
    private readonly int _originalSize;
    private long _readPosition;
    private bool _disposed;

    /// <summary>
    /// Creates a decoder over <paramref name="compressedStream"/>.
    /// </summary>
    /// <param name="compressedStream">Readable stream supplying the compressed bits.</param>
    /// <param name="originalSize">Number of decompressed bytes this stream will produce.</param>
    /// <exception cref="ArgumentNullException"><paramref name="compressedStream"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="compressedStream"/> is not readable.</exception>
    public LHDecoderStream(Stream compressedStream, int originalSize)
    {
        _stream = compressedStream ?? throw new ArgumentNullException(nameof(compressedStream));
        if (!compressedStream.CanRead)
            throw new ArgumentException(
                "compressedStream must be readable.",
                nameof(compressedStream)
            );

        _bitReader = new BitReader(compressedStream);
        _originalSize = originalSize;
        _readPosition = 0;
    }

    /// <summary>The compressed stream this decoder reads from.</summary>
    public Stream BaseStream => _stream;

    public override bool CanRead => true;
    public override bool CanSeek => false;
    public override bool CanWrite => false;

    /// <summary>The decompressed size passed to the constructor.</summary>
    public override long Length => _originalSize;

    /// <summary>Number of decompressed bytes handed out so far; cannot be set.</summary>
    public override long Position
    {
        get => _readPosition;
        set => throw new NotSupportedException();
    }

    /// <summary>
    /// Decodes a single element (literal or back-reference) and appends it to _buffer.
    /// Returns true if data was added, or false if all output has already been decoded.
    /// </summary>
    private bool DecodeNext()
    {
        if (_buffer.Count >= _originalSize)
        {
            return false;
        }

        int len = DecodeVal(0, 7);
        if (len == 0)
        {
            // A zero length marks a literal: the next 8 bits are the byte itself.
            _buffer.Add((byte)_bitReader.ReadBits(8));
            return true;
        }

        int repCount = len + THRESHOLD - 1;
        int backPtr = DecodeVal(9, 13);
        if (backPtr >= _buffer.Count)
            throw new InvalidDataException("Invalid back_ptr in LH stream");

        int srcIndex = _buffer.Count - 1 - backPtr;
        for (int j = 0; j < repCount && _buffer.Count < _originalSize; j++)
        {
            // The source region may overlap the bytes being appended, so copy one at a time.
            _buffer.Add(_buffer[srcIndex]);
            srcIndex++;
        }

        return true;
    }

    // Reads a variable-length value: each leading 1 bit (at most to - from of them)
    // adds 1 << bit and widens the fixed part that follows by one bit.
    private int DecodeVal(int from, int to)
    {
        int add = 0;
        int bit = from;
        while (bit < to && _bitReader.ReadBits(1) == 1)
        {
            add |= 1 << bit;
            bit++;
        }
        int res = bit > 0 ? _bitReader.ReadBits(bit) : 0;
        return res + add;
    }

    /// <summary>
    /// Reads decompressed bytes into buffer[offset..offset+count].
    /// Additional data is decoded on demand when the already-decoded bytes run out.
    /// </summary>
    /// <returns>Number of bytes copied; 0 once the declared size has been delivered.</returns>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="buffer"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">offset/count do not describe a range inside <paramref name="buffer"/>.</exception>
    public override int Read(byte[] buffer, int offset, int count)
    {
        if (_disposed)
            throw new ObjectDisposedException(nameof(LHDecoderStream));
        if (buffer == null)
            throw new ArgumentNullException(nameof(buffer));
        // Written as a subtraction so that offset + count cannot overflow and slip past the check.
        if (offset < 0 || count < 0 || count > buffer.Length - offset)
            throw new ArgumentOutOfRangeException("offset/count");

        int totalRead = 0;
        while (totalRead < count && _readPosition < _originalSize)
        {
            if (_readPosition >= _buffer.Count && !DecodeNext())
            {
                break;
            }

            // DecodeNext always appends at least one byte when it returns true,
            // so there is something to copy at this point.
            int start = (int)_readPosition;
            int toCopy = Math.Min(_buffer.Count - start, count - totalRead);
            _buffer.CopyTo(start, buffer, offset + totalRead, toCopy);

            _readPosition += toCopy;
            totalRead += toCopy;
        }

        return totalRead;
    }

    /// <summary>
    /// Does nothing: the stream is read-only, so there is never anything to flush.
    /// </summary>
    public override void Flush() { }

    public override long Seek(long offset, SeekOrigin origin) =>
        throw new NotSupportedException();

    public override void SetLength(long value) => throw new NotSupportedException();

    public override void Write(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException();

    /// <summary>
    /// Marks the stream as disposed so later reads throw
    /// <see cref="ObjectDisposedException"/>. The underlying compressed stream
    /// is not closed here.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        _disposed = true;
        base.Dispose(disposing);
    }
}
}

View File

@@ -1,23 +1,25 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SharpCompress.Common;
using SharpCompress.Common.Arj;
using SharpCompress.Common.Arj.Headers;
using SharpCompress.Common.Zip;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.Readers.Rar;
namespace SharpCompress.Readers.Arj
{
public class ArjReader : AbstractReader<ArjEntry, ArjVolume>
public abstract class ArjReader : AbstractReader<ArjEntry, ArjVolume>
{
private ArjReader(Stream stream, ReaderOptions options)
: base(options, ArchiveType.Arj) => Volume = new ArjVolume(stream, options, 0);
internal ArjReader(ReaderOptions options)
: base(options, ArchiveType.Arj) { }
public override ArjVolume Volume { get; }
/// <summary>
/// Derived class must create or manage the Volume itself.
/// AbstractReader.Volume is get-only, so it cannot be set here.
/// </summary>
public override ArjVolume? Volume => _volume;
private ArjVolume? _volume;
/// <summary>
/// Opens an ArjReader for Non-seeking usage with a single volume
@@ -28,15 +30,42 @@ namespace SharpCompress.Readers.Arj
public static ArjReader Open(Stream stream, ReaderOptions? options = null)
{
stream.NotNull(nameof(stream));
return new ArjReader(stream, options ?? new ReaderOptions());
return new SingleVolumeArjReader(stream, options ?? new ReaderOptions());
}
/// <summary>
/// Opens an ArjReader for non-seeking usage across multiple volumes.
/// </summary>
/// <param name="streams">The volume streams; checked with <c>NotNull</c> before use.</param>
/// <param name="options">Reader options; a new <see cref="ReaderOptions"/> is used when this is null.</param>
/// <returns>A <c>MultiVolumeArjReader</c> over the supplied streams.</returns>
public static ArjReader Open(IEnumerable<Stream> streams, ReaderOptions? options = null)
{
    streams.NotNull(nameof(streams));
    var readerOptions = options ?? new ReaderOptions();
    return new MultiVolumeArjReader(streams, readerOptions);
}
protected abstract void ValidateArchive(ArjVolume archive);
protected override IEnumerable<ArjEntry> GetEntries(Stream stream)
{
ArchiveEncoding encoding = new ArchiveEncoding();
var encoding = new ArchiveEncoding();
var mainHeaderReader = new ArjMainHeader(encoding);
var localHeaderReader = new ArjLocalHeader(encoding);
var mainHeader = mainHeaderReader.Read(stream);
if (mainHeader?.IsVolume == true)
{
throw new MultiVolumeExtractionException(
"Multi volumes are currently not supported"
);
}
if (_volume == null)
{
_volume = new ArjVolume(stream, Options, 0);
ValidateArchive(_volume);
}
while (true)
{
@@ -47,5 +76,8 @@ namespace SharpCompress.Readers.Arj
yield return new ArjEntry(new ArjFilePart((ArjLocalHeader)localHeader, stream));
}
}
/// <summary>
/// Supplies the file parts for the current entry. The default simply
/// returns the entry's own parts; derived readers may override it.
/// </summary>
protected virtual IEnumerable<FilePart> CreateFilePartEnumerableForCurrentEntry()
{
    return Entry.Parts;
}
}
}

View File

@@ -0,0 +1,117 @@
#nullable disable
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SharpCompress.Common;
using SharpCompress.Common.Arj;
using SharpCompress.Readers.Rar;
namespace SharpCompress.Readers.Arj;
/// <summary>
/// ARJ reader that takes its volumes from a sequence of streams, moving on
/// to the next stream whenever the current one has no further entries or
/// the current entry is split across volumes.
/// </summary>
internal class MultiVolumeArjReader : ArjReader
{
    private readonly IEnumerator<Stream> streams;
    private Stream tempStream;

    internal MultiVolumeArjReader(IEnumerable<Stream> streams, ReaderOptions options)
        : base(options)
    {
        this.streams = streams.GetEnumerator();
    }

    /// <summary>No validation is performed for multi-volume reading.</summary>
    protected override void ValidateArchive(ArjVolume archive) { }

    /// <summary>
    /// Returns the first stream of the sequence, or throws when the sequence is empty.
    /// </summary>
    protected override Stream RequestInitialStream()
    {
        if (!streams.MoveNext())
        {
            throw new MultiVolumeExtractionException(
                "No stream provided when requested by MultiVolumeArjReader"
            );
        }

        return streams.Current;
    }

    /// <summary>
    /// Advances to the next entry; when the current stream has none left,
    /// tries to continue with the next stream in the sequence.
    /// </summary>
    internal override bool NextEntryForCurrentStream()
    {
        if (base.NextEntryForCurrentStream())
        {
            return true;
        }

        // If we have another stream to process, carry on with it.
        if (!streams.MoveNext())
        {
            return false;
        }

        return LoadStreamForReading(streams.Current);
    }

    /// <summary>
    /// Builds the part enumerator for the current entry, handing over any
    /// pending temporary stream and clearing it on this reader.
    /// </summary>
    protected override IEnumerable<FilePart> CreateFilePartEnumerableForCurrentEntry()
    {
        var parts = new MultiVolumeStreamEnumerator(this, streams, tempStream);
        tempStream = null;
        return parts;
    }

    /// <summary>
    /// Enumerates the parts of one entry, loading the next volume stream
    /// each time the entry reports that it continues after the current part.
    /// Acts as both the enumerable and its own enumerator.
    /// </summary>
    private class MultiVolumeStreamEnumerator : IEnumerable<FilePart>, IEnumerator<FilePart>
    {
        private readonly MultiVolumeArjReader reader;
        private readonly IEnumerator<Stream> nextReadableStreams;
        private Stream tempStream;
        private bool isFirst = true;

        internal MultiVolumeStreamEnumerator(
            MultiVolumeArjReader reader,
            IEnumerator<Stream> nextReadableStreams,
            Stream tempStream
        )
        {
            this.reader = reader;
            this.nextReadableStreams = nextReadableStreams;
            this.tempStream = tempStream;
        }

        public FilePart Current { get; private set; }

        object IEnumerator.Current => Current;

        public IEnumerator<FilePart> GetEnumerator() => this;

        IEnumerator IEnumerable.GetEnumerator() => this;

        public bool MoveNext()
        {
            if (isFirst)
            {
                // The first part comes from the stream that is already loaded.
                Current = reader.Entry.Parts.First();
                isFirst = false;
                return true;
            }

            if (!reader.Entry.IsSplitAfter)
            {
                return false;
            }

            if (tempStream is null)
            {
                if (!nextReadableStreams.MoveNext())
                {
                    throw new MultiVolumeExtractionException(
                        "No stream provided when requested by MultiVolumeArjReader"
                    );
                }

                reader.LoadStreamForReading(nextReadableStreams.Current);
            }
            else
            {
                // A pending stream takes priority over the remaining sequence.
                reader.LoadStreamForReading(tempStream);
                tempStream = null;
            }

            Current = reader.Entry.Parts.First();
            return true;
        }

        public void Reset() { }

        public void Dispose() { }
    }
}

View File

@@ -0,0 +1,31 @@
using System;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Common.Arj;
namespace SharpCompress.Readers.Arj
{
/// <summary>
/// ARJ reader over a single stream. Rejects archives whose volume reports
/// itself as multi-volume.
/// </summary>
internal class SingleVolumeArjReader : ArjReader
{
    private readonly Stream _stream;

    internal SingleVolumeArjReader(Stream stream, ReaderOptions options)
        : base(options)
    {
        stream.NotNull(nameof(stream));
        _stream = stream;
    }

    /// <summary>Returns the stream supplied to the constructor.</summary>
    protected override Stream RequestInitialStream()
    {
        return _stream;
    }

    /// <summary>
    /// Throws <see cref="MultiVolumeExtractionException"/> when
    /// <paramref name="archive"/> is a multi-volume archive.
    /// </summary>
    protected override void ValidateArchive(ArjVolume archive)
    {
        if (!archive.IsMultiVolume)
        {
            return;
        }

        throw new MultiVolumeExtractionException(
            "Streamed archive is a Multi-volume archive. Use a different ArjReader method to extract."
        );
    }
}
}

View File

@@ -6,6 +6,7 @@ using System.Text;
using System.Threading.Tasks;
using SharpCompress.Common;
using SharpCompress.Readers;
using SharpCompress.Readers.Arj;
using Xunit;
namespace SharpCompress.Test.Arj
@@ -23,5 +24,43 @@ namespace SharpCompress.Test.Arj
// Reads the method-4 sample archive through the shared Read helper.
[Fact]
public void Arj_Method4_Read()
{
    Read("Arj.method4.arj");
}
// Reading the split archive through the multi-stream reader is expected
// to fail with MultiVolumeExtractionException.
[Fact]
public void Arj_Multi_Reader()
{
    string[] volumes =
    [
        "Arj.store.split.arj",
        "Arj.store.split.a01",
        "Arj.store.split.a02",
        "Arj.store.split.a03",
        "Arj.store.split.a04",
        "Arj.store.split.a05",
    ];

    Assert.Throws<MultiVolumeExtractionException>(() => DoArj_Multi_Reader(volumes));
}
// Opens the given archive files (relative to TEST_ARCHIVES_PATH) as one
// multi-volume reader, extracts every entry into SCRATCH_FILES_PATH and
// then verifies the extracted files.
private void DoArj_Multi_Reader(string[] archives)
{
    // The file streams are opened lazily, as the reader asks for them. Keep
    // track of each one so it is closed even when the reader throws.
    var opened = new System.Collections.Generic.List<Stream>();
    try
    {
        using (
            var reader = ArjReader.Open(
                archives
                    .Select(s => Path.Combine(TEST_ARCHIVES_PATH, s))
                    .Select(p =>
                    {
                        Stream stream = File.OpenRead(p);
                        opened.Add(stream);
                        return stream;
                    })
            )
        )
        {
            while (reader.MoveToNextEntry())
            {
                reader.WriteEntryToDirectory(
                    SCRATCH_FILES_PATH,
                    new ExtractionOptions { ExtractFullPath = true, Overwrite = true }
                );
            }
        }
    }
    finally
    {
        // Disposing a stream that is already closed is harmless.
        foreach (var stream in opened)
        {
            stream.Dispose();
        }
    }
    VerifyFiles();
}
}
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.