Dynamic RingBuffer for BZip2: expand buffer after format detection

Agent-Logs-Url: https://github.com/adamhathcock/sharpcompress/sessions/2d1412f8-34f8-4a32-8802-e52770342940

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
This commit is contained in:
copilot-swe-agent[bot]
2026-03-31 09:01:21 +00:00
committed by GitHub
parent ddcbb3e474
commit ca52cec0b3
7 changed files with 200 additions and 8 deletions

View File

@@ -59,6 +59,7 @@ public class TarFactory
if (wrapper.IsMatch(sharpCompressStream))
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,
@@ -95,6 +96,7 @@ public class TarFactory
)
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,
@@ -326,6 +328,7 @@ public class TarFactory
if (wrapper.IsMatch(sharpCompressStream))
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,
@@ -363,6 +366,7 @@ public class TarFactory
)
{
sharpCompressStream.Rewind();
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var decompressedStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,

View File

@@ -22,7 +22,8 @@ public class TarWrapper(
Func<Stream, Stream> createStream,
Func<Stream, CancellationToken, ValueTask<Stream>> createStreamAsync,
IEnumerable<string> knownExtensions,
bool wrapInSharpCompressStream = true
bool wrapInSharpCompressStream = true,
int? minimumRewindBufferSize = null
)
{
public CompressionType CompressionType { get; } = type;
@@ -30,6 +31,15 @@ public class TarWrapper(
public Func<Stream, CancellationToken, ValueTask<bool>> IsMatchAsync { get; } = canHandleAsync;
public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream;
/// <summary>
/// The minimum ring buffer size, in bytes, required to detect and probe this format.
/// Format detection reads a decompressed block to check the tar header, so
/// the ring buffer must be large enough to hold the compressed bytes consumed
/// during that probe. Falls back to <see cref="Common.Constants.RewindableBufferSize"/>
/// when the <c>minimumRewindBufferSize</c> constructor argument is <c>null</c>.
/// </summary>
public int MinimumRewindBufferSize { get; } =
minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize;
public Func<Stream, Stream> CreateStream { get; } = createStream;
public Func<Stream, CancellationToken, ValueTask<Stream>> CreateStreamAsync { get; } =
createStreamAsync;
@@ -57,7 +67,11 @@ public class TarWrapper(
await BZip2Stream
.CreateAsync(stream, CompressionMode.Decompress, false)
.ConfigureAwait(false),
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"]
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"],
// BZip2 decompresses in whole blocks; the compressed size of the first block
// can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes).
// The ring buffer must hold all compressed bytes read during format detection.
minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9
),
new(
CompressionType.GZip,

View File

@@ -202,6 +202,37 @@ public partial class SharpCompressStream : Stream, IStreamStack
_isRecording = true;
}
/// <summary>
/// Grows the rewind ring buffer so that its capacity is at least
/// <paramref name="minSize"/> bytes. Nothing happens when the stream is in
/// passthrough mode, when there is no ring buffer, or when the current capacity
/// already satisfies the request. Otherwise a larger buffer replaces the current
/// one and all bytes buffered so far are copied into it. Meant to be called once a
/// compression format needing a bigger detection window has been identified
/// (e.g. BZip2, whose first block can be up to 900 KB of compressed data).
/// </summary>
/// <param name="minSize">Minimum required ring buffer capacity in bytes.</param>
internal void EnsureMinimumRewindBufferSize(int minSize)
{
    if (_isPassthrough)
    {
        return;
    }

    var current = _ringBuffer;
    if (current is null || current.Capacity >= minSize)
    {
        return;
    }

    // Allocate the replacement with the requested capacity.
    var replacement = new RingBuffer(minSize);

    // Carry over everything the old buffer still holds.
    var buffered = current.Length;
    if (buffered > 0)
    {
        var snapshot = new byte[buffered];
        current.ReadFromEnd(buffered, snapshot, 0, buffered);
        replacement.Write(snapshot, 0, buffered);
    }

    // Release the old buffer only after its contents have been copied.
    current.Dispose();
    _ringBuffer = replacement;
}
public override bool CanRead => true;
public override bool CanSeek => !_isPassthrough || stream.CanSeek;

View File

@@ -107,6 +107,7 @@ public partial class TarReader
}
sharpCompressStream.Position = pos;
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var testStream = await CreateProbeDecompressionStreamAsync(
sharpCompressStream,
wrapper.CompressionType,
@@ -182,6 +183,7 @@ public partial class TarReader
}
sharpCompressStream.Position = pos;
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
var testStream = CreateProbeDecompressionStream(
sharpCompressStream,
wrapper.CompressionType,

View File

@@ -268,9 +268,9 @@
"net10.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[10.0.2, )",
"resolved": "10.0.2",
"contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw=="
"requested": "[10.0.0, )",
"resolved": "10.0.0",
"contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",
@@ -442,9 +442,9 @@
"net8.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[8.0.23, )",
"resolved": "8.0.23",
"contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q=="
"requested": "[8.0.22, )",
"resolved": "8.0.22",
"contentHash": "MhcMithKEiyyNkD2ZfbDZPmcOdi0GheGfg8saEIIEfD/fol3iHmcV8TsZkD4ZYz5gdUuoX4YtlVySUU7Sxl9SQ=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",

View File

@@ -127,4 +127,96 @@ public class SharpCompressStreamSeekTest
Assert.Equal(3, readBuffer[0]);
Assert.Equal(4, readBuffer[1]);
}
[Fact]
public void EnsureMinimumRewindBufferSize_ExpandsSmallBuffer_PreservesExistingData()
{
    // Arrange: eight source bytes behind a non-seekable wrapper, ring buffer of size 10.
    var source = new MemoryStream(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
    var forwardOnly = new NonSeekableStreamWrapper(source);
    var stream = SharpCompressStream.Create(forwardOnly, 10);
    stream.StartRecording();

    // Consume the first four bytes so that they land in the ring buffer.
    var scratch = new byte[8];
    stream.Read(scratch, 0, 4);
    Assert.Equal(4, stream.Position);

    // Rewind so the four recorded bytes are pending replay.
    stream.Rewind();

    // Act: grow the ring buffer to 200 bytes while it still holds data.
    stream.EnsureMinimumRewindBufferSize(200);

    // Assert: the recorded bytes replay unchanged after the expansion.
    var replayed = new byte[4];
    stream.Read(replayed, 0, 4);
    for (var i = 0; i < replayed.Length; i++)
    {
        Assert.Equal(i + 1, replayed[i]);
    }
}
[Fact]
public void EnsureMinimumRewindBufferSize_BufferAlreadyLarger_DoesNotShrink()
{
    // Arrange: start with a 200-byte ring buffer and record MORE bytes (100) than the
    // smaller minimum requested below (50). Recording fewer bytes than the requested
    // minimum would let the test pass even if the buffer were wrongly shrunk.
    const int initialSize = 200;
    const int requestedSize = 50;
    const int recordedSize = 100;

    var data = new byte[recordedSize];
    for (var i = 0; i < data.Length; i++)
    {
        data[i] = (byte)(i + 1);
    }

    var ms = new MemoryStream(data);
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    var stream = SharpCompressStream.Create(nonSeekableMs, initialSize);
    stream.StartRecording();
    stream.Read(new byte[recordedSize], 0, recordedSize);

    // Act: request a smaller minimum — the buffer must stay at its current capacity.
    stream.EnsureMinimumRewindBufferSize(requestedSize);

    // Assert: all recorded bytes are still replayable, which would be impossible
    // had the buffer been reduced to the requested 50 bytes.
    stream.Rewind();
    var readBuffer = new byte[recordedSize];
    stream.Read(readBuffer, 0, recordedSize);
    Assert.Equal(data, readBuffer);
}
[Fact]
public void EnsureMinimumRewindBufferSize_AllowsRewindAfterLargeRead()
{
    // Mirrors the BZip2 flow: the stream starts with a small ring buffer, the buffer
    // is enlarged once the format is known, and a read bigger than the initial
    // capacity must still be rewindable afterwards.
    const int initialSize = 10;
    const int expandedSize = 100;
    const int largeReadSize = 80;

    // Source payload: 1, 2, 3, ... 100.
    var data = new byte[100];
    var index = 0;
    while (index < data.Length)
    {
        data[index] = (byte)(index + 1);
        index++;
    }

    var stream = SharpCompressStream.Create(
        new NonSeekableStreamWrapper(new MemoryStream(data)),
        initialSize
    );
    stream.StartRecording();

    // Format detection: peek at four magic bytes, then go back to the start.
    var magic = new byte[4];
    stream.Read(magic, 0, 4);
    stream.Rewind();

    // Enlarge the ring buffer ahead of the large probe read.
    stream.EnsureMinimumRewindBufferSize(expandedSize);

    // Probe read standing in for a BZip2 block decompression.
    var probe = new byte[largeReadSize];
    stream.Read(probe, 0, largeReadSize);

    // Rewinding must work although largeReadSize exceeds initialSize.
    stream.Rewind();

    // The replayed bytes must match the source at both ends of the range.
    var replayed = new byte[largeReadSize];
    stream.Read(replayed, 0, largeReadSize);
    var lastIndex = largeReadSize - 1;
    Assert.Equal(data[0], replayed[0]);
    Assert.Equal(data[lastIndex], replayed[lastIndex]);
}
}

View File

@@ -2,6 +2,8 @@ using System;
using System.Collections.Generic;
using System.IO;
using SharpCompress.Common;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Factories;
using SharpCompress.Readers;
using SharpCompress.Readers.Tar;
using SharpCompress.Test.Mocks;
@@ -58,6 +60,53 @@ public class TarReaderTests : ReaderTests
// Passes the "Tar.oldgnu.tar.gz" archive name and CompressionType.GZip to the Read
// helper (not defined in this view; presumably inherited from ReaderTests — confirm).
[Fact]
public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip);
[Fact]
public void Tar_BZip2_Reader_NonSeekable()
{
    // Regression test for: Dynamic default RingBuffer for BZip2.
    // A .tar.bz2 archive supplied through a forward-only stream must open
    // successfully, because EnsureMinimumRewindBufferSize enlarges the ring
    // buffer to hold the BZip2 block before calling IsTarFile.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2");
    using var fs = File.OpenRead(archivePath);
    using var nonSeekable = new ForwardOnlyStream(fs);
    using var reader = ReaderFactory.OpenReader(nonSeekable);

    // Count only file entries; directories are skipped.
    var fileEntries = 0;
    while (reader.MoveToNextEntry())
    {
        if (reader.Entry.IsDirectory)
        {
            continue;
        }

        fileEntries++;
    }

    Assert.True(fileEntries > 0);
}
[Fact]
public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize()
{
    // The BZip2 wrapper has to advertise a rewind buffer big enough for a whole
    // maximum-size compressed BZip2 block (9 × 100 000 bytes).
    var expectedSize = BZip2Constants.baseBlockSize * 9;

    var bzip2Wrapper = Array.Find(
        TarWrapper.Wrappers,
        candidate => candidate.CompressionType == CompressionType.BZip2
    );

    Assert.NotNull(bzip2Wrapper);
    Assert.Equal(expectedSize, bzip2Wrapper.MinimumRewindBufferSize);
}
[Fact]
public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize()
{
    // Wrappers that do not pass a custom size (here: the uncompressed "None" wrapper)
    // fall back to Constants.RewindableBufferSize, leaving existing behaviour unchanged.
    var expectedSize = Common.Constants.RewindableBufferSize;

    var noneWrapper = Array.Find(
        TarWrapper.Wrappers,
        candidate => candidate.CompressionType == CompressionType.None
    );

    Assert.NotNull(noneWrapper);
    Assert.Equal(expectedSize, noneWrapper.MinimumRewindBufferSize);
}
[Fact]
public void Tar_BZip2_Entry_Stream()
{