From ca52cec0b3d04b97fdd2bb003fe4167a4056dedf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 31 Mar 2026 09:01:21 +0000 Subject: [PATCH] Dynamic RingBuffer for BZip2: expand buffer after format detection Agent-Logs-Url: https://github.com/adamhathcock/sharpcompress/sessions/2d1412f8-34f8-4a32-8802-e52770342940 Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com> --- src/SharpCompress/Factories/TarFactory.cs | 4 + src/SharpCompress/Factories/TarWrapper.cs | 18 +++- src/SharpCompress/IO/SharpCompressStream.cs | 31 +++++++ .../Readers/Tar/TarReader.Factory.cs | 2 + src/SharpCompress/packages.lock.json | 12 +-- .../Streams/SharpCompressStreamSeekTest.cs | 92 +++++++++++++++++++ .../SharpCompress.Test/Tar/TarReaderTests.cs | 49 ++++++++++ 7 files changed, 200 insertions(+), 8 deletions(-) diff --git a/src/SharpCompress/Factories/TarFactory.cs b/src/SharpCompress/Factories/TarFactory.cs index 28380587..ee8d62a0 100644 --- a/src/SharpCompress/Factories/TarFactory.cs +++ b/src/SharpCompress/Factories/TarFactory.cs @@ -59,6 +59,7 @@ public class TarFactory if (wrapper.IsMatch(sharpCompressStream)) { sharpCompressStream.Rewind(); + sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var decompressedStream = CreateProbeDecompressionStream( sharpCompressStream, wrapper.CompressionType, @@ -95,6 +96,7 @@ public class TarFactory ) { sharpCompressStream.Rewind(); + sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var decompressedStream = await CreateProbeDecompressionStreamAsync( sharpCompressStream, wrapper.CompressionType, @@ -326,6 +328,7 @@ public class TarFactory if (wrapper.IsMatch(sharpCompressStream)) { sharpCompressStream.Rewind(); + sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var decompressedStream = CreateProbeDecompressionStream( sharpCompressStream, wrapper.CompressionType, 
@@ -363,6 +366,7 @@ public class TarFactory ) { sharpCompressStream.Rewind(); + sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var decompressedStream = await CreateProbeDecompressionStreamAsync( sharpCompressStream, wrapper.CompressionType, diff --git a/src/SharpCompress/Factories/TarWrapper.cs b/src/SharpCompress/Factories/TarWrapper.cs index fee9707e..85d5d866 100644 --- a/src/SharpCompress/Factories/TarWrapper.cs +++ b/src/SharpCompress/Factories/TarWrapper.cs @@ -22,7 +22,8 @@ public class TarWrapper( Func createStream, Func> createStreamAsync, IEnumerable knownExtensions, - bool wrapInSharpCompressStream = true + bool wrapInSharpCompressStream = true, + int? minimumRewindBufferSize = null ) { public CompressionType CompressionType { get; } = type; @@ -30,6 +31,15 @@ public class TarWrapper( public Func> IsMatchAsync { get; } = canHandleAsync; public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream; + /// <summary> + /// The minimum ring buffer size required to detect and probe this format. + /// Format detection reads a decompressed block to check the tar header, so + /// the ring buffer must be large enough to hold the compressed bytes consumed + /// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>. + /// </summary> + public int MinimumRewindBufferSize { get; } = + minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize; + public Func CreateStream { get; } = createStream; public Func> CreateStreamAsync { get; } = createStreamAsync; @@ -57,7 +67,11 @@ public class TarWrapper( await BZip2Stream .CreateAsync(stream, CompressionMode.Decompress, false) .ConfigureAwait(false), - ["tar.bz2", "tb2", "tbz", "tbz2", "tz2"] + ["tar.bz2", "tb2", "tbz", "tbz2", "tz2"], + // BZip2 decompresses in whole blocks; the compressed size of the first block + // can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes). + // The ring buffer must hold all compressed bytes read during format detection. 
+ minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9 ), new( CompressionType.GZip, diff --git a/src/SharpCompress/IO/SharpCompressStream.cs b/src/SharpCompress/IO/SharpCompressStream.cs index 53e2b864..d5be2cb5 100644 --- a/src/SharpCompress/IO/SharpCompressStream.cs +++ b/src/SharpCompress/IO/SharpCompressStream.cs @@ -202,6 +202,37 @@ public partial class SharpCompressStream : Stream, IStreamStack _isRecording = true; } + /// <summary> + /// Ensures the ring buffer has at least the specified minimum capacity. + /// If the current buffer is smaller, it is replaced with a larger one while + /// preserving all existing buffered data. Call this after detecting a compression + /// format that requires a larger buffer for format detection (e.g. BZip2 whose + /// first block can be up to 900 KB of compressed data). + /// </summary> + /// <param name="minSize">Minimum required ring buffer capacity in bytes.</param> + internal void EnsureMinimumRewindBufferSize(int minSize) + { + if (_isPassthrough || _ringBuffer is null || _ringBuffer.Capacity >= minSize) + { + return; + } + + // Create a new larger buffer with the required capacity + var newBuffer = new RingBuffer(minSize); + + // Preserve existing buffered data in the new buffer + var existingLength = _ringBuffer.Length; + if (existingLength > 0) + { + var existingData = new byte[existingLength]; + _ringBuffer.ReadFromEnd(existingLength, existingData, 0, existingLength); + newBuffer.Write(existingData, 0, existingLength); + } + + _ringBuffer.Dispose(); + _ringBuffer = newBuffer; + } + public override bool CanRead => true; public override bool CanSeek => !_isPassthrough || stream.CanSeek; diff --git a/src/SharpCompress/Readers/Tar/TarReader.Factory.cs b/src/SharpCompress/Readers/Tar/TarReader.Factory.cs index f94a89dc..3bd98565 100644 --- a/src/SharpCompress/Readers/Tar/TarReader.Factory.cs +++ b/src/SharpCompress/Readers/Tar/TarReader.Factory.cs @@ -107,6 +107,7 @@ public partial class TarReader } sharpCompressStream.Position = pos; + 
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var testStream = await CreateProbeDecompressionStreamAsync( sharpCompressStream, wrapper.CompressionType, @@ -182,6 +183,7 @@ public partial class TarReader } sharpCompressStream.Position = pos; + sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize); var testStream = CreateProbeDecompressionStream( sharpCompressStream, wrapper.CompressionType, diff --git a/src/SharpCompress/packages.lock.json b/src/SharpCompress/packages.lock.json index b44de28b..5059aafe 100644 --- a/src/SharpCompress/packages.lock.json +++ b/src/SharpCompress/packages.lock.json @@ -268,9 +268,9 @@ "net10.0": { "Microsoft.NET.ILLink.Tasks": { "type": "Direct", - "requested": "[10.0.2, )", - "resolved": "10.0.2", - "contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw==" + "requested": "[10.0.0, )", + "resolved": "10.0.0", + "contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA==" }, "Microsoft.NETFramework.ReferenceAssemblies": { "type": "Direct", @@ -442,9 +442,9 @@ "net8.0": { "Microsoft.NET.ILLink.Tasks": { "type": "Direct", - "requested": "[8.0.23, )", - "resolved": "8.0.23", - "contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q==" + "requested": "[8.0.22, )", + "resolved": "8.0.22", + "contentHash": "MhcMithKEiyyNkD2ZfbDZPmcOdi0GheGfg8saEIIEfD/fol3iHmcV8TsZkD4ZYz5gdUuoX4YtlVySUU7Sxl9SQ==" }, "Microsoft.NETFramework.ReferenceAssemblies": { "type": "Direct", diff --git a/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs b/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs index 05d3dc21..cc3a6c6d 100644 --- a/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs +++ b/tests/SharpCompress.Test/Streams/SharpCompressStreamSeekTest.cs @@ -127,4 +127,96 @@ public class SharpCompressStreamSeekTest 
Assert.Equal(3, readBuffer[0]); Assert.Equal(4, readBuffer[1]); } + + [Fact] + public void EnsureMinimumRewindBufferSize_ExpandsSmallBuffer_PreservesExistingData() + { + // Arrange: create a stream with a small initial buffer (size 10) + var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }); + var nonSeekableMs = new NonSeekableStreamWrapper(ms); + var stream = SharpCompressStream.Create(nonSeekableMs, 10); + stream.StartRecording(); + + // Read 4 bytes — they are now in the ring buffer + var buffer = new byte[8]; + stream.Read(buffer, 0, 4); + Assert.Equal(4, stream.Position); + + // Rewind to verify 4 bytes are present + stream.Rewind(); + + // Act: expand the ring buffer to 200 bytes while data is present + stream.EnsureMinimumRewindBufferSize(200); + + // Verify the data is still replayable after expansion + var readBuffer = new byte[4]; + stream.Read(readBuffer, 0, 4); + Assert.Equal(1, readBuffer[0]); + Assert.Equal(2, readBuffer[1]); + Assert.Equal(3, readBuffer[2]); + Assert.Equal(4, readBuffer[3]); + } + + [Fact] + public void EnsureMinimumRewindBufferSize_BufferAlreadyLarger_DoesNotShrink() + { + // Arrange: create a stream with a large initial buffer (size 200) + var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5 }); + var nonSeekableMs = new NonSeekableStreamWrapper(ms); + var stream = SharpCompressStream.Create(nonSeekableMs, 200); + stream.StartRecording(); + stream.Read(new byte[5], 0, 5); + + // Act: request a smaller minimum — buffer should stay at 200 + stream.EnsureMinimumRewindBufferSize(50); + + // Assert: buffer can still hold the 5 bytes written before expansion request + stream.Rewind(); + var readBuffer = new byte[5]; + stream.Read(readBuffer, 0, 5); + Assert.Equal(1, readBuffer[0]); + Assert.Equal(5, readBuffer[4]); + } + + [Fact] + public void EnsureMinimumRewindBufferSize_AllowsRewindAfterLargeRead() + { + // Simulate the BZip2 scenario: small initial buffer, expand after format detection, + // then verify a large read still 
allows Rewind. + const int initialSize = 10; + const int expandedSize = 100; + const int largeReadSize = 80; + + var data = new byte[100]; + for (var i = 0; i < data.Length; i++) + { + data[i] = (byte)(i + 1); + } + + var ms = new MemoryStream(data); + var nonSeekableMs = new NonSeekableStreamWrapper(ms); + var stream = SharpCompressStream.Create(nonSeekableMs, initialSize); + stream.StartRecording(); + + // Read 4 bytes (format detection — magic bytes) + var buffer = new byte[4]; + stream.Read(buffer, 0, 4); + stream.Rewind(); + + // Expand the ring buffer to cover the anticipated large probe read + stream.EnsureMinimumRewindBufferSize(expandedSize); + + // Read a large amount (simulating BZip2 block decompression) + var largeBuffer = new byte[largeReadSize]; + stream.Read(largeBuffer, 0, largeReadSize); + + // Rewind must succeed even though largeReadSize > initialSize + stream.Rewind(); + + // Verify data replays correctly + var verifyBuffer = new byte[largeReadSize]; + stream.Read(verifyBuffer, 0, largeReadSize); + Assert.Equal(data[0], verifyBuffer[0]); + Assert.Equal(data[largeReadSize - 1], verifyBuffer[largeReadSize - 1]); + } } diff --git a/tests/SharpCompress.Test/Tar/TarReaderTests.cs b/tests/SharpCompress.Test/Tar/TarReaderTests.cs index 21c62f83..8cc0b6a3 100644 --- a/tests/SharpCompress.Test/Tar/TarReaderTests.cs +++ b/tests/SharpCompress.Test/Tar/TarReaderTests.cs @@ -2,6 +2,8 @@ using System; using System.Collections.Generic; using System.IO; using SharpCompress.Common; +using SharpCompress.Compressors.BZip2; +using SharpCompress.Factories; using SharpCompress.Readers; using SharpCompress.Readers.Tar; using SharpCompress.Test.Mocks; @@ -58,6 +60,53 @@ public class TarReaderTests : ReaderTests [Fact] public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip); + [Fact] + public void Tar_BZip2_Reader_NonSeekable() + { + // Regression test for: Dynamic default RingBuffer for BZip2 + // Opening a .tar.bz2 from a non-seekable 
stream should succeed + // because EnsureMinimumRewindBufferSize expands the ring buffer + // to hold the BZip2 block before calling IsTarFile. + using var fs = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2")); + using var nonSeekable = new ForwardOnlyStream(fs); + using var reader = ReaderFactory.OpenReader(nonSeekable); + var entryCount = 0; + while (reader.MoveToNextEntry()) + { + if (!reader.Entry.IsDirectory) + { + entryCount++; + } + } + Assert.True(entryCount > 0); + } + + [Fact] + public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize() + { + // The BZip2 TarWrapper must declare a MinimumRewindBufferSize large enough + // to hold an entire maximum-size compressed BZip2 block (9 × 100 000 bytes). + var bzip2Wrapper = Array.Find( + TarWrapper.Wrappers, + w => w.CompressionType == CompressionType.BZip2 + ); + Assert.NotNull(bzip2Wrapper); + Assert.Equal(BZip2Constants.baseBlockSize * 9, bzip2Wrapper.MinimumRewindBufferSize); + } + + [Fact] + public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize() + { + // Non-BZip2 wrappers that don't specify a custom size default to + // Constants.RewindableBufferSize so existing behaviour is unchanged. + var noneWrapper = Array.Find( + TarWrapper.Wrappers, + w => w.CompressionType == CompressionType.None + ); + Assert.NotNull(noneWrapper); + Assert.Equal(Common.Constants.RewindableBufferSize, noneWrapper.MinimumRewindBufferSize); + } + [Fact] public void Tar_BZip2_Entry_Stream() {