mirror of
https://github.com/adamhathcock/sharpcompress.git
synced 2026-04-17 19:43:36 +00:00
Dynamic RingBuffer for BZip2: expand buffer after format detection
Agent-Logs-Url: https://github.com/adamhathcock/sharpcompress/sessions/2d1412f8-34f8-4a32-8802-e52770342940 Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
ddcbb3e474
commit
ca52cec0b3
@@ -59,6 +59,7 @@ public class TarFactory
|
||||
if (wrapper.IsMatch(sharpCompressStream))
|
||||
{
|
||||
sharpCompressStream.Rewind();
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var decompressedStream = CreateProbeDecompressionStream(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
@@ -95,6 +96,7 @@ public class TarFactory
|
||||
)
|
||||
{
|
||||
sharpCompressStream.Rewind();
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var decompressedStream = await CreateProbeDecompressionStreamAsync(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
@@ -326,6 +328,7 @@ public class TarFactory
|
||||
if (wrapper.IsMatch(sharpCompressStream))
|
||||
{
|
||||
sharpCompressStream.Rewind();
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var decompressedStream = CreateProbeDecompressionStream(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
@@ -363,6 +366,7 @@ public class TarFactory
|
||||
)
|
||||
{
|
||||
sharpCompressStream.Rewind();
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var decompressedStream = await CreateProbeDecompressionStreamAsync(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
|
||||
@@ -22,7 +22,8 @@ public class TarWrapper(
|
||||
Func<Stream, Stream> createStream,
|
||||
Func<Stream, CancellationToken, ValueTask<Stream>> createStreamAsync,
|
||||
IEnumerable<string> knownExtensions,
|
||||
bool wrapInSharpCompressStream = true
|
||||
bool wrapInSharpCompressStream = true,
|
||||
int? minimumRewindBufferSize = null
|
||||
)
|
||||
{
|
||||
public CompressionType CompressionType { get; } = type;
|
||||
@@ -30,6 +31,15 @@ public class TarWrapper(
|
||||
public Func<Stream, CancellationToken, ValueTask<bool>> IsMatchAsync { get; } = canHandleAsync;
|
||||
public bool WrapInSharpCompressStream { get; } = wrapInSharpCompressStream;
|
||||
|
||||
/// <summary>
|
||||
/// The minimum ring buffer size required to detect and probe this format.
|
||||
/// Format detection reads a decompressed block to check the tar header, so
|
||||
/// the ring buffer must be large enough to hold the compressed bytes consumed
|
||||
/// during that probe. Defaults to <see cref="Common.Constants.RewindableBufferSize"/>.
|
||||
/// </summary>
|
||||
public int MinimumRewindBufferSize { get; } =
|
||||
minimumRewindBufferSize ?? Common.Constants.RewindableBufferSize;
|
||||
|
||||
public Func<Stream, Stream> CreateStream { get; } = createStream;
|
||||
public Func<Stream, CancellationToken, ValueTask<Stream>> CreateStreamAsync { get; } =
|
||||
createStreamAsync;
|
||||
@@ -57,7 +67,11 @@ public class TarWrapper(
|
||||
await BZip2Stream
|
||||
.CreateAsync(stream, CompressionMode.Decompress, false)
|
||||
.ConfigureAwait(false),
|
||||
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"]
|
||||
["tar.bz2", "tb2", "tbz", "tbz2", "tz2"],
|
||||
// BZip2 decompresses in whole blocks; the compressed size of the first block
|
||||
// can be close to the uncompressed maximum (9 × 100 000 = 900 000 bytes).
|
||||
// The ring buffer must hold all compressed bytes read during format detection.
|
||||
minimumRewindBufferSize: BZip2Constants.baseBlockSize * 9
|
||||
),
|
||||
new(
|
||||
CompressionType.GZip,
|
||||
|
||||
@@ -202,6 +202,37 @@ public partial class SharpCompressStream : Stream, IStreamStack
|
||||
_isRecording = true;
|
||||
}
|
||||
|
||||
/// <summary>
/// Grows the rewind ring buffer so that its capacity is at least
/// <paramref name="minSize"/> bytes.
/// The call is a no-op when the stream is in passthrough mode, when no ring buffer
/// exists, or when the current buffer already has the requested capacity. Otherwise
/// a new buffer is allocated, every byte held by the old buffer is copied into it,
/// and the old buffer is disposed. Intended to be called once a compression format
/// has been detected that needs a larger buffer for probing (e.g. BZip2).
/// </summary>
/// <param name="minSize">Smallest acceptable ring buffer capacity, in bytes.</param>
internal void EnsureMinimumRewindBufferSize(int minSize)
{
    if (_isPassthrough)
    {
        return;
    }

    var current = _ringBuffer;
    if (current is null || current.Capacity >= minSize)
    {
        return;
    }

    var replacement = new RingBuffer(minSize);

    // Carry the already-buffered bytes over so they remain available after the swap.
    var carried = current.Length;
    if (carried > 0)
    {
        var scratch = new byte[carried];
        current.ReadFromEnd(carried, scratch, 0, carried);
        replacement.Write(scratch, 0, carried);
    }

    current.Dispose();
    _ringBuffer = replacement;
}
|
||||
|
||||
public override bool CanRead => true;
|
||||
|
||||
public override bool CanSeek => !_isPassthrough || stream.CanSeek;
|
||||
|
||||
@@ -107,6 +107,7 @@ public partial class TarReader
|
||||
}
|
||||
|
||||
sharpCompressStream.Position = pos;
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var testStream = await CreateProbeDecompressionStreamAsync(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
@@ -182,6 +183,7 @@ public partial class TarReader
|
||||
}
|
||||
|
||||
sharpCompressStream.Position = pos;
|
||||
sharpCompressStream.EnsureMinimumRewindBufferSize(wrapper.MinimumRewindBufferSize);
|
||||
var testStream = CreateProbeDecompressionStream(
|
||||
sharpCompressStream,
|
||||
wrapper.CompressionType,
|
||||
|
||||
@@ -268,9 +268,9 @@
|
||||
"net10.0": {
|
||||
"Microsoft.NET.ILLink.Tasks": {
|
||||
"type": "Direct",
|
||||
"requested": "[10.0.2, )",
|
||||
"resolved": "10.0.2",
|
||||
"contentHash": "sXdDtMf2qcnbygw9OdE535c2lxSxrZP8gO4UhDJ0xiJbl1wIqXS1OTcTDFTIJPOFd6Mhcm8gPEthqWGUxBsTqw=="
|
||||
"requested": "[10.0.0, )",
|
||||
"resolved": "10.0.0",
|
||||
"contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA=="
|
||||
},
|
||||
"Microsoft.NETFramework.ReferenceAssemblies": {
|
||||
"type": "Direct",
|
||||
@@ -442,9 +442,9 @@
|
||||
"net8.0": {
|
||||
"Microsoft.NET.ILLink.Tasks": {
|
||||
"type": "Direct",
|
||||
"requested": "[8.0.23, )",
|
||||
"resolved": "8.0.23",
|
||||
"contentHash": "GqHiB1HbbODWPbY/lc5xLQH8siEEhNA0ptpJCC6X6adtAYNEzu5ZlqV3YHA3Gh7fuEwgA8XqVwMtH2KNtuQM1Q=="
|
||||
"requested": "[8.0.22, )",
|
||||
"resolved": "8.0.22",
|
||||
"contentHash": "MhcMithKEiyyNkD2ZfbDZPmcOdi0GheGfg8saEIIEfD/fol3iHmcV8TsZkD4ZYz5gdUuoX4YtlVySUU7Sxl9SQ=="
|
||||
},
|
||||
"Microsoft.NETFramework.ReferenceAssemblies": {
|
||||
"type": "Direct",
|
||||
|
||||
@@ -127,4 +127,96 @@ public class SharpCompressStreamSeekTest
|
||||
Assert.Equal(3, readBuffer[0]);
|
||||
Assert.Equal(4, readBuffer[1]);
|
||||
}
|
||||
|
||||
[Fact]
public void EnsureMinimumRewindBufferSize_ExpandsSmallBuffer_PreservesExistingData()
{
    // Arrange: a non-seekable source wrapped with a deliberately small (10-byte) buffer.
    using var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 });
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    using var stream = SharpCompressStream.Create(nonSeekableMs, 10);
    stream.StartRecording();

    // Read 4 bytes while recording, checking the count so a short read cannot go unnoticed.
    var buffer = new byte[4];
    Assert.Equal(4, stream.Read(buffer, 0, 4));
    Assert.Equal(4, stream.Position);

    // Go back to the start of the recording so the 4 bytes must be replayed from the buffer.
    stream.Rewind();

    // Act: request a 200-byte ring buffer while data is already buffered.
    stream.EnsureMinimumRewindBufferSize(200);

    // Assert: the same 4 bytes are replayed, in order, after the expansion.
    var readBuffer = new byte[4];
    Assert.Equal(4, stream.Read(readBuffer, 0, 4));
    Assert.Equal(new byte[] { 1, 2, 3, 4 }, readBuffer);
}
|
||||
|
||||
[Fact]
public void EnsureMinimumRewindBufferSize_BufferAlreadyLarger_DoesNotShrink()
{
    // Arrange: the initial buffer (200 bytes) is already larger than the later request.
    using var ms = new MemoryStream(new byte[] { 1, 2, 3, 4, 5 });
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    using var stream = SharpCompressStream.Create(nonSeekableMs, 200);
    stream.StartRecording();
    Assert.Equal(5, stream.Read(new byte[5], 0, 5));

    // Act: ask for a smaller minimum than the buffer was created with.
    stream.EnsureMinimumRewindBufferSize(50);

    // Assert: all 5 recorded bytes are still replayable, not just the first and last.
    stream.Rewind();
    var readBuffer = new byte[5];
    Assert.Equal(5, stream.Read(readBuffer, 0, 5));
    Assert.Equal(new byte[] { 1, 2, 3, 4, 5 }, readBuffer);
}
|
||||
|
||||
[Fact]
public void EnsureMinimumRewindBufferSize_AllowsRewindAfterLargeRead()
{
    // Simulates the BZip2 scenario: start with a small buffer, expand it after the
    // magic bytes were probed, then check that a read larger than the initial buffer
    // can still be rewound and replayed byte-for-byte.
    const int initialSize = 10;
    const int expandedSize = 100;
    const int largeReadSize = 80;

    // Distinct value per position so any misplaced byte is detectable.
    var data = new byte[100];
    for (var i = 0; i < data.Length; i++)
    {
        data[i] = (byte)(i + 1);
    }

    using var ms = new MemoryStream(data);
    var nonSeekableMs = new NonSeekableStreamWrapper(ms);
    using var stream = SharpCompressStream.Create(nonSeekableMs, initialSize);
    stream.StartRecording();

    // Format detection: read the 4 magic bytes, then go back to the start.
    var buffer = new byte[4];
    Assert.Equal(4, stream.Read(buffer, 0, 4));
    stream.Rewind();

    // Expand the ring buffer to cover the anticipated large probe read.
    stream.EnsureMinimumRewindBufferSize(expandedSize);

    // Large probe read (stands in for BZip2 block decompression).
    var largeBuffer = new byte[largeReadSize];
    Assert.Equal(largeReadSize, stream.Read(largeBuffer, 0, largeReadSize));

    // Rewind must succeed even though largeReadSize > initialSize.
    stream.Rewind();

    // Every replayed byte must match the source, not only the first and the last one.
    var verifyBuffer = new byte[largeReadSize];
    Assert.Equal(largeReadSize, stream.Read(verifyBuffer, 0, largeReadSize));
    for (var i = 0; i < largeReadSize; i++)
    {
        Assert.Equal(data[i], largeBuffer[i]);
        Assert.Equal(data[i], verifyBuffer[i]);
    }
}
|
||||
}
|
||||
|
||||
@@ -2,6 +2,8 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Compressors.BZip2;
|
||||
using SharpCompress.Factories;
|
||||
using SharpCompress.Readers;
|
||||
using SharpCompress.Readers.Tar;
|
||||
using SharpCompress.Test.Mocks;
|
||||
@@ -58,6 +60,53 @@ public class TarReaderTests : ReaderTests
|
||||
[Fact]
|
||||
public void Tar_GZip_OldGnu_Reader() => Read("Tar.oldgnu.tar.gz", CompressionType.GZip);
|
||||
|
||||
[Fact]
public void Tar_BZip2_Reader_NonSeekable()
{
    // Regression test for the dynamic BZip2 ring buffer: opening a .tar.bz2 through a
    // forward-only stream is expected to succeed because the ring buffer is enlarged
    // (EnsureMinimumRewindBufferSize) before the tar header probe runs.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.bz2");
    using var fs = File.OpenRead(archivePath);
    using var nonSeekable = new ForwardOnlyStream(fs);
    using var reader = ReaderFactory.OpenReader(nonSeekable);

    // Count only file entries; directories are skipped.
    var entryCount = 0;
    while (reader.MoveToNextEntry())
    {
        if (reader.Entry.IsDirectory)
        {
            continue;
        }

        entryCount++;
    }

    Assert.True(entryCount > 0);
}
|
||||
|
||||
[Fact]
public void TarWrapper_BZip2_MinimumRewindBufferSize_IsMaxBZip2BlockSize()
{
    // The BZip2 wrapper is expected to declare a rewind buffer of nine base blocks,
    // i.e. the largest BZip2 block size.
    var wrapper = Array.Find(TarWrapper.Wrappers, IsBZip2);

    Assert.NotNull(wrapper);
    Assert.Equal(BZip2Constants.baseBlockSize * 9, wrapper.MinimumRewindBufferSize);

    static bool IsBZip2(TarWrapper candidate) =>
        candidate.CompressionType == CompressionType.BZip2;
}
|
||||
|
||||
[Fact]
public void TarWrapper_Default_MinimumRewindBufferSize_Is_DefaultRewindableBufferSize()
{
    // A wrapper that does not pass a custom size (here: the uncompressed one) must fall
    // back to Constants.RewindableBufferSize, keeping the previous behaviour.
    var wrapper = Array.Find(TarWrapper.Wrappers, IsUncompressed);

    Assert.NotNull(wrapper);
    Assert.Equal(Common.Constants.RewindableBufferSize, wrapper.MinimumRewindBufferSize);

    static bool IsUncompressed(TarWrapper candidate) =>
        candidate.CompressionType == CompressionType.None;
}
|
||||
|
||||
[Fact]
|
||||
public void Tar_BZip2_Entry_Stream()
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user