Compare commits

...

4 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
c994a9f845 Fix DataDescriptorStream to handle legitimate cross-boundary signatures
Previous fix prevented infinite loops but broke detection of legitimate
data descriptor signatures split across buffer boundaries.

New approach:
- Track previous rewind count in _previousSearchPosition
- Detect infinite loop when read size equals previous rewind count
  AND all bytes match the signature pattern
- In that case, treat bytes as data (not signature) to break loop
- Otherwise, allow normal rewind/match to continue for legitimate
  cross-boundary signatures

This fixes both cases:
1. Infinite loop on data containing signature-like bytes (0x50...)
2. Correct detection of actual signatures split at buffer edge

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-29 08:43:36 +00:00
copilot-swe-agent[bot]
e896179f3e Add regression test for DataDescriptorStream boundary bug
Test reproduces the infinite loop scenario where:
- ZIP file contains data filled with 0x50 ('P') bytes
- Non-seekable stream triggers DataDescriptorStream usage
- Partial signature matches at buffer boundaries caused infinite rewind loop

The test verifies that with the fix in place, extraction completes
successfully without hanging. It includes safeguards to detect if the
infinite loop condition occurs (> 1000 read iterations for 100KB file).

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-28 14:34:58 +00:00
copilot-swe-agent[bot]
c726a13a97 Fix DataDescriptorStream boundary bug to prevent infinite loop
When the first byte of the data descriptor signature (0x50 = 'P') appears as
the last byte of a read buffer, the reader would rewind and re-read the same
byte repeatedly, causing an infinite loop.

The fix tracks the last rewind position and prevents rewinding to the same
position twice, breaking the infinite loop while preserving correct behavior.

Fixes boundary condition where signature bytes at buffer edge cause hang.

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-28 12:34:39 +00:00
copilot-swe-agent[bot]
82071af566 Initial plan 2026-01-28 12:25:49 +00:00
2 changed files with 123 additions and 0 deletions

View File

@@ -30,6 +30,7 @@ public class DataDescriptorStream : Stream, IStreamStack
private int _searchPosition;
private bool _isDisposed;
private bool _done;
private int _previousSearchPosition;
private static byte[] _dataDescriptorMarker = new byte[] { 0x50, 0x4b, 0x07, 0x08 };
private static long _dataDescriptorSize = 24;
@@ -116,6 +117,36 @@ public class DataDescriptorStream : Stream, IStreamStack
var read = _stream.Read(buffer, offset, count);
// Detect infinite loop: if we just rewound and read the exact same bytes again
// (indicated by read == _previousSearchPosition and all bytes matching),
// it means these bytes are data, not a signature. Don't rewind again.
var inInfiniteLoop =
_previousSearchPosition > 0 && read == _previousSearchPosition && read <= 3;
if (inInfiniteLoop)
{
// Verify all bytes match the pattern
var allMatch = true;
for (var i = 0; i < read && allMatch; i++)
{
if (buffer[offset + i] != _dataDescriptorMarker[i])
{
allMatch = false;
}
}
if (allMatch)
{
// This is the infinite loop condition - these bytes are data, not signature
// Reset state and return the bytes as data
_searchPosition = 0;
_previousSearchPosition = 0;
return read;
}
}
_previousSearchPosition = 0;
for (var i = 0; i < read; i++)
{
if (buffer[offset + i] == _dataDescriptorMarker[_searchPosition])
@@ -165,6 +196,7 @@ public class DataDescriptorStream : Stream, IStreamStack
{
read -= _searchPosition;
_stream.Position -= _searchPosition;
_previousSearchPosition = _searchPosition;
_searchPosition = 0;
}

View File

@@ -530,4 +530,95 @@ public class ZipReaderTests : ReaderTests
// Should iterate through all entries, not just the first one
Assert.True(count > 1, $"Expected more than 1 entry, but got {count}");
}
[Fact]
public void DataDescriptorStream_BoundaryBug_ReproduceInfiniteLoop()
{
// Regression test for DataDescriptorStream boundary bug
// Issue: When the first byte of the data descriptor signature (0x50 = 'P')
// appears at the end of a read buffer, the reader would get stuck in an
// infinite loop, causing extraction to fail.
//
// This test reproduces the exact scenario described in the issue:
// - Streaming ZIP reader (non-seekable stream)
// - DataDescriptorStream (triggered by CompressionType.None + non-seekable)
// - Payload filled with 0x50 ('P') bytes
// - Payload size that causes boundary condition
// Create a payload filled with 0x50 bytes that will trigger the boundary bug
// The bug occurs when partial signature matches fall on buffer boundaries
const int payloadSize = 100000; // Large enough to span multiple read buffers
var payload = new byte[payloadSize];
for (var i = 0; i < payloadSize; i++)
{
payload[i] = 0x50; // Fill with 'P' bytes (0x50 = first byte of PK signature)
}
using var memory = new MemoryStream();
// Use non-seekable stream to force data descriptor mode
// This triggers the use of DataDescriptorStream
Stream writeStream = new TestStream(memory, read: true, write: true, seek: false);
// Write ZIP with no compression (this ensures DataDescriptorStream is used)
using (
var zipWriter = WriterFactory.Open(writeStream, ArchiveType.Zip, CompressionType.None)
)
{
zipWriter.Write("test.txt", new MemoryStream(payload));
}
// Read back the ZIP
var zipBytes = memory.ToArray();
var readStream = new MemoryStream(zipBytes);
using var reader = ZipReader.Open(readStream);
var extracted = false;
var readIterations = 0;
const int maxIterations = 1000; // Safety limit to detect infinite loops
while (reader.MoveToNextEntry())
{
using var entryStream = reader.OpenEntryStream();
var outputStream = new MemoryStream();
var buffer = new byte[8192];
int bytesRead;
while ((bytesRead = entryStream.Read(buffer, 0, buffer.Length)) > 0)
{
outputStream.Write(buffer, 0, bytesRead);
readIterations++;
// Detect infinite loop - the bug causes read to return very small amounts repeatedly
if (readIterations > maxIterations)
{
Assert.Fail(
$"Detected infinite loop: Read called {readIterations} times, "
+ $"extracted {outputStream.Length} of {payloadSize} bytes. "
+ "This indicates the DataDescriptorStream boundary bug is present."
);
}
}
// Verify we extracted all the data correctly
var extractedData = outputStream.ToArray();
Assert.Equal(payloadSize, extractedData.Length);
// Verify content is correct (all 0x50 bytes)
for (var i = 0; i < payloadSize; i++)
{
if (extractedData[i] != 0x50)
{
Assert.Fail(
$"Data corruption at byte {i}: expected 0x50, got 0x{extractedData[i]:X2}"
);
}
}
extracted = true;
}
Assert.True(extracted, "Failed to extract the entry");
}
}