LZW async

This commit is contained in:
Adam Hathcock
2026-01-20 12:56:13 +00:00
parent b26d38b7e4
commit 3987733079
2 changed files with 484 additions and 0 deletions

View File

@@ -410,6 +410,244 @@ namespace SharpCompress.Compressors.Lzw
return offset - start;
}
/// <summary>
/// Reads decompressed data asynchronously into the provided buffer byte array
/// </summary>
/// <param name="buffer">The array to read and decompress data into</param>
/// <param name="offset">The offset indicating where the data should be placed</param>
/// <param name="count">The number of bytes to decompress</param>
/// <param name="cancellationToken">Cancellation token</param>
/// <returns>The number of bytes read. Zero signals the end of stream</returns>
public override async Task<int> ReadAsync(
byte[] buffer,
int offset,
int count,
CancellationToken cancellationToken
)
{
if (!headerParsed)
await ParseHeaderAsync(cancellationToken).ConfigureAwait(false);
if (eof)
return 0;
int start = offset;
int[] lTabPrefix = tabPrefix;
byte[] lTabSuffix = tabSuffix;
byte[] lStack = stack;
int lNBits = nBits;
int lMaxCode = maxCode;
int lMaxMaxCode = maxMaxCode;
int lBitMask = bitMask;
int lOldCode = oldCode;
byte lFinChar = finChar;
int lStackP = stackP;
int lFreeEnt = freeEnt;
byte[] lData = data;
int lBitPos = bitPos;
int sSize = lStack.Length - lStackP;
if (sSize > 0)
{
int num = (sSize >= count) ? count : sSize;
Array.Copy(lStack, lStackP, buffer, offset, num);
offset += num;
count -= num;
lStackP += num;
}
if (count == 0)
{
stackP = lStackP;
return offset - start;
}
MainLoop:
do
{
if (end < EXTRA)
{
await FillAsync(cancellationToken).ConfigureAwait(false);
}
int bitIn = (got > 0) ? (end - end % lNBits) << 3 : (end << 3) - (lNBits - 1);
while (lBitPos < bitIn)
{
if (count == 0)
{
nBits = lNBits;
maxCode = lMaxCode;
maxMaxCode = lMaxMaxCode;
bitMask = lBitMask;
oldCode = lOldCode;
finChar = lFinChar;
stackP = lStackP;
freeEnt = lFreeEnt;
bitPos = lBitPos;
return offset - start;
}
if (lFreeEnt > lMaxCode)
{
int nBytes = lNBits << 3;
lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes;
lNBits++;
lMaxCode = (lNBits == maxBits) ? lMaxMaxCode : (1 << lNBits) - 1;
lBitMask = (1 << lNBits) - 1;
lBitPos = ResetBuf(lBitPos);
goto MainLoop;
}
int pos = lBitPos >> 3;
int code =
(
(
(lData[pos] & 0xFF)
| ((lData[pos + 1] & 0xFF) << 8)
| ((lData[pos + 2] & 0xFF) << 16)
) >> (lBitPos & 0x7)
) & lBitMask;
lBitPos += lNBits;
if (lOldCode == -1)
{
if (code >= 256)
throw new IncompleteArchiveException(
"corrupt input: " + code + " > 255"
);
lFinChar = (byte)(lOldCode = code);
buffer[offset++] = lFinChar;
count--;
continue;
}
if (code == TBL_CLEAR && blockMode)
{
Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length);
lFreeEnt = TBL_FIRST - 1;
int nBytes = lNBits << 3;
lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes;
lNBits = LzwConstants.INIT_BITS;
lMaxCode = (1 << lNBits) - 1;
lBitMask = lMaxCode;
lBitPos = ResetBuf(lBitPos);
goto MainLoop;
}
int inCode = code;
lStackP = lStack.Length;
if (code >= lFreeEnt)
{
if (code > lFreeEnt)
{
throw new IncompleteArchiveException(
"corrupt input: code=" + code + ", freeEnt=" + lFreeEnt
);
}
lStack[--lStackP] = lFinChar;
code = lOldCode;
}
while (code >= 256)
{
lStack[--lStackP] = lTabSuffix[code];
code = lTabPrefix[code];
}
lFinChar = lTabSuffix[code];
buffer[offset++] = lFinChar;
count--;
sSize = lStack.Length - lStackP;
int num = (sSize >= count) ? count : sSize;
Array.Copy(lStack, lStackP, buffer, offset, num);
offset += num;
count -= num;
lStackP += num;
if (lFreeEnt < lMaxMaxCode)
{
lTabPrefix[lFreeEnt] = lOldCode;
lTabSuffix[lFreeEnt] = lFinChar;
lFreeEnt++;
}
lOldCode = inCode;
if (count == 0)
{
nBits = lNBits;
maxCode = lMaxCode;
bitMask = lBitMask;
oldCode = lOldCode;
finChar = lFinChar;
stackP = lStackP;
freeEnt = lFreeEnt;
bitPos = lBitPos;
return offset - start;
}
}
lBitPos = ResetBuf(lBitPos);
} while (got > 0);
nBits = lNBits;
maxCode = lMaxCode;
bitMask = lBitMask;
oldCode = lOldCode;
finChar = lFinChar;
stackP = lStackP;
freeEnt = lFreeEnt;
bitPos = lBitPos;
eof = true;
return offset - start;
}
#if !NETFRAMEWORK && !NETSTANDARD2_0
/// <summary>
/// Reads decompressed data asynchronously into the provided buffer
/// </summary>
/// <param name="buffer">The memory to read and decompress data into</param>
/// <param name="cancellationToken">Cancellation token</param>
/// <returns>The number of bytes read. Zero signals the end of stream</returns>
public override async ValueTask<int> ReadAsync(
Memory<byte> buffer,
CancellationToken cancellationToken = default
)
{
if (buffer.IsEmpty)
{
return 0;
}
byte[] array = System.Buffers.ArrayPool<byte>.Shared.Rent(buffer.Length);
try
{
int read = await ReadAsync(array, 0, buffer.Length, cancellationToken)
.ConfigureAwait(false);
array.AsSpan(0, read).CopyTo(buffer.Span);
return read;
}
finally
{
System.Buffers.ArrayPool<byte>.Shared.Return(array);
}
}
#endif
/// <summary>
/// Moves the unread data in the buffer to the beginning and resets
/// the pointers.
@@ -433,6 +671,18 @@ namespace SharpCompress.Compressors.Lzw
}
}
private async Task FillAsync(CancellationToken cancellationToken)
{
cancellationToken.ThrowIfCancellationRequested();
got = await baseInputStream
.ReadAsync(data, end, data.Length - 1 - end, cancellationToken)
.ConfigureAwait(false);
if (got > 0)
{
end += got;
}
}
private void ParseHeader()
{
headerParsed = true;
@@ -494,6 +744,66 @@ namespace SharpCompress.Compressors.Lzw
tabSuffix[idx] = (byte)idx;
}
private async Task ParseHeaderAsync(CancellationToken cancellationToken)
{
headerParsed = true;
byte[] hdr = new byte[LzwConstants.HDR_SIZE];
int result = await baseInputStream
.ReadAsync(hdr, 0, hdr.Length, cancellationToken)
.ConfigureAwait(false);
if (result < 0)
throw new IncompleteArchiveException("Failed to read LZW header");
if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff))
{
throw new IncompleteArchiveException(
String.Format(
"Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}",
hdr[0],
hdr[1]
)
);
}
blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0;
maxBits = hdr[2] & LzwConstants.BIT_MASK;
if (maxBits > LzwConstants.MAX_BITS)
{
throw new ArchiveException(
"Stream compressed with "
+ maxBits
+ " bits, but decompression can only handle "
+ LzwConstants.MAX_BITS
+ " bits."
);
}
if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0)
{
throw new ArchiveException("Unsupported bits set in the header.");
}
maxMaxCode = 1 << maxBits;
nBits = LzwConstants.INIT_BITS;
maxCode = (1 << nBits) - 1;
bitMask = maxCode;
oldCode = -1;
finChar = 0;
freeEnt = blockMode ? TBL_FIRST : 256;
tabPrefix = new int[1 << maxBits];
tabSuffix = new byte[1 << maxBits];
stack = new byte[1 << maxBits];
stackP = stack.Length;
for (int idx = 255; idx >= 0; idx--)
tabSuffix[idx] = (byte)idx;
}
#region Stream Overrides
/// <summary>

View File

@@ -0,0 +1,174 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Compressors.Lzw;
using Xunit;
namespace SharpCompress.Test.Streams;
public class LzwStreamAsyncTests : TestBase
{
[Fact]
public async Task LzwStream_ReadAsync_ByteArray()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var buffer = new byte[4096];
int bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false);
Assert.True(bytesRead > 0, "Should read at least some data");
}
#if !NETFRAMEWORK && !NETSTANDARD2_0
[Fact]
public async Task LzwStream_ReadAsync_Memory()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var buffer = new byte[4096];
int bytesRead = await lzwStream.ReadAsync(new Memory<byte>(buffer)).ConfigureAwait(false);
Assert.True(bytesRead > 0, "Should read at least some data");
}
#endif
[Fact]
public async Task LzwStream_ReadAsync_ProducesSameResultAsSync()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
byte[] syncResult;
byte[] asyncResult;
using (var stream = File.OpenRead(testArchive))
using (var lzwStream = new LzwStream(stream))
{
syncResult = ReadAllSync(lzwStream);
}
using (var stream = File.OpenRead(testArchive))
using (var lzwStream = new LzwStream(stream))
{
asyncResult = await ReadAllAsync(lzwStream).ConfigureAwait(false);
}
Assert.Equal(syncResult, asyncResult);
}
[Fact]
public async Task LzwStream_ReadAsync_MultipleReads()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var totalData = new List<byte>();
var buffer = new byte[1024];
int bytesRead;
while (
(bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false))
> 0
)
{
for (int i = 0; i < bytesRead; i++)
{
totalData.Add(buffer[i]);
}
}
Assert.True(totalData.Count > 0, "Should have read some data");
}
[Fact]
public async Task LzwStream_ReadAsync_Cancellation()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var cts = new CancellationTokenSource();
var buffer = new byte[4096];
var readTask = lzwStream.ReadAsync(buffer, 0, buffer.Length, cts.Token);
cts.Cancel();
await Assert.ThrowsAsync<OperationCanceledException>(async () => await readTask);
}
[Fact]
public async Task LzwStream_ReadAsync_EmptyBuffer()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var buffer = new byte[0];
int bytesRead = await lzwStream.ReadAsync(buffer, 0, 0).ConfigureAwait(false);
Assert.Equal(0, bytesRead);
}
[Fact]
public async Task LzwStream_ReadAsync_ReturnsZeroAtEndOfStream()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z");
using var stream = File.OpenRead(testArchive);
using var lzwStream = new LzwStream(stream);
var buffer = new byte[4096];
int bytesRead;
while (
(bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false))
> 0
) { }
Assert.Equal(0, bytesRead);
bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false);
Assert.Equal(0, bytesRead);
}
private static async Task<byte[]> ReadAllAsync(LzwStream stream)
{
var result = new List<byte>();
var buffer = new byte[4096];
int bytesRead;
while (
(bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)) > 0
)
{
for (int i = 0; i < bytesRead; i++)
{
result.Add(buffer[i]);
}
}
return result.ToArray();
}
private static byte[] ReadAllSync(LzwStream stream)
{
var result = new List<byte>();
var buffer = new byte[4096];
int bytesRead;
while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0)
{
for (int i = 0; i < bytesRead; i++)
{
result.Add(buffer[i]);
}
}
return result.ToArray();
}
}