diff --git a/src/SharpCompress/Compressors/Lzw/LzwStream.cs b/src/SharpCompress/Compressors/Lzw/LzwStream.cs index 10f19407..74074a66 100644 --- a/src/SharpCompress/Compressors/Lzw/LzwStream.cs +++ b/src/SharpCompress/Compressors/Lzw/LzwStream.cs @@ -410,6 +410,244 @@ namespace SharpCompress.Compressors.Lzw return offset - start; } + /// + /// Reads decompressed data asynchronously into the provided buffer byte array + /// + /// The array to read and decompress data into + /// The offset indicating where the data should be placed + /// The number of bytes to decompress + /// Cancellation token + /// The number of bytes read. Zero signals the end of stream + public override async Task ReadAsync( + byte[] buffer, + int offset, + int count, + CancellationToken cancellationToken + ) + { + if (!headerParsed) + await ParseHeaderAsync(cancellationToken).ConfigureAwait(false); + + if (eof) + return 0; + + int start = offset; + + int[] lTabPrefix = tabPrefix; + byte[] lTabSuffix = tabSuffix; + byte[] lStack = stack; + int lNBits = nBits; + int lMaxCode = maxCode; + int lMaxMaxCode = maxMaxCode; + int lBitMask = bitMask; + int lOldCode = oldCode; + byte lFinChar = finChar; + int lStackP = stackP; + int lFreeEnt = freeEnt; + byte[] lData = data; + int lBitPos = bitPos; + + int sSize = lStack.Length - lStackP; + if (sSize > 0) + { + int num = (sSize >= count) ? count : sSize; + Array.Copy(lStack, lStackP, buffer, offset, num); + offset += num; + count -= num; + lStackP += num; + } + + if (count == 0) + { + stackP = lStackP; + return offset - start; + } + + MainLoop: + do + { + if (end < EXTRA) + { + await FillAsync(cancellationToken).ConfigureAwait(false); + } + + int bitIn = (got > 0) ? (end - end % lNBits) << 3 : (end << 3) - (lNBits - 1); + + while (lBitPos < bitIn) + { + if (count == 0) + { + nBits = lNBits; + maxCode = lMaxCode; + maxMaxCode = lMaxMaxCode; + bitMask = lBitMask; + oldCode = lOldCode; + finChar = lFinChar; + stackP = lStackP; + freeEnt = lFreeEnt; + bitPos = lBitPos; + + return offset - start; + } + + if (lFreeEnt > lMaxCode) + { + int nBytes = lNBits << 3; + lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes; + + lNBits++; + lMaxCode = (lNBits == maxBits) ? lMaxMaxCode : (1 << lNBits) - 1; + + lBitMask = (1 << lNBits) - 1; + lBitPos = ResetBuf(lBitPos); + goto MainLoop; + } + + int pos = lBitPos >> 3; + int code = + ( + ( + (lData[pos] & 0xFF) + | ((lData[pos + 1] & 0xFF) << 8) + | ((lData[pos + 2] & 0xFF) << 16) + ) >> (lBitPos & 0x7) + ) & lBitMask; + + lBitPos += lNBits; + + if (lOldCode == -1) + { + if (code >= 256) + throw new IncompleteArchiveException( + "corrupt input: " + code + " > 255" + ); + + lFinChar = (byte)(lOldCode = code); + buffer[offset++] = lFinChar; + count--; + continue; + } + + if (code == TBL_CLEAR && blockMode) + { + Array.Copy(zeros, 0, lTabPrefix, 0, zeros.Length); + lFreeEnt = TBL_FIRST - 1; + + int nBytes = lNBits << 3; + lBitPos = (lBitPos - 1) + nBytes - (lBitPos - 1 + nBytes) % nBytes; + lNBits = LzwConstants.INIT_BITS; + lMaxCode = (1 << lNBits) - 1; + lBitMask = lMaxCode; + + lBitPos = ResetBuf(lBitPos); + goto MainLoop; + } + + int inCode = code; + lStackP = lStack.Length; + + if (code >= lFreeEnt) + { + if (code > lFreeEnt) + { + throw new IncompleteArchiveException( + "corrupt input: code=" + code + ", freeEnt=" + lFreeEnt + ); + } + + lStack[--lStackP] = lFinChar; + code = lOldCode; + } + + while (code >= 256) + { + lStack[--lStackP] = lTabSuffix[code]; + code = lTabPrefix[code]; + } + + lFinChar = lTabSuffix[code]; + buffer[offset++] = lFinChar; + count--; + + sSize = lStack.Length - lStackP; + int num = (sSize >= count) ? count : sSize; + Array.Copy(lStack, lStackP, buffer, offset, num); + offset += num; + count -= num; + lStackP += num; + + if (lFreeEnt < lMaxMaxCode) + { + lTabPrefix[lFreeEnt] = lOldCode; + lTabSuffix[lFreeEnt] = lFinChar; + lFreeEnt++; + } + + lOldCode = inCode; + + if (count == 0) + { + nBits = lNBits; + maxCode = lMaxCode; + bitMask = lBitMask; + oldCode = lOldCode; + finChar = lFinChar; + stackP = lStackP; + freeEnt = lFreeEnt; + bitPos = lBitPos; + + return offset - start; + } + } + + lBitPos = ResetBuf(lBitPos); + } while (got > 0); + + nBits = lNBits; + maxCode = lMaxCode; + bitMask = lBitMask; + oldCode = lOldCode; + finChar = lFinChar; + stackP = lStackP; + freeEnt = lFreeEnt; + bitPos = lBitPos; + + eof = true; + return offset - start; + } + +#if !NETFRAMEWORK && !NETSTANDARD2_0 + /// + /// Reads decompressed data asynchronously into the provided buffer + /// + /// The memory to read and decompress data into + /// Cancellation token + /// The number of bytes read. Zero signals the end of stream + public override async ValueTask ReadAsync( + Memory buffer, + CancellationToken cancellationToken = default + ) + { + if (buffer.IsEmpty) + { + return 0; + } + + byte[] array = System.Buffers.ArrayPool.Shared.Rent(buffer.Length); + try + { + int read = await ReadAsync(array, 0, buffer.Length, cancellationToken) + .ConfigureAwait(false); + array.AsSpan(0, read).CopyTo(buffer.Span); + return read; + } + finally + { + System.Buffers.ArrayPool.Shared.Return(array); + } + } +#endif + /// /// Moves the unread data in the buffer to the beginning and resets /// the pointers. @@ -433,6 +671,18 @@ namespace SharpCompress.Compressors.Lzw } } + private async Task FillAsync(CancellationToken cancellationToken) + { + cancellationToken.ThrowIfCancellationRequested(); + got = await baseInputStream + .ReadAsync(data, end, data.Length - 1 - end, cancellationToken) + .ConfigureAwait(false); + if (got > 0) + { + end += got; + } + } + private void ParseHeader() { headerParsed = true; @@ -494,6 +744,66 @@ namespace SharpCompress.Compressors.Lzw tabSuffix[idx] = (byte)idx; } + private async Task ParseHeaderAsync(CancellationToken cancellationToken) + { + headerParsed = true; + + byte[] hdr = new byte[LzwConstants.HDR_SIZE]; + + int result = await baseInputStream + .ReadAsync(hdr, 0, hdr.Length, cancellationToken) + .ConfigureAwait(false); + + if (result < 0) + throw new IncompleteArchiveException("Failed to read LZW header"); + + if (hdr[0] != (LzwConstants.MAGIC >> 8) || hdr[1] != (LzwConstants.MAGIC & 0xff)) + { + throw new IncompleteArchiveException( + String.Format( + "Wrong LZW header. Magic bytes don't match. 0x{0:x2} 0x{1:x2}", + hdr[0], + hdr[1] + ) + ); + } + + blockMode = (hdr[2] & LzwConstants.BLOCK_MODE_MASK) > 0; + maxBits = hdr[2] & LzwConstants.BIT_MASK; + + if (maxBits > LzwConstants.MAX_BITS) + { + throw new ArchiveException( + "Stream compressed with " + + maxBits + + " bits, but decompression can only handle " + + LzwConstants.MAX_BITS + + " bits." + ); + } + + if ((hdr[2] & LzwConstants.RESERVED_MASK) > 0) + { + throw new ArchiveException("Unsupported bits set in the header."); + } + + maxMaxCode = 1 << maxBits; + nBits = LzwConstants.INIT_BITS; + maxCode = (1 << nBits) - 1; + bitMask = maxCode; + oldCode = -1; + finChar = 0; + freeEnt = blockMode ? TBL_FIRST : 256; + + tabPrefix = new int[1 << maxBits]; + tabSuffix = new byte[1 << maxBits]; + stack = new byte[1 << maxBits]; + stackP = stack.Length; + + for (int idx = 255; idx >= 0; idx--) + tabSuffix[idx] = (byte)idx; + } + #region Stream Overrides /// diff --git a/tests/SharpCompress.Test/Streams/LzwStreamAsyncTests.cs b/tests/SharpCompress.Test/Streams/LzwStreamAsyncTests.cs new file mode 100644 index 00000000..6a7f7119 --- /dev/null +++ b/tests/SharpCompress.Test/Streams/LzwStreamAsyncTests.cs @@ -0,0 +1,174 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Threading; +using System.Threading.Tasks; +using SharpCompress.Compressors.Lzw; +using Xunit; + +namespace SharpCompress.Test.Streams; + +public class LzwStreamAsyncTests : TestBase +{ + [Fact] + public async Task LzwStream_ReadAsync_ByteArray() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var buffer = new byte[4096]; + int bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false); + + Assert.True(bytesRead > 0, "Should read at least some data"); + } + +#if !NETFRAMEWORK && !NETSTANDARD2_0 + [Fact] + public async Task LzwStream_ReadAsync_Memory() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var buffer = new byte[4096]; + int bytesRead = await lzwStream.ReadAsync(new Memory(buffer)).ConfigureAwait(false); + + Assert.True(bytesRead > 0, "Should read at least some data"); + } +#endif + + [Fact] + public async Task LzwStream_ReadAsync_ProducesSameResultAsSync() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + + byte[] syncResult; + byte[] asyncResult; + + using (var stream = File.OpenRead(testArchive)) + using (var lzwStream = new LzwStream(stream)) + { + syncResult = ReadAllSync(lzwStream); + } + + using (var stream = File.OpenRead(testArchive)) + using (var lzwStream = new LzwStream(stream)) + { + asyncResult = await ReadAllAsync(lzwStream).ConfigureAwait(false); + } + + Assert.Equal(syncResult, asyncResult); + } + + [Fact] + public async Task LzwStream_ReadAsync_MultipleReads() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var totalData = new List(); + var buffer = new byte[1024]; + int bytesRead; + + while ( + (bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)) + > 0 + ) + { + for (int i = 0; i < bytesRead; i++) + { + totalData.Add(buffer[i]); + } + } + + Assert.True(totalData.Count > 0, "Should have read some data"); + } + + [Fact] + public async Task LzwStream_ReadAsync_Cancellation() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var cts = new CancellationTokenSource(); + var buffer = new byte[4096]; + + var readTask = lzwStream.ReadAsync(buffer, 0, buffer.Length, cts.Token); + cts.Cancel(); + + await Assert.ThrowsAsync(async () => await readTask); + } + + [Fact] + public async Task LzwStream_ReadAsync_EmptyBuffer() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var buffer = new byte[0]; + int bytesRead = await lzwStream.ReadAsync(buffer, 0, 0).ConfigureAwait(false); + + Assert.Equal(0, bytesRead); + } + + [Fact] + public async Task LzwStream_ReadAsync_ReturnsZeroAtEndOfStream() + { + var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar.Z"); + using var stream = File.OpenRead(testArchive); + using var lzwStream = new LzwStream(stream); + + var buffer = new byte[4096]; + + int bytesRead; + while ( + (bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)) + > 0 + ) { } + + Assert.Equal(0, bytesRead); + + bytesRead = await lzwStream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false); + Assert.Equal(0, bytesRead); + } + + private static async Task ReadAllAsync(LzwStream stream) + { + var result = new List(); + var buffer = new byte[4096]; + int bytesRead; + + while ( + (bytesRead = await stream.ReadAsync(buffer, 0, buffer.Length).ConfigureAwait(false)) > 0 + ) + { + for (int i = 0; i < bytesRead; i++) + { + result.Add(buffer[i]); + } + } + + return result.ToArray(); + } + + private static byte[] ReadAllSync(LzwStream stream) + { + var result = new List(); + var buffer = new byte[4096]; + int bytesRead; + + while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0) + { + for (int i = 0; i < bytesRead; i++) + { + result.Add(buffer[i]); + } + } + + return result.ToArray(); + } +}