From 39d986a62ce434a6db80b85157f0aabde0bf29d0 Mon Sep 17 00:00:00 2001 From: Adam Hathcock Date: Sat, 3 Jan 2026 14:49:30 +0000 Subject: [PATCH] async loading for rar is better --- src/SharpCompress/Archives/Rar/RarArchive.cs | 4 +- .../Archives/SevenZip/SevenZipArchive.cs | 25 +++ .../Common/Rar/Headers/MarkHeader.cs | 4 +- .../Common/Rar/Headers/RarHeader.cs | 31 ++- .../Common/Rar/Headers/RarHeaderFactory.cs | 17 +- .../Common/Rar/RarCrcBinaryReader.cs | 11 +- .../Common/Rar/RarCryptoBinaryReader.cs | 14 +- src/SharpCompress/Common/Rar/RarVolume.cs | 7 +- .../Common/Zip/StreamingZipHeaderFactory.cs | 193 ++++++++++++++++++ src/SharpCompress/IO/MarkingBinaryReader.cs | 76 +++++-- src/SharpCompress/IO/SharpCompressStream.cs | 33 +-- src/SharpCompress/Readers/AbstractReader.cs | 27 ++- src/SharpCompress/Readers/Arc/ArcReader.cs | 16 ++ src/SharpCompress/Readers/Arj/ArjReader.cs | 42 ++++ .../Readers/Arj/MultiVolumeArjReader.cs | 1 + .../Readers/Arj/SingleVolumeArjReader.cs | 38 ++-- src/SharpCompress/Readers/GZip/GZipReader.cs | 14 ++ src/SharpCompress/Readers/Rar/RarReader.cs | 34 +-- src/SharpCompress/Readers/Tar/TarReader.cs | 21 ++ src/SharpCompress/Readers/Zip/ZipReader.cs | 37 ++++ .../Rar/RarReaderAsyncTests.cs | 30 ++- 21 files changed, 541 insertions(+), 134 deletions(-) diff --git a/src/SharpCompress/Archives/Rar/RarArchive.cs b/src/SharpCompress/Archives/Rar/RarArchive.cs index b2e69588..54ac8fee 100644 --- a/src/SharpCompress/Archives/Rar/RarArchive.cs +++ b/src/SharpCompress/Archives/Rar/RarArchive.cs @@ -280,7 +280,9 @@ public class RarArchive : AbstractArchive { try { - await MarkHeader.ReadAsync(stream, true, false, cancellationToken).ConfigureAwait(false); + await MarkHeader + .ReadAsync(stream, true, false, cancellationToken) + .ConfigureAwait(false); return true; } catch diff --git a/src/SharpCompress/Archives/SevenZip/SevenZipArchive.cs b/src/SharpCompress/Archives/SevenZip/SevenZipArchive.cs index a3041c7d..d046d41d 100644 --- a/src/SharpCompress/Archives/SevenZip/SevenZipArchive.cs +++ b/src/SharpCompress/Archives/SevenZip/SevenZipArchive.cs @@ -305,6 +305,31 @@ public class SevenZipArchive : AbstractArchive GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + var entries = _archive.Entries.ToList(); + stream.Position = 0; + foreach (var dir in entries.Where(x => x.IsDirectory)) + { + cancellationToken.ThrowIfCancellationRequested(); + _currentEntry = dir; + yield return dir; + } + // For non-directory entries, yield them without creating shared streams + // Each call to GetEntryStream() will create a fresh decompression stream + // to avoid state corruption issues with async operations + foreach (var entry in entries.Where(x => !x.IsDirectory)) + { + cancellationToken.ThrowIfCancellationRequested(); + _currentEntry = entry; + yield return entry; + } + } + protected override EntryStream GetEntryStream() { // Create a fresh decompression stream for each file (no state sharing). diff --git a/src/SharpCompress/Common/Rar/Headers/MarkHeader.cs b/src/SharpCompress/Common/Rar/Headers/MarkHeader.cs index b49abcf8..86b2b685 100644 --- a/src/SharpCompress/Common/Rar/Headers/MarkHeader.cs +++ b/src/SharpCompress/Common/Rar/Headers/MarkHeader.cs @@ -138,7 +138,9 @@ internal class MarkHeader : IRarHeader ) { var buffer = new byte[1]; - var bytesRead = await stream.ReadAsync(buffer, 0, 1, cancellationToken).ConfigureAwait(false); + var bytesRead = await stream + .ReadAsync(buffer, 0, 1, cancellationToken) + .ConfigureAwait(false); if (bytesRead == 1) { return buffer[0]; diff --git a/src/SharpCompress/Common/Rar/Headers/RarHeader.cs b/src/SharpCompress/Common/Rar/Headers/RarHeader.cs index c6fd3823..a0587cb8 100644 --- a/src/SharpCompress/Common/Rar/Headers/RarHeader.cs +++ b/src/SharpCompress/Common/Rar/Headers/RarHeader.cs @@ -37,7 +37,8 @@ internal class RarHeader : IRarHeader { try { - return await CreateAsync(reader, isRar5, archiveEncoding, cancellationToken).ConfigureAwait(false); + return await CreateAsync(reader, isRar5, archiveEncoding, cancellationToken) + .ConfigureAwait(false); } catch (InvalidFormatException) { @@ -53,7 +54,9 @@ internal class RarHeader : IRarHeader ) { var header = new RarHeader(); - await header.InitializeAsync(reader, isRar5, archiveEncoding, cancellationToken).ConfigureAwait(false); + await header + .InitializeAsync(reader, isRar5, archiveEncoding, cancellationToken) + .ConfigureAwait(false); return header; } @@ -72,23 +75,31 @@ internal class RarHeader : IRarHeader { _isRar5 = isRar5; ArchiveEncoding = archiveEncoding; - + if (IsRar5) { HeaderCrc = await reader.ReadUInt32Async(cancellationToken).ConfigureAwait(false); reader.ResetCrc(); - HeaderSize = (int)await reader.ReadRarVIntUInt32Async(3, cancellationToken).ConfigureAwait(false); + HeaderSize = (int) + await reader.ReadRarVIntUInt32Async(3, cancellationToken).ConfigureAwait(false); reader.Mark(); - HeaderCode = await reader.ReadRarVIntByteAsync(2, cancellationToken).ConfigureAwait(false); - HeaderFlags = await reader.ReadRarVIntUInt16Async(2, cancellationToken).ConfigureAwait(false); + HeaderCode = await reader + .ReadRarVIntByteAsync(2, cancellationToken) + .ConfigureAwait(false); + HeaderFlags = await reader + .ReadRarVIntUInt16Async(2, cancellationToken) + .ConfigureAwait(false); if (HasHeaderFlag(HeaderFlagsV5.HAS_EXTRA)) { - ExtraSize = await reader.ReadRarVIntUInt32Async(5, cancellationToken).ConfigureAwait(false); + ExtraSize = await reader + .ReadRarVIntUInt32Async(5, cancellationToken) + .ConfigureAwait(false); } if (HasHeaderFlag(HeaderFlagsV5.HAS_DATA)) { - AdditionalDataSize = (long)await reader.ReadRarVIntAsync(10, cancellationToken).ConfigureAwait(false); + AdditionalDataSize = (long) + await reader.ReadRarVIntAsync(10, cancellationToken).ConfigureAwait(false); } } else @@ -101,7 +112,9 @@ internal class RarHeader : IRarHeader HeaderSize = await reader.ReadInt16Async(cancellationToken).ConfigureAwait(false); if (HasHeaderFlag(HeaderFlagsV4.HAS_DATA)) { - AdditionalDataSize = await reader.ReadUInt32Async(cancellationToken).ConfigureAwait(false); + AdditionalDataSize = await reader + .ReadUInt32Async(cancellationToken) + .ConfigureAwait(false); } } } diff --git a/src/SharpCompress/Common/Rar/Headers/RarHeaderFactory.cs b/src/SharpCompress/Common/Rar/Headers/RarHeaderFactory.cs index 949891c8..b95996da 100644 --- a/src/SharpCompress/Common/Rar/Headers/RarHeaderFactory.cs +++ b/src/SharpCompress/Common/Rar/Headers/RarHeaderFactory.cs @@ -44,7 +44,8 @@ public class RarHeaderFactory public async IAsyncEnumerable ReadHeadersAsync( Stream stream, - [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default ) { var markHeader = await MarkHeader @@ -54,7 +55,10 @@ public class RarHeaderFactory yield return markHeader; RarHeader? header; - while ((header = await TryReadNextHeaderAsync(stream, cancellationToken).ConfigureAwait(false)) != null) + while ( + (header = await TryReadNextHeaderAsync(stream, cancellationToken).ConfigureAwait(false)) + != null + ) { yield return header; if (header.HeaderType == HeaderType.EndArchive) @@ -224,7 +228,10 @@ public class RarHeaderFactory } } - private async Task TryReadNextHeaderAsync(Stream stream, CancellationToken cancellationToken = default) + private async Task TryReadNextHeaderAsync( + Stream stream, + CancellationToken cancellationToken = default + ) { RarCrcBinaryReader reader; if (!IsEncrypted) @@ -254,7 +261,9 @@ public class RarHeaderFactory } } - var header = await RarHeader.TryReadBaseAsync(reader, _isRar5, Options.ArchiveEncoding, cancellationToken).ConfigureAwait(false); + var header = await RarHeader + .TryReadBaseAsync(reader, _isRar5, Options.ArchiveEncoding, cancellationToken) + .ConfigureAwait(false); if (header is null) { return null; diff --git a/src/SharpCompress/Common/Rar/RarCrcBinaryReader.cs b/src/SharpCompress/Common/Rar/RarCrcBinaryReader.cs index f47cc301..cd2a1c58 100644 --- a/src/SharpCompress/Common/Rar/RarCrcBinaryReader.cs +++ b/src/SharpCompress/Common/Rar/RarCrcBinaryReader.cs @@ -43,13 +43,18 @@ internal class RarCrcBinaryReader : MarkingBinaryReader return b; } - public override async Task ReadBytesAsync(int count, CancellationToken cancellationToken = default) + public override async Task ReadBytesAsync( + int count, + CancellationToken cancellationToken = default + ) { var result = await base.ReadBytesAsync(count, cancellationToken).ConfigureAwait(false); _currentCrc = RarCRC.CheckCrc(_currentCrc, result, 0, result.Length); return result; } - public async Task ReadBytesNoCrcAsync(int count, CancellationToken cancellationToken = default) => - await base.ReadBytesAsync(count, cancellationToken).ConfigureAwait(false); + public async Task ReadBytesNoCrcAsync( + int count, + CancellationToken cancellationToken = default + ) => await base.ReadBytesAsync(count, cancellationToken).ConfigureAwait(false); } diff --git a/src/SharpCompress/Common/Rar/RarCryptoBinaryReader.cs b/src/SharpCompress/Common/Rar/RarCryptoBinaryReader.cs index c0178134..97ae3049 100644 --- a/src/SharpCompress/Common/Rar/RarCryptoBinaryReader.cs +++ b/src/SharpCompress/Common/Rar/RarCryptoBinaryReader.cs @@ -88,10 +88,15 @@ internal sealed class RarCryptoBinaryReader : RarCrcBinaryReader public override async Task ReadByteAsync(CancellationToken cancellationToken = default) => (await ReadAndDecryptBytesAsync(1, cancellationToken).ConfigureAwait(false))[0]; - public override async Task ReadBytesAsync(int count, CancellationToken cancellationToken = default) => - await ReadAndDecryptBytesAsync(count, cancellationToken).ConfigureAwait(false); + public override async Task ReadBytesAsync( + int count, + CancellationToken cancellationToken = default + ) => await ReadAndDecryptBytesAsync(count, cancellationToken).ConfigureAwait(false); - private async Task ReadAndDecryptBytesAsync(int count, CancellationToken cancellationToken) + private async Task ReadAndDecryptBytesAsync( + int count, + CancellationToken cancellationToken + ) { var queueSize = _data.Count; var sizeToRead = count - queueSize; @@ -101,7 +106,8 @@ internal sealed class RarCryptoBinaryReader : RarCrcBinaryReader var alignedSize = sizeToRead + ((~sizeToRead + 1) & 0xf); for (var i = 0; i < alignedSize / 16; i++) { - var cipherText = await ReadBytesNoCrcAsync(16, cancellationToken).ConfigureAwait(false); + var cipherText = await ReadBytesNoCrcAsync(16, cancellationToken) + .ConfigureAwait(false); var readBytes = _rijndael.ProcessBlock(cipherText); foreach (var readByte in readBytes) { diff --git a/src/SharpCompress/Common/Rar/RarVolume.cs b/src/SharpCompress/Common/Rar/RarVolume.cs index 101848f6..af81af76 100644 --- a/src/SharpCompress/Common/Rar/RarVolume.cs +++ b/src/SharpCompress/Common/Rar/RarVolume.cs @@ -82,12 +82,15 @@ public abstract class RarVolume : Volume } internal async IAsyncEnumerable GetVolumeFilePartsAsync( - [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default ) { MarkHeader? lastMarkHeader = null; await foreach ( - var header in _headerFactory.ReadHeadersAsync(Stream, cancellationToken).ConfigureAwait(false) + var header in _headerFactory + .ReadHeadersAsync(Stream, cancellationToken) + .ConfigureAwait(false) ) { switch (header.HeaderType) diff --git a/src/SharpCompress/Common/Zip/StreamingZipHeaderFactory.cs b/src/SharpCompress/Common/Zip/StreamingZipHeaderFactory.cs index 031287ed..475116f7 100644 --- a/src/SharpCompress/Common/Zip/StreamingZipHeaderFactory.cs +++ b/src/SharpCompress/Common/Zip/StreamingZipHeaderFactory.cs @@ -2,6 +2,7 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; +using System.Threading; using SharpCompress.Common.Zip.Headers; using SharpCompress.IO; @@ -200,4 +201,196 @@ internal class StreamingZipHeaderFactory : ZipHeaderFactory yield return header; } } + + internal async IAsyncEnumerable ReadStreamHeaderAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + if (stream is not SharpCompressStream) //ensure the stream is already a SharpCompressStream. So the buffer/size will already be set + { + //the original code wrapped this with RewindableStream. Wrap with SharpCompressStream as we can get the buffer size + if (stream is SourceStream src) + { + stream = new SharpCompressStream( + stream, + src.ReaderOptions.LeaveStreamOpen, + bufferSize: src.ReaderOptions.BufferSize + ); + } + else + { + throw new ArgumentException("Stream must be a SharpCompressStream", nameof(stream)); + } + } + var rewindableStream = (SharpCompressStream)stream; + + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + var reader = new AsyncBinaryReader(rewindableStream, leaveOpen: true); + uint headerBytes = 0; + if ( + _lastEntryHeader != null + && FlagUtility.HasFlag(_lastEntryHeader.Flags, HeaderFlags.UsePostDataDescriptor) + ) + { + if (_lastEntryHeader.Part is null) + { + continue; + } + + // removed requirement for FixStreamedFileLocation() + + var pos = rewindableStream.CanSeek ? (long?)rewindableStream.Position : null; + + var crc = await reader.ReadUInt32Async().ConfigureAwait(false); + if (crc == POST_DATA_DESCRIPTOR) + { + crc = await reader.ReadUInt32Async().ConfigureAwait(false); + } + _lastEntryHeader.Crc = crc; + + //attempt 32bit read + ulong compSize = await reader.ReadUInt32Async().ConfigureAwait(false); + ulong uncompSize = await reader.ReadUInt32Async().ConfigureAwait(false); + headerBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + + //check for zip64 sentinel or unexpected header + bool isSentinel = compSize == 0xFFFFFFFF || uncompSize == 0xFFFFFFFF; + bool isHeader = headerBytes == 0x04034b50 || headerBytes == 0x02014b50; + + if (!isHeader && !isSentinel) + { + //reshuffle into 64-bit values + compSize = (uncompSize << 32) | compSize; + uncompSize = + ((ulong)headerBytes << 32) + | await reader.ReadUInt32Async().ConfigureAwait(false); + headerBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + } + else if (isSentinel) + { + //standards-compliant zip64 descriptor + compSize = await reader.ReadUInt64Async().ConfigureAwait(false); + uncompSize = await reader.ReadUInt64Async().ConfigureAwait(false); + } + + _lastEntryHeader.CompressedSize = (long)compSize; + _lastEntryHeader.UncompressedSize = (long)uncompSize; + + if (pos.HasValue) + { + _lastEntryHeader.DataStartPosition = pos - _lastEntryHeader.CompressedSize; + } + } + else if (_lastEntryHeader != null && _lastEntryHeader.IsZip64) + { + if (_lastEntryHeader.Part is null) + continue; + + //reader = ((StreamingZipFilePart)_lastEntryHeader.Part).FixStreamedFileLocation( + // ref rewindableStream + //); + + var pos = rewindableStream.CanSeek ? (long?)rewindableStream.Position : null; + + headerBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + + var version = await reader.ReadUInt16Async().ConfigureAwait(false); + var flags = (HeaderFlags)await reader.ReadUInt16Async().ConfigureAwait(false); + var compressionMethod = (ZipCompressionMethod) + await reader.ReadUInt16Async().ConfigureAwait(false); + var lastModifiedDate = await reader.ReadUInt16Async().ConfigureAwait(false); + var lastModifiedTime = await reader.ReadUInt16Async().ConfigureAwait(false); + + var crc = await reader.ReadUInt32Async().ConfigureAwait(false); + + if (crc == POST_DATA_DESCRIPTOR) + { + crc = await reader.ReadUInt32Async().ConfigureAwait(false); + } + _lastEntryHeader.Crc = crc; + + // The DataDescriptor can be either 64bit or 32bit + var compressed_size = await reader.ReadUInt32Async().ConfigureAwait(false); + var uncompressed_size = await reader.ReadUInt32Async().ConfigureAwait(false); + + // Check if we have header or 64bit DataDescriptor + var test_header = !(headerBytes == 0x04034b50 || headerBytes == 0x02014b50); + + var test_64bit = ((long)uncompressed_size << 32) | compressed_size; + if (test_64bit == _lastEntryHeader.CompressedSize && test_header) + { + _lastEntryHeader.UncompressedSize = + ((long)await reader.ReadUInt32Async().ConfigureAwait(false) << 32) + | headerBytes; + headerBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + } + else + { + _lastEntryHeader.UncompressedSize = uncompressed_size; + } + + if (pos.HasValue) + { + _lastEntryHeader.DataStartPosition = pos - _lastEntryHeader.CompressedSize; + + // 4 = First 4 bytes of the entry header (i.e. 50 4B 03 04) + rewindableStream.Position = pos.Value + 4; + } + } + else + { + headerBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + } + + _lastEntryHeader = null; + var header = await ReadHeader(headerBytes, reader).ConfigureAwait(false); + if (header is null) + { + yield break; + } + + //entry could be zero bytes so we need to know that. + if (header.ZipHeaderType == ZipHeaderType.LocalEntry) + { + var local_header = ((LocalEntryHeader)header); + var dir_header = _entries?.FirstOrDefault(entry => + entry.Key == local_header.Name + && local_header.CompressedSize == 0 + && local_header.UncompressedSize == 0 + && local_header.Crc == 0 + && local_header.IsDirectory == false + ); + + if (dir_header != null) + { + local_header.UncompressedSize = dir_header.Size; + local_header.CompressedSize = dir_header.CompressedSize; + local_header.Crc = (uint)dir_header.Crc; + } + + // If we have CompressedSize, there is data to be read + if (local_header.CompressedSize > 0) + { + header.HasData = true; + } // Check if zip is streaming ( Length is 0 and is declared in PostDataDescriptor ) + else if (local_header.Flags.HasFlag(HeaderFlags.UsePostDataDescriptor)) + { + var nextHeaderBytes = await reader.ReadUInt32Async().ConfigureAwait(false); + ((IStreamStack)rewindableStream).Rewind(sizeof(uint)); + + // Check if next data is PostDataDescriptor, streamed file with 0 length + header.HasData = !IsHeader(nextHeaderBytes); + } + else // We are not streaming and compressed size is 0, we have no data + { + header.HasData = false; + } + } + yield return header; + } + } } diff --git a/src/SharpCompress/IO/MarkingBinaryReader.cs b/src/SharpCompress/IO/MarkingBinaryReader.cs index cb03139b..cd5491d2 100644 --- a/src/SharpCompress/IO/MarkingBinaryReader.cs +++ b/src/SharpCompress/IO/MarkingBinaryReader.cs @@ -163,7 +163,9 @@ internal class MarkingBinaryReader : BinaryReader { CurrentReadByteCount++; var buffer = new byte[1]; - var bytesRead = await BaseStream.ReadAsync(buffer, 0, 1, cancellationToken).ConfigureAwait(false); + var bytesRead = await BaseStream + .ReadAsync(buffer, 0, 1, cancellationToken) + .ConfigureAwait(false); if (bytesRead != 1) { throw new EndOfStreamException(); @@ -171,14 +173,19 @@ internal class MarkingBinaryReader : BinaryReader return buffer[0]; } - public virtual async Task ReadBytesAsync(int count, CancellationToken cancellationToken = default) + public virtual async Task ReadBytesAsync( + int count, + CancellationToken cancellationToken = default + ) { CurrentReadByteCount += count; var bytes = new byte[count]; var totalRead = 0; while (totalRead < count) { - var bytesRead = await BaseStream.ReadAsync(bytes, totalRead, count - totalRead, cancellationToken).ConfigureAwait(false); + var bytesRead = await BaseStream + .ReadAsync(bytes, totalRead, count - totalRead, cancellationToken) + .ConfigureAwait(false); if (bytesRead == 0) { throw new InvalidFormatException( @@ -198,28 +205,42 @@ internal class MarkingBinaryReader : BinaryReader await ReadByteAsync(cancellationToken).ConfigureAwait(false) != 0; public async Task ReadInt16Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadInt16LittleEndian(await ReadBytesAsync(2, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadInt16LittleEndian( + await ReadBytesAsync(2, cancellationToken).ConfigureAwait(false) + ); public async Task ReadInt32Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadInt32LittleEndian(await ReadBytesAsync(4, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadInt32LittleEndian( + await ReadBytesAsync(4, cancellationToken).ConfigureAwait(false) + ); public async Task ReadInt64Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadInt64LittleEndian(await ReadBytesAsync(8, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadInt64LittleEndian( + await ReadBytesAsync(8, cancellationToken).ConfigureAwait(false) + ); public async Task ReadSByteAsync(CancellationToken cancellationToken = default) => (sbyte)await ReadByteAsync(cancellationToken).ConfigureAwait(false); public async Task ReadUInt16Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadUInt16LittleEndian(await ReadBytesAsync(2, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadUInt16LittleEndian( + await ReadBytesAsync(2, cancellationToken).ConfigureAwait(false) + ); public async Task ReadUInt32Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadUInt32LittleEndian(await ReadBytesAsync(4, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadUInt32LittleEndian( + await ReadBytesAsync(4, cancellationToken).ConfigureAwait(false) + ); public async Task ReadUInt64Async(CancellationToken cancellationToken = default) => - BinaryPrimitives.ReadUInt64LittleEndian(await ReadBytesAsync(8, cancellationToken).ConfigureAwait(false)); + BinaryPrimitives.ReadUInt64LittleEndian( + await ReadBytesAsync(8, cancellationToken).ConfigureAwait(false) + ); - public Task ReadRarVIntAsync(int maxBytes = 10, CancellationToken cancellationToken = default) => - DoReadRarVIntAsync((maxBytes - 1) * 7, cancellationToken); + public Task ReadRarVIntAsync( + int maxBytes = 10, + CancellationToken cancellationToken = default + ) => DoReadRarVIntAsync((maxBytes - 1) * 7, cancellationToken); private async Task DoReadRarVIntAsync(int maxShift, CancellationToken cancellationToken) { @@ -247,16 +268,35 @@ internal class MarkingBinaryReader : BinaryReader throw new FormatException("malformed vint"); } - public Task ReadRarVIntUInt32Async(int maxBytes = 5, CancellationToken cancellationToken = default) => - DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken); + public Task ReadRarVIntUInt32Async( + int maxBytes = 5, + CancellationToken cancellationToken = default + ) => DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken); - public async Task ReadRarVIntUInt16Async(int maxBytes = 3, CancellationToken cancellationToken = default) => - checked((ushort)await DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken).ConfigureAwait(false)); + public async Task ReadRarVIntUInt16Async( + int maxBytes = 3, + CancellationToken cancellationToken = default + ) => + checked( + (ushort) + await DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken) + .ConfigureAwait(false) + ); - public async Task ReadRarVIntByteAsync(int maxBytes = 2, CancellationToken cancellationToken = default) => - checked((byte)await DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken).ConfigureAwait(false)); + public async Task ReadRarVIntByteAsync( + int maxBytes = 2, + CancellationToken cancellationToken = default + ) => + checked( + (byte) + await DoReadRarVIntUInt32Async((maxBytes - 1) * 7, cancellationToken) + .ConfigureAwait(false) + ); - private async Task DoReadRarVIntUInt32Async(int maxShift, CancellationToken cancellationToken) + private async Task DoReadRarVIntUInt32Async( + int maxShift, + CancellationToken cancellationToken + ) { var shift = 0; uint result = 0; diff --git a/src/SharpCompress/IO/SharpCompressStream.cs b/src/SharpCompress/IO/SharpCompressStream.cs index 8268fdaf..f068caab 100644 --- a/src/SharpCompress/IO/SharpCompressStream.cs +++ b/src/SharpCompress/IO/SharpCompressStream.cs @@ -211,16 +211,7 @@ public class SharpCompressStream : Stream, IStreamStack // Fill buffer if needed if (_bufferedLength == 0) { - // Try async read first if underlying stream only supports async - try - { - _bufferedLength = Stream.Read(_buffer!, 0, _bufferSize); - } - catch (NotSupportedException) - { - // If synchronous read is not supported, try async - _bufferedLength = Stream.ReadAsync(_buffer!, 0, _bufferSize).GetAwaiter().GetResult(); - } + _bufferedLength = Stream.Read(_buffer!, 0, _bufferSize); _bufferPosition = 0; } int available = _bufferedLength - _bufferPosition; @@ -233,16 +224,7 @@ public class SharpCompressStream : Stream, IStreamStack return toRead; } // If buffer exhausted, refill - int r; - try - { - r = Stream.Read(_buffer!, 0, _bufferSize); - } - catch (NotSupportedException) - { - // If synchronous read is not supported, try async - r = Stream.ReadAsync(_buffer!, 0, _bufferSize).GetAwaiter().GetResult(); - } + int r = Stream.Read(_buffer!, 0, _bufferSize); if (r == 0) return 0; _bufferedLength = r; @@ -263,16 +245,7 @@ public class SharpCompressStream : Stream, IStreamStack { return 0; } - int read; - try - { - read = Stream.Read(buffer, offset, count); - } - catch (NotSupportedException) - { - // If synchronous read is not supported, try async - read = Stream.ReadAsync(buffer, offset, count).GetAwaiter().GetResult(); - } + int read = Stream.Read(buffer, offset, count); _internalPosition += read; return read; } diff --git a/src/SharpCompress/Readers/AbstractReader.cs b/src/SharpCompress/Readers/AbstractReader.cs index 8895f25b..cae3385c 100644 --- a/src/SharpCompress/Readers/AbstractReader.cs +++ b/src/SharpCompress/Readers/AbstractReader.cs @@ -17,7 +17,8 @@ public abstract class AbstractReader : IReader where TVolume : Volume { private bool _completed; - protected IEnumerator? _entriesForCurrentReadStream; + private IEnumerator? _entriesForCurrentReadStream; + private IAsyncEnumerator? _entriesForCurrentReadStreamAsync; private bool _wroteCurrentEntry; internal AbstractReader(ReaderOptions options, ArchiveType archiveType) @@ -104,7 +105,8 @@ public abstract class AbstractReader : IReader } if (_entriesForCurrentReadStream is null) { - var loaded = await LoadStreamForReadingAsync(RequestInitialStream(), cancellationToken).ConfigureAwait(false); + var loaded = await LoadStreamForReadingAsync(RequestInitialStream(), cancellationToken) + .ConfigureAwait(false); return loaded; } if (!_wroteCurrentEntry) @@ -122,7 +124,10 @@ public abstract class AbstractReader : IReader protected bool LoadStreamForReading(Stream stream) { - _entriesForCurrentReadStream?.Dispose(); + if (_entriesForCurrentReadStream is not null) + { + _entriesForCurrentReadStream.Dispose(); + } if (stream is null || !stream.CanRead) { throw new MultipartStreamRequiredException( @@ -140,7 +145,11 @@ public abstract class AbstractReader : IReader CancellationToken cancellationToken = default ) { - _entriesForCurrentReadStream?.Dispose(); + if (_entriesForCurrentReadStreamAsync is null) + { + throw new InvalidOperationException("Entries async enumerator is not initialized."); + } + _entriesForCurrentReadStreamAsync?.DisposeAsync(); if (stream is null || !stream.CanRead) { throw new MultipartStreamRequiredException( @@ -150,8 +159,9 @@ public abstract class AbstractReader : IReader ); } // Default implementation uses sync version - _entriesForCurrentReadStream = GetEntries(stream).GetEnumerator(); - return _entriesForCurrentReadStream.MoveNext(); + _entriesForCurrentReadStreamAsync = GetEntriesAsync(stream, cancellationToken) + .GetAsyncEnumerator(cancellationToken); + return await _entriesForCurrentReadStreamAsync.MoveNextAsync(); } protected virtual Stream RequestInitialStream() => @@ -162,6 +172,11 @@ public abstract class AbstractReader : IReader protected abstract IEnumerable GetEntries(Stream stream); + protected abstract IAsyncEnumerable GetEntriesAsync( + Stream stream, + CancellationToken cancellationToken = default + ); + #region Entry Skip/Write private void SkipEntry() diff --git a/src/SharpCompress/Readers/Arc/ArcReader.cs b/src/SharpCompress/Readers/Arc/ArcReader.cs index 439cdb12..cb2aa177 100644 --- a/src/SharpCompress/Readers/Arc/ArcReader.cs +++ b/src/SharpCompress/Readers/Arc/ArcReader.cs @@ -3,6 +3,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; using SharpCompress.Common; using SharpCompress.Common.Arc; @@ -37,5 +38,20 @@ namespace SharpCompress.Readers.Arc yield return new ArcEntry(new ArcFilePart(header, stream)); } } + + protected override async IAsyncEnumerable GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + ArcEntryHeader headerReader = new ArcEntryHeader(Options.ArchiveEncoding); + ArcEntryHeader? header; + while ((header = headerReader.ReadHeader(stream)) != null) + { + cancellationToken.ThrowIfCancellationRequested(); + yield return new ArcEntry(new ArcFilePart(header, stream)); + } + } } } diff --git a/src/SharpCompress/Readers/Arj/ArjReader.cs b/src/SharpCompress/Readers/Arj/ArjReader.cs index 439fb22c..3cf284c9 100644 --- a/src/SharpCompress/Readers/Arj/ArjReader.cs +++ b/src/SharpCompress/Readers/Arj/ArjReader.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Threading; using SharpCompress.Common; using SharpCompress.Common.Arj; using SharpCompress.Common.Arj.Headers; @@ -85,5 +86,46 @@ namespace SharpCompress.Readers.Arj protected virtual IEnumerable CreateFilePartEnumerableForCurrentEntry() => Entry.Parts; + + protected override async IAsyncEnumerable GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + var encoding = new ArchiveEncoding(); + var mainHeaderReader = new ArjMainHeader(encoding); + var localHeaderReader = new ArjLocalHeader(encoding); + + var mainHeader = mainHeaderReader.Read(stream); + if (mainHeader?.IsVolume == true) + { + throw new MultiVolumeExtractionException( + "Multi volumes are currently not supported" + ); + } + if (mainHeader?.IsGabled == true) + { + throw new CryptographicException( + "Password protected archives are currently not supported" + ); + } + + if (_volume == null) + { + _volume = new ArjVolume(stream, Options, 0); + ValidateArchive(_volume); + } + + while (true) + { + cancellationToken.ThrowIfCancellationRequested(); + var localHeader = localHeaderReader.Read(stream); + if (localHeader == null) + break; + + yield return new ArjEntry(new ArjFilePart((ArjLocalHeader)localHeader, stream)); + } + } } } diff --git a/src/SharpCompress/Readers/Arj/MultiVolumeArjReader.cs b/src/SharpCompress/Readers/Arj/MultiVolumeArjReader.cs index 547541ef..a6a0083a 100644 --- a/src/SharpCompress/Readers/Arj/MultiVolumeArjReader.cs +++ b/src/SharpCompress/Readers/Arj/MultiVolumeArjReader.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; +using System.Threading; using System.Threading.Tasks; using SharpCompress.Common; using SharpCompress.Common.Arj; diff --git a/src/SharpCompress/Readers/Arj/SingleVolumeArjReader.cs b/src/SharpCompress/Readers/Arj/SingleVolumeArjReader.cs index 54825712..bb55132b 100644 --- a/src/SharpCompress/Readers/Arj/SingleVolumeArjReader.cs +++ b/src/SharpCompress/Readers/Arj/SingleVolumeArjReader.cs @@ -1,31 +1,31 @@ using System; using System.IO; +using System.Threading; using SharpCompress.Common; using SharpCompress.Common.Arj; -namespace SharpCompress.Readers.Arj +namespace SharpCompress.Readers.Arj; + +internal class SingleVolumeArjReader : ArjReader { - internal class SingleVolumeArjReader : ArjReader + private readonly Stream _stream; + + internal SingleVolumeArjReader(Stream stream, ReaderOptions options) + : base(options) { - private readonly Stream _stream; + stream.NotNull(nameof(stream)); + _stream = stream; + } - internal SingleVolumeArjReader(Stream stream, ReaderOptions options) - : base(options) + protected override Stream RequestInitialStream() => _stream; + + protected override void ValidateArchive(ArjVolume archive) + { + if (archive.IsMultiVolume) { - stream.NotNull(nameof(stream)); - _stream = stream; - } - - protected override Stream RequestInitialStream() => _stream; - - protected override void ValidateArchive(ArjVolume archive) - { - if (archive.IsMultiVolume) - { - throw new MultiVolumeExtractionException( - "Streamed archive is a Multi-volume archive. Use a different ArjReader method to extract." - ); - } + throw new MultiVolumeExtractionException( + "Streamed archive is a Multi-volume archive. Use a different ArjReader method to extract." + ); } } } diff --git a/src/SharpCompress/Readers/GZip/GZipReader.cs b/src/SharpCompress/Readers/GZip/GZipReader.cs index e10d509a..e62d7f0d 100644 --- a/src/SharpCompress/Readers/GZip/GZipReader.cs +++ b/src/SharpCompress/Readers/GZip/GZipReader.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.IO; +using System.Threading; using SharpCompress.Common; using SharpCompress.Common.GZip; @@ -30,4 +31,17 @@ public class GZipReader : AbstractReader protected override IEnumerable GetEntries(Stream stream) => GZipEntry.GetEntries(stream, Options); + + protected override async IAsyncEnumerable GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + foreach (var entry in GZipEntry.GetEntries(stream, Options)) + { + cancellationToken.ThrowIfCancellationRequested(); + yield return entry; + } + } } diff --git a/src/SharpCompress/Readers/Rar/RarReader.cs b/src/SharpCompress/Readers/Rar/RarReader.cs index e2ff3d3a..6e5dba48 100644 --- a/src/SharpCompress/Readers/Rar/RarReader.cs +++ b/src/SharpCompress/Readers/Rar/RarReader.cs @@ -99,46 +99,20 @@ public abstract class RarReader : AbstractReader } } - protected async IAsyncEnumerable GetEntriesAsync( + protected override async IAsyncEnumerable GetEntriesAsync( Stream stream, - [System.Runtime.CompilerServices.EnumeratorCancellation] CancellationToken cancellationToken = default + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default ) { volume = new RarReaderVolume(stream, Options, 0); - await foreach ( - var fp in volume.ReadFilePartsAsync(cancellationToken).ConfigureAwait(false) - ) + await foreach (var fp in volume.ReadFilePartsAsync(cancellationToken).ConfigureAwait(false)) { ValidateArchive(volume); yield return new RarReaderEntry(volume.IsSolidArchive, fp); } } - protected override async Task LoadStreamForReadingAsync( - Stream stream, - CancellationToken cancellationToken = default - ) - { - _entriesForCurrentReadStream?.Dispose(); - if (stream is null || !stream.CanRead) - { - throw new MultipartStreamRequiredException( - "File is split into multiple archives: '" - + Entry.Key - + "'. A new readable stream is required. Use Cancel if it was intended." - ); - } - - // Materialize the async enumerable into a list to convert to sync enumerator - var entries = new List(); - await foreach (var entry in GetEntriesAsync(stream, cancellationToken).ConfigureAwait(false)) - { - entries.Add(entry); - } - _entriesForCurrentReadStream = entries.GetEnumerator(); - return _entriesForCurrentReadStream.MoveNext(); - } - protected virtual IEnumerable CreateFilePartEnumerableForCurrentEntry() => Entry.Parts; diff --git a/src/SharpCompress/Readers/Tar/TarReader.cs b/src/SharpCompress/Readers/Tar/TarReader.cs index c9218885..12d997bd 100644 --- a/src/SharpCompress/Readers/Tar/TarReader.cs +++ b/src/SharpCompress/Readers/Tar/TarReader.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Threading; using SharpCompress.Archives.GZip; using SharpCompress.Archives.Tar; using SharpCompress.Common; @@ -124,4 +125,24 @@ public class TarReader : AbstractReader compressionType, Options.ArchiveEncoding ); + + protected override async IAsyncEnumerable GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + foreach ( + var entry in TarEntry.GetEntries( + StreamingMode.Streaming, + stream, + compressionType, + Options.ArchiveEncoding + ) + ) + { + cancellationToken.ThrowIfCancellationRequested(); + yield return entry; + } + } } diff --git a/src/SharpCompress/Readers/Zip/ZipReader.cs b/src/SharpCompress/Readers/Zip/ZipReader.cs index 3a257845..5f74905b 100644 --- a/src/SharpCompress/Readers/Zip/ZipReader.cs +++ b/src/SharpCompress/Readers/Zip/ZipReader.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.IO; +using System.Threading; using SharpCompress.Common; using SharpCompress.Common.Zip; using SharpCompress.Common.Zip.Headers; @@ -91,4 +92,40 @@ public class ZipReader : AbstractReader } } } + + protected override async IAsyncEnumerable GetEntriesAsync( + Stream stream, + [System.Runtime.CompilerServices.EnumeratorCancellation] + CancellationToken cancellationToken = default + ) + { + await foreach (var h in _headerFactory.ReadStreamHeaderAsync(stream, cancellationToken)) + { + if (h != null) + { + switch (h.ZipHeaderType) + { + case ZipHeaderType.LocalEntry: + { + yield return new ZipEntry( + new StreamingZipFilePart((LocalEntryHeader)h, stream) + ); + } + break; + case ZipHeaderType.DirectoryEntry: + // DirectoryEntry headers in the central directory are intentionally skipped. + // In streaming mode, we can only read forward, and DirectoryEntry headers + // reference LocalEntry headers that have already been processed. The file + // data comes from LocalEntry headers, not DirectoryEntry headers. + // For multi-volume ZIPs where file data spans multiple files, use ZipArchive + // instead, which requires seekable streams. + break; + case ZipHeaderType.DirectoryEnd: + { + yield break; + } + } + } + } + } } diff --git a/tests/SharpCompress.Test/Rar/RarReaderAsyncTests.cs b/tests/SharpCompress.Test/Rar/RarReaderAsyncTests.cs index b387302d..f58a8042 100644 --- a/tests/SharpCompress.Test/Rar/RarReaderAsyncTests.cs +++ b/tests/SharpCompress.Test/Rar/RarReaderAsyncTests.cs @@ -75,7 +75,7 @@ public class RarReaderAsyncTests : ReaderTests archives .Select(s => Path.Combine(TEST_ARCHIVES_PATH, s)) .Select(p => File.OpenRead(p)) - .Select(x=> new AsyncOnlyStream(x)), + .Select(x => new AsyncOnlyStream(x)), new ReaderOptions { Password = "test" } ) ) @@ -125,7 +125,7 @@ public class RarReaderAsyncTests : ReaderTests var streams = archives .Select(s => Path.Combine(SCRATCH2_FILES_PATH, s)) .Select(File.OpenRead) - .Select(x=> new AsyncOnlyStream(x)) + .Select(x => new AsyncOnlyStream(x)) .ToList(); using (var reader = RarReader.Open(streams)) { @@ -251,7 +251,12 @@ public class RarReaderAsyncTests : ReaderTests using ( var stream = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, "Rar.Audio_program.rar")) ) - using (var reader = await ReaderFactory.OpenAsync(stream, new ReaderOptions { LookForHeader = true })) + using ( + var reader = await ReaderFactory.OpenAsync( + stream, + new ReaderOptions { LookForHeader = true } + ) + ) { while (await reader.MoveToNextEntryAsync()) { @@ -313,7 +318,10 @@ public class RarReaderAsyncTests : ReaderTests private async Task DoRar_Solid_Skip_Reader_Async(string filename) { using var stream = File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, filename)); - using var reader = await ReaderFactory.OpenAsync(stream, new ReaderOptions { LookForHeader = true }); + using var reader = await ReaderFactory.OpenAsync( + stream, + new ReaderOptions { LookForHeader = true } + ); while (await reader.MoveToNextEntryAsync()) { if (reader.Entry.Key.NotNull().Contains("jpg")) @@ -335,8 +343,13 @@ public class RarReaderAsyncTests : ReaderTests private async Task DoRar_Reader_Skip_Async(string filename) { - using var stream = new AsyncOnlyStream(File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, filename))); - using var reader = await ReaderFactory.OpenAsync(stream, new ReaderOptions { LookForHeader = true }); + using var stream = new AsyncOnlyStream( + File.OpenRead(Path.Combine(TEST_ARCHIVES_PATH, filename)) + ); + using var reader = await ReaderFactory.OpenAsync( + stream, + new ReaderOptions { LookForHeader = true } + ); while (await reader.MoveToNextEntryAsync()) { if (reader.Entry.Key.NotNull().Contains("jpg")) @@ -358,7 +371,10 @@ public class RarReaderAsyncTests : ReaderTests { testArchive = Path.Combine(TEST_ARCHIVES_PATH, testArchive); using Stream stream = new AsyncOnlyStream(File.OpenRead(testArchive)); - using var reader = await ReaderFactory.OpenAsync(stream, readerOptions ?? new ReaderOptions()); + using var reader = await ReaderFactory.OpenAsync( + stream, + readerOptions ?? new ReaderOptions() + ); while (await reader.MoveToNextEntryAsync()) { if (!reader.Entry.IsDirectory)