Merge pull request #1133 from adamhathcock/copilot/sub-pr-1132

Add async I/O support for SevenZip archive initialization
This commit is contained in:
Adam Hathcock
2026-01-16 08:44:04 +00:00
committed by GitHub
5 changed files with 306 additions and 14 deletions

View File

@@ -32,11 +32,56 @@ public partial class SevenZipArchive : AbstractArchive<SevenZipArchiveEntry, Sev
IEnumerable<SevenZipVolume> volumes
)
{
var stream = volumes.Single().Stream;
LoadFactory(stream);
foreach (var volume in volumes)
{
LoadFactory(volume.Stream);
if (_database is null)
{
yield break;
}
var entries = new SevenZipArchiveEntry[_database._files.Count];
for (var i = 0; i < _database._files.Count; i++)
{
var file = _database._files[i];
entries[i] = new SevenZipArchiveEntry(
this,
new SevenZipFilePart(
volume.Stream,
_database,
i,
file,
ReaderOptions.ArchiveEncoding
)
);
}
foreach (
var group in entries.Where(x => !x.IsDirectory).GroupBy(x => x.FilePart.Folder)
)
{
var isSolid = false;
foreach (var entry in group)
{
entry.IsSolid = isSolid;
isSolid = true;
}
}
foreach (var entry in entries)
{
yield return entry;
}
}
}
protected override async IAsyncEnumerable<SevenZipArchiveEntry> LoadEntriesAsync(
IAsyncEnumerable<SevenZipVolume> volumes
)
{
var stream = (await volumes.SingleAsync()).Stream;
await LoadFactoryAsync(stream);
if (_database is null)
{
return Enumerable.Empty<SevenZipArchiveEntry>();
yield break;
}
var entries = new SevenZipArchiveEntry[_database._files.Count];
for (var i = 0; i < _database._files.Count; i++)
@@ -57,7 +102,10 @@ public partial class SevenZipArchive : AbstractArchive<SevenZipArchiveEntry, Sev
}
}
return entries;
foreach (var entry in entries)
{
yield return entry;
}
}
private void LoadFactory(Stream stream)
@@ -71,6 +119,27 @@ public partial class SevenZipArchive : AbstractArchive<SevenZipArchiveEntry, Sev
}
}
private async Task LoadFactoryAsync(
Stream stream,
CancellationToken cancellationToken = default
)
{
if (_database is null)
{
stream.Position = 0;
var reader = new ArchiveReader();
await reader.OpenAsync(
stream,
lookForHeader: ReaderOptions.LookForHeader,
cancellationToken
);
_database = await reader.ReadDatabaseAsync(
new PasswordProvider(ReaderOptions.Password),
cancellationToken
);
}
}
protected override IReader CreateReaderForSolidExtraction() =>
new SevenZipReader(ReaderOptions, this);

View File

@@ -5,6 +5,8 @@ using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Compressors.LZMA;
using SharpCompress.Compressors.LZMA.Utilites;
using SharpCompress.IO;
@@ -1270,6 +1272,46 @@ internal class ArchiveReader
_stream = stream;
}
public async Task OpenAsync(
Stream stream,
bool lookForHeader,
CancellationToken cancellationToken = default
)
{
Close();
_streamOrigin = stream.Position;
_streamEnding = stream.Length;
var canScan = lookForHeader ? 0x80000 - 20 : 0;
while (true)
{
// TODO: Check Signature!
_header = new byte[0x20];
await stream.ReadExactAsync(_header, 0, 0x20, cancellationToken);
if (
!lookForHeader
|| _header
.AsSpan(0, length: 6)
.SequenceEqual<byte>([0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C])
)
{
break;
}
if (canScan == 0)
{
throw new InvalidFormatException("Unable to find 7z signature");
}
canScan--;
stream.Position = ++_streamOrigin;
}
_stream = stream;
}
public void Close()
{
_stream?.Dispose();
@@ -1383,6 +1425,110 @@ internal class ArchiveReader
return db;
}
public async Task<ArchiveDatabase> ReadDatabaseAsync(
IPasswordProvider pass,
CancellationToken cancellationToken = default
)
{
var db = new ArchiveDatabase(pass);
db.Clear();
db._majorVersion = _header[6];
db._minorVersion = _header[7];
if (db._majorVersion != 0)
{
throw new InvalidOperationException();
}
var crcFromArchive = DataReader.Get32(_header, 8);
var nextHeaderOffset = (long)DataReader.Get64(_header, 0xC);
var nextHeaderSize = (long)DataReader.Get64(_header, 0x14);
var nextHeaderCrc = DataReader.Get32(_header, 0x1C);
var crc = Crc.INIT_CRC;
crc = Crc.Update(crc, nextHeaderOffset);
crc = Crc.Update(crc, nextHeaderSize);
crc = Crc.Update(crc, nextHeaderCrc);
crc = Crc.Finish(crc);
if (crc != crcFromArchive)
{
throw new InvalidOperationException();
}
db._startPositionAfterHeader = _streamOrigin + 0x20;
// empty header is ok
if (nextHeaderSize == 0)
{
db.Fill();
return db;
}
if (nextHeaderOffset < 0 || nextHeaderSize < 0 || nextHeaderSize > int.MaxValue)
{
throw new InvalidOperationException();
}
if (nextHeaderOffset > _streamEnding - db._startPositionAfterHeader)
{
throw new InvalidOperationException("nextHeaderOffset is invalid");
}
_stream.Seek(nextHeaderOffset, SeekOrigin.Current);
var header = new byte[nextHeaderSize];
await _stream.ReadExactAsync(header, 0, header.Length, cancellationToken);
if (Crc.Finish(Crc.Update(Crc.INIT_CRC, header, 0, header.Length)) != nextHeaderCrc)
{
throw new InvalidOperationException();
}
using (var streamSwitch = new CStreamSwitch())
{
streamSwitch.Set(this, header);
var type = ReadId();
if (type != BlockType.Header)
{
if (type != BlockType.EncodedHeader)
{
throw new InvalidOperationException();
}
var dataVector = ReadAndDecodePackedStreams(
db._startPositionAfterHeader,
db.PasswordProvider
);
// compressed header without content is odd but ok
if (dataVector.Count == 0)
{
db.Fill();
return db;
}
if (dataVector.Count != 1)
{
throw new InvalidOperationException();
}
streamSwitch.Set(this, dataVector[0]);
if (ReadId() != BlockType.Header)
{
throw new InvalidOperationException();
}
}
ReadHeader(db, db.PasswordProvider);
}
db.Fill();
return db;
}
internal class CExtractFolderInfo
{
internal int _fileIndex;

View File

@@ -27,8 +27,7 @@ namespace SharpCompress.Factories
Stream stream,
string? password = null,
int bufferSize = ReaderOptions.DefaultBufferSize
) =>
ArjHeader.IsArchive(stream);
) => ArjHeader.IsArchive(stream);
public override ValueTask<bool> IsArchiveAsync(
Stream stream,

View File

@@ -216,9 +216,9 @@
"net10.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[10.0.0, )",
"resolved": "10.0.0",
"contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA=="
"requested": "[10.0.1, )",
"resolved": "10.0.1",
"contentHash": "ISahzLHsHY7vrwqr2p1YWZ+gsxoBRtH7gWRDK8fDUst9pp2He0GiesaqEfeX0V8QMCJM3eNEHGGpnIcPjFo2NQ=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",

View File

@@ -8,14 +8,17 @@ using Xunit;
namespace SharpCompress.Test.SevenZip;
#if !NETFRAMEWORK
public class SevenZipArchiveAsyncTests : ArchiveTests
{
[Fact]
public async Task SevenZipArchive_LZMA_AsyncStreamExtraction()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.LZMA.7z");
#if NETFRAMEWORK
using var stream = File.OpenRead(testArchive);
#else
await using var stream = File.OpenRead(testArchive);
#endif
await using var archive = await ArchiveFactory.OpenAsyncArchive(
new AsyncOnlyStream(stream)
);
@@ -30,9 +33,21 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
Directory.CreateDirectory(targetDir);
}
#if NETFRAMEWORK
using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#else
await using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#endif
#if NETFRAMEWORK
using var targetStream = File.Create(targetPath);
#else
await using var targetStream = File.Create(targetPath);
#endif
#if NETFRAMEWORK
await sourceStream.CopyToAsync(targetStream, 81920, CancellationToken.None);
#else
await sourceStream.CopyToAsync(targetStream, CancellationToken.None);
#endif
}
VerifyFiles();
@@ -42,7 +57,11 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
public async Task SevenZipArchive_LZMA2_AsyncStreamExtraction()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.LZMA2.7z");
#if NETFRAMEWORK
using var stream = File.OpenRead(testArchive);
#else
await using var stream = File.OpenRead(testArchive);
#endif
await using var archive = await ArchiveFactory.OpenAsyncArchive(
new AsyncOnlyStream(stream)
);
@@ -57,19 +76,35 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
Directory.CreateDirectory(targetDir);
}
#if NETFRAMEWORK
using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#else
await using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#endif
#if NETFRAMEWORK
using var targetStream = File.Create(targetPath);
#else
await using var targetStream = File.Create(targetPath);
#endif
#if NETFRAMEWORK
await sourceStream.CopyToAsync(targetStream, 81920, CancellationToken.None);
#else
await sourceStream.CopyToAsync(targetStream, CancellationToken.None);
#endif
}
VerifyFiles();
}
//[Fact]
[Fact]
public async Task SevenZipArchive_Solid_AsyncStreamExtraction()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.solid.7z");
#if NETFRAMEWORK
using var stream = File.OpenRead(testArchive);
#else
await using var stream = File.OpenRead(testArchive);
#endif
await using var archive = await ArchiveFactory.OpenAsyncArchive(
new AsyncOnlyStream(stream)
);
@@ -84,19 +119,35 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
Directory.CreateDirectory(targetDir);
}
#if NETFRAMEWORK
using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#else
await using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#endif
#if NETFRAMEWORK
using var targetStream = File.Create(targetPath);
#else
await using var targetStream = File.Create(targetPath);
#endif
#if NETFRAMEWORK
await sourceStream.CopyToAsync(targetStream, 81920, CancellationToken.None);
#else
await sourceStream.CopyToAsync(targetStream, CancellationToken.None);
#endif
}
VerifyFiles();
}
//[Fact]
[Fact]
public async Task SevenZipArchive_BZip2_AsyncStreamExtraction()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.BZip2.7z");
#if NETFRAMEWORK
using var stream = File.OpenRead(testArchive);
#else
await using var stream = File.OpenRead(testArchive);
#endif
await using var archive = await ArchiveFactory.OpenAsyncArchive(
new AsyncOnlyStream(stream)
);
@@ -111,19 +162,35 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
Directory.CreateDirectory(targetDir);
}
#if NETFRAMEWORK
using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#else
await using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#endif
#if NETFRAMEWORK
using var targetStream = File.Create(targetPath);
#else
await using var targetStream = File.Create(targetPath);
#endif
#if NETFRAMEWORK
await sourceStream.CopyToAsync(targetStream, 81920, CancellationToken.None);
#else
await sourceStream.CopyToAsync(targetStream, CancellationToken.None);
#endif
}
VerifyFiles();
}
//[Fact]
[Fact]
public async Task SevenZipArchive_PPMd_AsyncStreamExtraction()
{
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "7Zip.PPMd.7z");
#if NETFRAMEWORK
using var stream = File.OpenRead(testArchive);
#else
await using var stream = File.OpenRead(testArchive);
#endif
await using var archive = await ArchiveFactory.OpenAsyncArchive(
new AsyncOnlyStream(stream)
);
@@ -138,12 +205,23 @@ public class SevenZipArchiveAsyncTests : ArchiveTests
Directory.CreateDirectory(targetDir);
}
#if NETFRAMEWORK
using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#else
await using var sourceStream = await entry.OpenEntryStreamAsync(CancellationToken.None);
#endif
#if NETFRAMEWORK
using var targetStream = File.Create(targetPath);
#else
await using var targetStream = File.Create(targetPath);
#endif
#if NETFRAMEWORK
await sourceStream.CopyToAsync(targetStream, 81920, CancellationToken.None);
#else
await sourceStream.CopyToAsync(targetStream, CancellationToken.None);
#endif
}
VerifyFiles();
}
}
#endif