Merge pull request #1235 from adamhathcock/adam/support-async-7z-writing

This commit is contained in:
Adam Hathcock
2026-02-25 18:31:29 +00:00
committed by GitHub
5 changed files with 347 additions and 7 deletions

View File

@@ -22,12 +22,12 @@
| Tar.LZip | LZMA | Both | TarArchive | TarReader | TarWriter (3) |
| Tar.XZ | LZMA2 | Decompress | TarArchive | TarReader | TarWriter (3) |
| GZip (single file) | DEFLATE | Both | GZipArchive | GZipReader | GZipWriter |
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Decompress | SevenZipArchive | N/A | N/A |
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Both | SevenZipArchive | N/A | SevenZipWriter |
1. SOLID Rars are only supported in the RarReader API.
2. Zip format supports pkware and WinzipAES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported but only with seekable streams as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
3. The Tar format requires a file size in the header. If no size is specified to the TarWriter and the stream is not seekable, then an exception will be thrown.
4. The 7Zip format doesn't allow for reading as a forward-only stream so 7Zip is only supported through the Archive API. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
4. The 7Zip format doesn't allow for reading as a forward-only stream, so 7Zip read support is only through the Archive API. Writing is supported through SevenZipWriter for non-solid archives with LZMA/LZMA2 and requires a seekable output stream. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
5. LZip has no support for extra data like the file name or timestamp. There is a default filename used when looking at the entry Key on the archive.
### Zip Format Notes

View File

@@ -1,5 +1,7 @@
using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Common;
using SharpCompress.Compressors.LZMA;
using SharpCompress.Crypto;
@@ -87,6 +89,90 @@ internal sealed class SevenZipStreamsCompressor(Stream outputStream)
);
}
/// <summary>
/// Asynchronous counterpart of the compressor entry point: reads <paramref name="inputStream"/>
/// to the end, encodes it with LZMA or LZMA2 and writes the packed bytes to the output stream.
/// </summary>
/// <param name="inputStream">Source of the uncompressed bytes.</param>
/// <param name="compressionType">
/// <see cref="CompressionType.LZMA2"/> selects the LZMA2 encoder; any other value takes the LZMA path.
/// </param>
/// <param name="encoderProperties">
/// Encoder settings. When null, defaults are created with the end-of-stream flag
/// enabled for LZMA and disabled for LZMA2.
/// </param>
/// <param name="cancellationToken">Checked on entry and forwarded to the copy loop.</param>
/// <returns>
/// The <see cref="PackedStream"/> built from the coder properties, the number of bytes written
/// to the output stream, the number of bytes read from the input, and both CRC values.
/// </returns>
public async ValueTask<PackedStream> CompressAsync(
    Stream inputStream,
    CompressionType compressionType,
    LzmaEncoderProperties? encoderProperties = null,
    CancellationToken cancellationToken = default
)
{
    cancellationToken.ThrowIfCancellationRequested();

    var useLzma2 = compressionType == CompressionType.LZMA2;
    var settings = encoderProperties ?? new LzmaEncoderProperties(eos: !useLzma2);
    var packStart = outputStream.Position;

    // Everything the encoder emits passes through this wrapper so the packed CRC is available.
    using (var packCrc = new Crc32Stream(outputStream))
    {
        byte[] coderProperties;
        (uint crc, long bytesRead) unpacked;

        if (useLzma2)
        {
            // LZMA2: chunk-framed encoder; its properties are read once the copy has finished.
            using (
                var encoder = new Lzma2EncoderStream(
                    packCrc,
                    settings.DictionarySize,
                    settings.NumFastBytes
                )
            )
            {
                unpacked = await CopyWithCrcAsync(inputStream, encoder, cancellationToken)
                    .ConfigureAwait(false);
                coderProperties = encoder.Properties;
            }
        }
        else
        {
            // LZMA: properties are captured before any data is pushed through the encoder.
            using (var encoder = LzmaStream.Create(settings, false, packCrc))
            {
                coderProperties = encoder.Properties;
                unpacked = await CopyWithCrcAsync(inputStream, encoder, cancellationToken)
                    .ConfigureAwait(false);
            }
        }

        // The encoder has been disposed at this point, so the output position
        // covers everything it wrote for this stream.
        return BuildPackedStream(
            isLzma2: useLzma2,
            coderProperties,
            (ulong)(outputStream.Position - packStart),
            (ulong)unpacked.bytesRead,
            unpacked.crc,
            packCrc.Crc
        );
    }
}
/// <summary>
/// Copies data from source to destination while computing CRC32 of the source data.
/// Uses Crc32Stream.Compute for CRC calculation to avoid duplicating the table/algorithm.
@@ -120,6 +206,43 @@ internal sealed class SevenZipStreamsCompressor(Stream outputStream)
bytesRead = totalRead;
}
/// <summary>
/// Pumps <paramref name="source"/> into <paramref name="destination"/> with asynchronous
/// reads and writes, accumulating a CRC32 over the bytes read along the way.
/// The CRC arithmetic is delegated to Crc32Stream.Compute rather than re-implemented here.
/// </summary>
/// <param name="source">Stream to read until it reports no more data.</param>
/// <param name="destination">Stream that receives every chunk that was read.</param>
/// <param name="cancellationToken">Forwarded to each read and write call.</param>
/// <returns>The CRC32 of the copied bytes and the total number of bytes copied.</returns>
private static async ValueTask<(uint crc, long bytesRead)> CopyWithCrcAsync(
    Stream source,
    Stream destination,
    CancellationToken cancellationToken
)
{
    var chunk = new byte[81920];
    var runningSeed = Crc32Stream.DEFAULT_SEED;
    var copied = 0L;

    while (true)
    {
        var count = await source
            .ReadAsync(chunk, 0, chunk.Length, cancellationToken)
            .ConfigureAwait(false);
        if (count <= 0)
        {
            break;
        }

        // Compute hands back the complemented CRC; complementing it again yields
        // the raw register value to use as the seed for the next chunk.
        runningSeed = ~Crc32Stream.Compute(
            Crc32Stream.DEFAULT_POLYNOMIAL,
            runningSeed,
            chunk.AsSpan(0, count)
        );

        await destination.WriteAsync(chunk, 0, count, cancellationToken).ConfigureAwait(false);
        copied += count;
    }

    // Final complement turns the running seed back into the finished CRC value.
    return (~runningSeed, copied);
}
private static PackedStream BuildPackedStream(
bool isLzma2,
byte[] properties,

View File

@@ -17,7 +17,7 @@
<Copyright>Copyright (c) 2025 Adam Hathcock</Copyright>
<GenerateAssemblyTitleAttribute>false</GenerateAssemblyTitleAttribute>
<GenerateAssemblyProductAttribute>false</GenerateAssemblyProductAttribute>
<Description>SharpCompress is a compression library for NET 4.8/NET Standard 2.0/NET Standard 2.1/NET 5.0/NET 6.0/NET 7.0/NET 8.0/NET 9.0/NET 10.0 that can unrar, decompress 7zip, decompress xz, zip/unzip, tar/untar lzip/unlzip, bzip2/unbzip2 and gzip/ungzip with forward-only reading and file random access APIs. Write support for zip/tar/bzip2/gzip is implemented.</Description>
<Description>SharpCompress is a compression library for .NET 4.8/.NET Standard 2.0/.NET Standard 2.1/.NET 5.0/.NET 6.0/.NET 7.0/.NET 8.0/.NET 9.0/.NET 10.0 that can unrar, decompress 7zip, decompress xz, zip/unzip, tar/untar, lzip/unlzip, bzip2/unbzip2 and gzip/ungzip with forward-only reading and file random access APIs. Write support for zip/tar/bzip2/gzip/7zip is implemented.</Description>
<PublishRepositoryUrl>true</PublishRepositoryUrl>
<IncludeSymbols>true</IncludeSymbols>
<DebugType>embedded</DebugType>

View File

@@ -2,6 +2,7 @@ using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Common.SevenZip;
namespace SharpCompress.Writers.SevenZip;
@@ -9,18 +10,75 @@ public partial class SevenZipWriter
{
/// <summary>
/// Asynchronously writes a file entry to the 7z archive.
/// Note: LZMA compression itself is synchronous; async is used for stream copying.
/// A source that turns out to contain no bytes is recorded as an empty entry and
/// contributes no packed stream to the archive.
/// </summary>
/// <param name="filename">Entry name; it is passed through NormalizeFilename before being stored.</param>
/// <param name="source">Stream supplying the entry's uncompressed data.</param>
/// <param name="modificationTime">Modification time stored on the entry; may be null.</param>
/// <param name="cancellationToken">Checked before any work starts and forwarded to the compressor.</param>
/// <exception cref="ObjectDisposedException">Thrown when the archive has already been finalized.</exception>
public override ValueTask WriteAsync(
public override async ValueTask WriteAsync(
string filename,
Stream source,
DateTime? modificationTime,
CancellationToken cancellationToken = default
)
{
// No further entries are accepted once the archive has been finalized.
if (finalized)
{
throw new ObjectDisposedException(
nameof(SevenZipWriter),
"Cannot write to a finalized archive."
);
}
cancellationToken.ThrowIfCancellationRequested();
// NOTE(review): the synchronous Write call and immediate return below look like the
// earlier implementation shown next to its replacement — confirm which lines are live.
Write(filename, source, modificationTime);
return new ValueTask();
filename = NormalizeFilename(filename);
var progressStream = WrapWithProgress(source, filename);
// A seekable source reporting zero length is recorded as an empty entry
// without invoking the compressor at all.
var isEmpty = source.CanSeek && source.Length == 0;
if (isEmpty)
{
entries.Add(
new SevenZipWriteEntry
{
Name = filename,
ModificationTime = modificationTime,
IsDirectory = false,
IsEmpty = true,
}
);
return;
}
var output = OutputStream.NotNull();
// Remember where this entry's packed data begins so it can be discarded
// if the source yields no bytes.
var outputPosBefore = output.Position;
var compressor = new SevenZipStreamsCompressor(output);
var packed = await compressor
.CompressAsync(
progressStream,
sevenZipOptions.CompressionType,
sevenZipOptions.LzmaProperties,
cancellationToken
)
.ConfigureAwait(false);
// The unpack size is only known after compression; zero means the source
// produced no data even though the up-front length check did not catch it.
var actuallyEmpty = packed.Folder.GetUnpackSize() == 0;
if (!actuallyEmpty)
{
packedStreams.Add(packed);
}
else
{
// Rewind and truncate the output so nothing written for the zero-byte stream remains.
output.Position = outputPosBefore;
output.SetLength(outputPosBefore);
}
entries.Add(
new SevenZipWriteEntry
{
Name = filename,
ModificationTime = modificationTime,
IsDirectory = false,
IsEmpty = isEmpty || actuallyEmpty,
}
);
}
/// <summary>

View File

@@ -0,0 +1,159 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Archives.SevenZip;
using SharpCompress.Common;
using SharpCompress.Test.Mocks;
using SharpCompress.Writers;
using SharpCompress.Writers.SevenZip;
using Xunit;
namespace SharpCompress.Test.SevenZip;
/// <summary>
/// Exercises the asynchronous write path of <see cref="SevenZipWriter"/>: archives are
/// produced with WriteAsync / WriteDirectoryAsync and then reopened through
/// <see cref="SevenZipArchive"/> to check what was stored.
/// </summary>
public class SevenZipWriterAsyncTests : TestBase
{
    /// <summary>Copies the remaining content of <paramref name="stream"/> into a new byte array.</summary>
    private static byte[] Drain(Stream stream)
    {
        using var sink = new MemoryStream();
        stream.CopyTo(sink);
        return sink.ToArray();
    }

    /// <summary>One file written with default options reads back with the same name, size and bytes.</summary>
    [Fact]
    public async ValueTask SevenZipWriter_Async_SingleFile_RoundTrip()
    {
        var expected = "Hello, async 7z world!"u8.ToArray();
        using var backing = new MemoryStream();

        await using (
            var sevenZip = new SevenZipWriter(
                new AsyncOnlyStream(backing),
                new SevenZipWriterOptions()
            )
        )
        {
            await sevenZip.WriteAsync("test.txt", new MemoryStream(expected), DateTime.UtcNow);
        }

        backing.Position = 0;
        using var reopened = (SevenZipArchive)SevenZipArchive.OpenArchive(backing);
        var files = reopened.Entries.Where(item => !item.IsDirectory).ToList();

        Assert.Single(files);
        Assert.Equal("test.txt", files[0].Key);
        Assert.Equal(expected.Length, (int)files[0].Size);

        byte[] actual;
        using (var entryData = files[0].OpenEntryStream())
        {
            actual = Drain(entryData);
        }
        Assert.Equal(expected, actual);
    }

    /// <summary>A directory entry plus two files written with LZMA2 all show up when the archive is reopened.</summary>
    [Fact]
    public async ValueTask SevenZipWriter_Async_WithDirectory_RoundTrip()
    {
        using var backing = new MemoryStream();

        await using (
            var sevenZip = new SevenZipWriter(
                new AsyncOnlyStream(backing),
                new SevenZipWriterOptions(CompressionType.LZMA2)
            )
        )
        {
            await sevenZip.WriteDirectoryAsync("mydir", DateTime.UtcNow);

            var firstPayload = new MemoryStream(Encoding.UTF8.GetBytes("file one"));
            await sevenZip.WriteAsync("mydir/file1.txt", firstPayload, DateTime.UtcNow);

            var secondPayload = new MemoryStream(Encoding.UTF8.GetBytes("file two"));
            await sevenZip.WriteAsync("mydir/file2.txt", secondPayload, DateTime.UtcNow);
        }

        backing.Position = 0;
        using var reopened = (SevenZipArchive)SevenZipArchive.OpenArchive(backing);
        var all = reopened.Entries.ToList();

        Assert.Equal(3, all.Count);
        Assert.Contains(all, item => item.IsDirectory && item.Key == "mydir");
        Assert.Contains(all, item => !item.IsDirectory && item.Key == "mydir/file1.txt");
        Assert.Contains(all, item => !item.IsDirectory && item.Key == "mydir/file2.txt");
    }

    /// <summary>A writer obtained from WriterFactory.OpenAsyncWriter produces a readable archive.</summary>
    [Fact]
    public async ValueTask SevenZipWriter_Async_ViaWriterFactory()
    {
        var expected = "Factory-created async archive"u8.ToArray();
        using var backing = new MemoryStream();

        await using (
            var sevenZip = await WriterFactory.OpenAsyncWriter(
                new AsyncOnlyStream(backing),
                ArchiveType.SevenZip,
                new SevenZipWriterOptions()
            )
        )
        {
            await sevenZip.WriteAsync("factory.txt", new MemoryStream(expected), DateTime.UtcNow);
        }

        backing.Position = 0;
        using var reopened = (SevenZipArchive)SevenZipArchive.OpenArchive(backing);
        var file = reopened.Entries.Single(item => !item.IsDirectory);

        byte[] actual;
        using (var entryData = file.OpenEntryStream())
        {
            actual = Drain(entryData);
        }

        Assert.Equal("factory.txt", file.Key);
        Assert.Equal(expected, actual);
    }

    /// <summary>
    /// The entry source is wrapped in AsyncOnlyStream while the archive stream is a plain
    /// MemoryStream; the written content must still read back intact.
    /// </summary>
    [Fact]
    public async ValueTask SevenZipWriter_Async_UsesAsyncSourceReads()
    {
        var expected = "source stream supports async reads only"u8.ToArray();
        using var backing = new MemoryStream();

        await using (var sevenZip = new SevenZipWriter(backing, new SevenZipWriterOptions()))
        {
            using var payload = new AsyncOnlyStream(new MemoryStream(expected));
            await sevenZip.WriteAsync("async-source.txt", payload, DateTime.UtcNow);
        }

        backing.Position = 0;
        using var reopened = (SevenZipArchive)SevenZipArchive.OpenArchive(backing);
        var file = reopened.Entries.Single(item => !item.IsDirectory);

        byte[] actual;
        using (var entryData = file.OpenEntryStream())
        {
            actual = Drain(entryData);
        }

        Assert.Equal("async-source.txt", file.Key);
        Assert.Equal(expected, actual);
    }

    /// <summary>Calling WriteAsync with an already-cancelled token surfaces an OperationCanceledException.</summary>
    [Fact]
    public async ValueTask SevenZipWriter_Async_Cancelled_Throws()
    {
        using var backing = new MemoryStream();
        await using var sevenZip = new SevenZipWriter(backing, new SevenZipWriterOptions());
        using var payload = new MemoryStream("cancel me"u8.ToArray());
        using var cancellation = new CancellationTokenSource();
        cancellation.Cancel();

        await Assert.ThrowsAnyAsync<OperationCanceledException>(() =>
            sevenZip.WriteAsync("cancel.txt", payload, DateTime.UtcNow, cancellation.Token).AsTask()
        );
    }
}