mirror of
https://github.com/adamhathcock/sharpcompress.git
synced 2026-04-05 21:51:09 +00:00
Merge pull request #1235 from adamhathcock/adam/support-async-7z-writing
This commit is contained in:
@@ -22,12 +22,12 @@
|
||||
| Tar.LZip | LZMA | Both | TarArchive | TarReader | TarWriter (3) |
|
||||
| Tar.XZ | LZMA2 | Decompress | TarArchive | TarReader | TarWriter (3) |
|
||||
| GZip (single file) | DEFLATE | Both | GZipArchive | GZipReader | GZipWriter |
|
||||
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Decompress | SevenZipArchive | N/A | N/A |
|
||||
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Both | SevenZipArchive | N/A | SevenZipWriter |
|
||||
|
||||
1. SOLID Rars are only supported in the RarReader API.
|
||||
2. Zip format supports PKWARE and WinZip AES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported, but only with seekable streams, as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
|
||||
3. The Tar format requires a file size in the header. If no size is specified to the TarWriter and the stream is not seekable, then an exception will be thrown.
|
||||
4. The 7Zip format doesn't allow for reading as a forward-only stream so 7Zip is only supported through the Archive API. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
|
||||
4. The 7Zip format doesn't allow for reading as a forward-only stream, so 7Zip read support is only through the Archive API. Writing is supported through SevenZipWriter for non-solid archives with LZMA/LZMA2 and requires a seekable output stream. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
|
||||
5. LZip has no support for extra data like the file name or timestamp. A default filename is used when reading the entry Key from the archive.
|
||||
|
||||
### Zip Format Notes
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Compressors.LZMA;
|
||||
using SharpCompress.Crypto;
|
||||
@@ -87,6 +89,90 @@ internal sealed class SevenZipStreamsCompressor(Stream outputStream)
|
||||
);
|
||||
}
|
||||
|
||||
/// <summary>
/// Asynchronously compresses <paramref name="inputStream"/> into the output stream
/// owned by this compressor and describes the result as a PackedStream.
/// The packed size is measured as the change in the output stream position; the
/// uncompressed size and CRC come from the copy loop, and the packed CRC from the
/// CRC wrapper placed around the output stream.
/// </summary>
/// <param name="inputStream">Uncompressed data to compress.</param>
/// <param name="compressionType">Compression method (LZMA or LZMA2). Any value other than LZMA2 takes the LZMA path.</param>
/// <param name="encoderProperties">LZMA encoder properties (null for defaults).</param>
/// <param name="cancellationToken">Cancellation token, checked on entry and forwarded to the copy loop.</param>
/// <returns>The PackedStream produced by BuildPackedStream for the data just written.</returns>
public async ValueTask<PackedStream> CompressAsync(
    Stream inputStream,
    CompressionType compressionType,
    LzmaEncoderProperties? encoderProperties = null,
    CancellationToken cancellationToken = default
)
{
    cancellationToken.ThrowIfCancellationRequested();

    var useLzma2 = compressionType == CompressionType.LZMA2;

    // Default properties request an end-of-stream marker for LZMA only.
    var settings = encoderProperties ?? new LzmaEncoderProperties(eos: !useLzma2);

    // Remember where the packed data begins so its size can be derived afterwards.
    var packStart = outputStream.Position;

    // Everything the encoder emits flows through this wrapper so the packed CRC is tracked.
    using var packedCrc = new Crc32Stream(outputStream);

    byte[] coderProperties;
    (uint crc, long bytesRead) unpacked;

    if (useLzma2)
    {
        // LZMA2: Lzma2EncoderStream provides the chunk-based framing.
        using (
            var encoder = new Lzma2EncoderStream(
                packedCrc,
                settings.DictionarySize,
                settings.NumFastBytes
            )
        )
        {
            unpacked = await CopyWithCrcAsync(inputStream, encoder, cancellationToken)
                .ConfigureAwait(false);

            // On this path the properties are read after the copy has finished.
            coderProperties = encoder.Properties;
        }
    }
    else
    {
        // LZMA
        using (var encoder = LzmaStream.Create(settings, false, packedCrc))
        {
            // On this path the properties are read before any data is written.
            coderProperties = encoder.Properties;

            unpacked = await CopyWithCrcAsync(inputStream, encoder, cancellationToken)
                .ConfigureAwait(false);
        }
    }

    // The encoder has been disposed by now, so position and CRC are read after disposal.
    return BuildPackedStream(
        isLzma2: useLzma2,
        coderProperties,
        (ulong)(outputStream.Position - packStart),
        (ulong)unpacked.bytesRead,
        unpacked.crc,
        packedCrc.Crc
    );
}
|
||||
|
||||
/// <summary>
|
||||
/// Copies data from source to destination while computing CRC32 of the source data.
|
||||
/// Uses Crc32Stream.Compute for CRC calculation to avoid duplicating the table/algorithm.
|
||||
@@ -120,6 +206,43 @@ internal sealed class SevenZipStreamsCompressor(Stream outputStream)
|
||||
bytesRead = totalRead;
|
||||
}
|
||||
|
||||
/// <summary>
/// Asynchronously pumps <paramref name="source"/> into <paramref name="destination"/>
/// while accumulating a CRC32 over the bytes that were read.
/// The CRC is computed with Crc32Stream.Compute so the table/algorithm is not duplicated here.
/// </summary>
/// <param name="source">Stream to read from until it reports no more data.</param>
/// <param name="destination">Stream that receives every chunk read from the source.</param>
/// <param name="cancellationToken">Token forwarded to each read and write.</param>
/// <returns>The CRC32 of the source data and the total number of bytes copied.</returns>
private static async ValueTask<(uint crc, long bytesRead)> CopyWithCrcAsync(
    Stream source,
    Stream destination,
    CancellationToken cancellationToken
)
{
    var chunk = new byte[81920];
    var runningCrc = Crc32Stream.DEFAULT_SEED;
    long copied = 0;

    while (true)
    {
        var count = await source
            .ReadAsync(chunk, 0, chunk.Length, cancellationToken)
            .ConfigureAwait(false);
        if (count <= 0)
        {
            break;
        }

        // Crc32Stream.Compute returns ~CalculateCrc(table, seed, data), so
        // complementing its result gives the seed for the next chunk.
        runningCrc = ~Crc32Stream.Compute(
            Crc32Stream.DEFAULT_POLYNOMIAL,
            runningCrc,
            chunk.AsSpan(0, count)
        );

        await destination.WriteAsync(chunk, 0, count, cancellationToken).ConfigureAwait(false);
        copied += count;
    }

    // The final complement turns the running seed into the finished CRC value.
    return (~runningCrc, copied);
}
|
||||
|
||||
private static PackedStream BuildPackedStream(
|
||||
bool isLzma2,
|
||||
byte[] properties,
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
<Copyright>Copyright (c) 2025 Adam Hathcock</Copyright>
|
||||
<GenerateAssemblyTitleAttribute>false</GenerateAssemblyTitleAttribute>
|
||||
<GenerateAssemblyProductAttribute>false</GenerateAssemblyProductAttribute>
|
||||
<Description>SharpCompress is a compression library for NET 4.8/NET Standard 2.0/NET Standard 2.1/NET 5.0/NET 6.0/NET 7.0/NET 8.0/NET 9.0/NET 10.0 that can unrar, decompress 7zip, decompress xz, zip/unzip, tar/untar lzip/unlzip, bzip2/unbzip2 and gzip/ungzip with forward-only reading and file random access APIs. Write support for zip/tar/bzip2/gzip is implemented.</Description>
|
||||
<Description>SharpCompress is a compression library for NET 4.8/NET Standard 2.0/NET Standard 2.1/NET 5.0/NET 6.0/NET 7.0/NET 8.0/NET 9.0/NET 10.0 that can unrar, decompress 7zip, decompress xz, zip/unzip, tar/untar, lzip/unlzip, bzip2/unbzip2 and gzip/ungzip with forward-only reading and file random access APIs. Write support for zip/tar/bzip2/gzip/7zip is implemented.</Description>
|
||||
<PublishRepositoryUrl>true</PublishRepositoryUrl>
|
||||
<IncludeSymbols>true</IncludeSymbols>
|
||||
<DebugType>embedded</DebugType>
|
||||
|
||||
@@ -2,6 +2,7 @@ using System;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using SharpCompress.Common.SevenZip;
|
||||
|
||||
namespace SharpCompress.Writers.SevenZip;
|
||||
|
||||
@@ -9,18 +10,75 @@ public partial class SevenZipWriter
|
||||
{
|
||||
/// <summary>
/// Asynchronously writes a file entry to the 7z archive.
/// A seekable source of length zero, or a source that yields no data, is recorded
/// as an empty entry with no packed stream.
/// Note: LZMA compression itself is synchronous; async is used for stream copying.
/// </summary>
/// <param name="filename">Entry name; it is normalized before being stored.</param>
/// <param name="source">Stream supplying the entry's uncompressed content.</param>
/// <param name="modificationTime">Modification time recorded for the entry, if any.</param>
/// <param name="cancellationToken">Token checked on entry and forwarded to the compressor.</param>
/// <exception cref="ObjectDisposedException">The archive has already been finalized.</exception>
/// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
public override async ValueTask WriteAsync(
    string filename,
    Stream source,
    DateTime? modificationTime,
    CancellationToken cancellationToken = default
)
{
    if (finalized)
    {
        throw new ObjectDisposedException(
            nameof(SevenZipWriter),
            "Cannot write to a finalized archive."
        );
    }

    cancellationToken.ThrowIfCancellationRequested();

    filename = NormalizeFilename(filename);
    var progressStream = WrapWithProgress(source, filename);

    // Single place that records the file entry, parameterized by emptiness.
    void AddFileEntry(bool empty) =>
        entries.Add(
            new SevenZipWriteEntry
            {
                Name = filename,
                ModificationTime = modificationTime,
                IsDirectory = false,
                IsEmpty = empty,
            }
        );

    // Known-empty seekable sources skip compression entirely.
    if (source.CanSeek && source.Length == 0)
    {
        AddFileEntry(empty: true);
        return;
    }

    var output = OutputStream.NotNull();
    var rollbackPosition = output.Position;

    var packed = await new SevenZipStreamsCompressor(output)
        .CompressAsync(
            progressStream,
            sevenZipOptions.CompressionType,
            sevenZipOptions.LzmaProperties,
            cancellationToken
        )
        .ConfigureAwait(false);

    if (packed.Folder.GetUnpackSize() == 0)
    {
        // The source turned out to be empty: discard whatever the encoder wrote
        // and record the entry as empty instead of keeping a packed stream.
        output.Position = rollbackPosition;
        output.SetLength(rollbackPosition);
        AddFileEntry(empty: true);
    }
    else
    {
        packedStreams.Add(packed);
        AddFileEntry(empty: false);
    }
}
|
||||
|
||||
/// <summary>
|
||||
|
||||
159
tests/SharpCompress.Test/SevenZip/SevenZipWriterAsyncTests.cs
Normal file
159
tests/SharpCompress.Test/SevenZip/SevenZipWriterAsyncTests.cs
Normal file
@@ -0,0 +1,159 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
using SharpCompress.Archives.SevenZip;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Test.Mocks;
|
||||
using SharpCompress.Writers;
|
||||
using SharpCompress.Writers.SevenZip;
|
||||
using Xunit;
|
||||
|
||||
namespace SharpCompress.Test.SevenZip;
|
||||
|
||||
/// <summary>
/// Round-trip and cancellation tests for the asynchronous <see cref="SevenZipWriter"/> API:
/// each test writes an archive into a <see cref="MemoryStream"/> and, where applicable,
/// reads it back through <see cref="SevenZipArchive"/>.
/// </summary>
public class SevenZipWriterAsyncTests : TestBase
{
    /// <summary>Rewinds the written archive stream and opens it for reading.</summary>
    private static SevenZipArchive OpenWrittenArchive(MemoryStream archiveStream)
    {
        archiveStream.Position = 0;
        return (SevenZipArchive)SevenZipArchive.OpenArchive(archiveStream);
    }

    /// <summary>Copies the whole of <paramref name="stream"/> into a byte array.</summary>
    private static byte[] Drain(Stream stream)
    {
        using var sink = new MemoryStream();
        stream.CopyTo(sink);
        return sink.ToArray();
    }

    // Single file written with default options must come back byte-for-byte.
    [Fact]
    public async ValueTask SevenZipWriter_Async_SingleFile_RoundTrip()
    {
        var expected = "Hello, async 7z world!"u8.ToArray();

        using var archiveStream = new MemoryStream();

        await using (
            var writer = new SevenZipWriter(
                new AsyncOnlyStream(archiveStream),
                new SevenZipWriterOptions()
            )
        )
        {
            await writer.WriteAsync("test.txt", new MemoryStream(expected), DateTime.UtcNow);
        }

        using var archive = OpenWrittenArchive(archiveStream);
        var files = archive.Entries.Where(e => !e.IsDirectory).ToList();
        Assert.Single(files);

        var file = files[0];
        Assert.Equal("test.txt", file.Key);
        Assert.Equal(expected.Length, (int)file.Size);

        byte[] actual;
        using (var entryStream = file.OpenEntryStream())
        {
            actual = Drain(entryStream);
        }

        Assert.Equal(expected, actual);
    }

    // A directory plus two files (LZMA2) must all be listed after reopening.
    [Fact]
    public async ValueTask SevenZipWriter_Async_WithDirectory_RoundTrip()
    {
        using var archiveStream = new MemoryStream();

        await using (
            var writer = new SevenZipWriter(
                new AsyncOnlyStream(archiveStream),
                new SevenZipWriterOptions(CompressionType.LZMA2)
            )
        )
        {
            await writer.WriteDirectoryAsync("mydir", DateTime.UtcNow);

            var firstFile = new MemoryStream(Encoding.UTF8.GetBytes("file one"));
            await writer.WriteAsync("mydir/file1.txt", firstFile, DateTime.UtcNow);

            var secondFile = new MemoryStream(Encoding.UTF8.GetBytes("file two"));
            await writer.WriteAsync("mydir/file2.txt", secondFile, DateTime.UtcNow);
        }

        using var archive = OpenWrittenArchive(archiveStream);
        var all = archive.Entries.ToList();

        Assert.Equal(3, all.Count);
        Assert.Contains(all, e => e.IsDirectory && e.Key == "mydir");
        Assert.Contains(all, e => !e.IsDirectory && e.Key == "mydir/file1.txt");
        Assert.Contains(all, e => !e.IsDirectory && e.Key == "mydir/file2.txt");
    }

    // The writer obtained from WriterFactory must produce a readable archive too.
    [Fact]
    public async ValueTask SevenZipWriter_Async_ViaWriterFactory()
    {
        var expected = "Factory-created async archive"u8.ToArray();

        using var archiveStream = new MemoryStream();

        await using (
            var writer = await WriterFactory.OpenAsyncWriter(
                new AsyncOnlyStream(archiveStream),
                ArchiveType.SevenZip,
                new SevenZipWriterOptions()
            )
        )
        {
            await writer.WriteAsync("factory.txt", new MemoryStream(expected), DateTime.UtcNow);
        }

        using var archive = OpenWrittenArchive(archiveStream);
        var file = archive.Entries.Single(e => !e.IsDirectory);

        byte[] actual;
        using (var entryStream = file.OpenEntryStream())
        {
            actual = Drain(entryStream);
        }

        Assert.Equal("factory.txt", file.Key);
        Assert.Equal(expected, actual);
    }

    // The source is wrapped in AsyncOnlyStream, so the write path has to read it asynchronously.
    [Fact]
    public async ValueTask SevenZipWriter_Async_UsesAsyncSourceReads()
    {
        var expected = "source stream supports async reads only"u8.ToArray();

        using var archiveStream = new MemoryStream();

        await using (var writer = new SevenZipWriter(archiveStream, new SevenZipWriterOptions()))
        {
            using var source = new AsyncOnlyStream(new MemoryStream(expected));
            await writer.WriteAsync("async-source.txt", source, DateTime.UtcNow);
        }

        using var archive = OpenWrittenArchive(archiveStream);
        var file = archive.Entries.Single(e => !e.IsDirectory);

        byte[] actual;
        using (var entryStream = file.OpenEntryStream())
        {
            actual = Drain(entryStream);
        }

        Assert.Equal("async-source.txt", file.Key);
        Assert.Equal(expected, actual);
    }

    // A token cancelled before the call must surface as OperationCanceledException.
    [Fact]
    public async ValueTask SevenZipWriter_Async_Cancelled_Throws()
    {
        using var archiveStream = new MemoryStream();
        await using var writer = new SevenZipWriter(archiveStream, new SevenZipWriterOptions());

        using var source = new MemoryStream("cancel me"u8.ToArray());
        using var cts = new CancellationTokenSource();
        cts.Cancel();

        await Assert.ThrowsAnyAsync<OperationCanceledException>(() =>
            writer.WriteAsync("cancel.txt", source, DateTime.UtcNow, cts.Token).AsTask()
        );
    }
}
|
||||
Reference in New Issue
Block a user