Compare commits

...

25 Commits

Author SHA1 Message Date
Adam Hathcock
5c4b83e501 Merge remote-tracking branch 'origin/master' into copilot/add-performance-benchmarking
# Conflicts:
#	tests/SharpCompress.Performance/packages.lock.json
2026-01-14 14:52:21 +00:00
copilot-swe-agent[bot]
80ac10a5fe Merge latest master branch and resolve conflicts
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-12 14:11:57 +00:00
Adam Hathcock
65dba509e0 Merge pull request #1121 from adamhathcock/adam/async
More async for ZipReader and ZipWriter
2026-01-12 14:07:37 +00:00
Adam Hathcock
3807c3ce2a Merge pull request #1127 from adamhathcock/copilot/sub-pr-1121-yet-again
Fix async test method naming in ZipArchiveAsyncTests
2026-01-12 12:10:51 +00:00
Adam Hathcock
d2f328af01 Merge pull request #1126 from adamhathcock/copilot/sub-pr-1121-another-one
Fix typo in TestBase.cs comment
2026-01-12 12:10:11 +00:00
Adam Hathcock
c3ffcf4fe8 Merge pull request #1125 from adamhathcock/copilot/sub-pr-1121-again
[WIP] Update ZipReader and ZipWriter based on review feedback
2026-01-12 12:09:42 +00:00
copilot-swe-agent[bot]
95c409d979 Change File.Create to File.OpenWrite in TarReaderAsyncTests for consistency
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-12 12:09:12 +00:00
Adam Hathcock
dadf9e71bb Merge pull request #1124 from adamhathcock/copilot/sub-pr-1121
[WIP] Address feedback on async implementation for ZipReader and ZipWriter
2026-01-12 12:09:10 +00:00
copilot-swe-agent[bot]
f4b1780d8a Rename async test methods to use _Async suffix instead of _Sync
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-12 12:08:23 +00:00
copilot-swe-agent[bot]
64a09eb0f8 Fix typo in TestBase.cs comment: 'akways' -> 'always'
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-12 12:06:47 +00:00
copilot-swe-agent[bot]
3a636531e8 Initial plan 2026-01-12 12:05:34 +00:00
copilot-swe-agent[bot]
292da90184 Initial plan 2026-01-12 12:05:22 +00:00
copilot-swe-agent[bot]
90c8ff8650 Initial plan 2026-01-12 12:05:11 +00:00
Adam Hathcock
8d0ac5062f Merge pull request #1123 from adamhathcock/adam/add-more-docs
Add more documentation
2026-01-11 12:20:55 +00:00
Adam Hathcock
b2d1505e5c remove section on allocations 2026-01-11 12:20:40 +00:00
Adam Hathcock
47037a4b9d docs: Add explicit guideline that agents should never commit to git
Agents should stage files and leave committing to the user. Only create
commits when the user explicitly requests them.
2026-01-08 15:49:10 +00:00
Adam Hathcock
507b1e35d8 docs: Remove broken references to non-existent files
- Remove CONTRIBUTING.md reference from ARCHITECTURE.md
- Remove ERRORS.md reference from API.md
- Remove TROUBLESHOOTING.md reference from ENCODING.md
- Remove TROUBLESHOOTING.md reference from PERFORMANCE.md

All markdown files now reference only existing documentation.
2026-01-08 15:47:57 +00:00
Adam Hathcock
2839e1d33f Update docs/ENCODING.md
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2026-01-08 15:39:47 +00:00
Adam Hathcock
8c876c70af Add more documentation 2026-01-08 15:34:48 +00:00
copilot-swe-agent[bot]
a92ce90252 Fix path validation and add iteration cleanup to prevent file reuse
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:38:17 +00:00
copilot-swe-agent[bot]
e519f61f0f Address code review feedback: fix exception handling and initialization order
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:36:08 +00:00
copilot-swe-agent[bot]
49f2271253 Format code with CSharpier
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:33:06 +00:00
copilot-swe-agent[bot]
5b1d11bc1d Add WriteBenchmarks, BaselineComparisonBenchmarks, and comprehensive documentation
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:31:32 +00:00
copilot-swe-agent[bot]
aa3a40d968 Add BenchmarkDotNet integration with Archive and Reader benchmarks
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:27:50 +00:00
copilot-swe-agent[bot]
6125654b2e Initial plan 2026-01-05 17:21:29 +00:00
23 changed files with 2928 additions and 57 deletions

4
.gitignore vendored
View File

@@ -20,3 +20,7 @@ artifacts/
.DS_Store
*.snupkg
# BenchmarkDotNet artifacts
BenchmarkDotNet.Artifacts/
**/BenchmarkDotNet.Artifacts/

View File

@@ -14,6 +14,7 @@ SharpCompress is a pure C# compression library supporting multiple archive forma
- Follow the existing code style and patterns in the codebase.
## General Instructions
- **Agents should NEVER commit to git** - Agents should stage files and leave committing to the user. Only create commits when the user explicitly requests them.
- Make only high confidence suggestions when reviewing code changes.
- Write code with good maintainability practices, including comments on why certain design decisions were made.
- Handle edge cases and write clear exception handling.
@@ -110,7 +111,7 @@ SharpCompress supports multiple archive and compression formats:
- **Archive Formats**: Zip, Tar, 7Zip, Rar (read-only)
- **Compression**: DEFLATE, BZip2, LZMA/LZMA2, PPMd, ZStandard (decompress only), Deflate64 (decompress only)
- **Combined Formats**: Tar.GZip, Tar.BZip2, Tar.LZip, Tar.XZ, Tar.ZStandard
- See FORMATS.md for complete format support matrix
- See [docs/FORMATS.md](docs/FORMATS.md) for complete format support matrix
### Stream Handling Rules
- **Disposal**: As of version 0.21, SharpCompress closes wrapped streams by default

View File

@@ -1,5 +1,6 @@
<Project>
<ItemGroup>
<PackageVersion Include="BenchmarkDotNet" Version="0.14.0" />
<PackageVersion Include="Bullseye" Version="6.1.0" />
<PackageVersion Include="AwesomeAssertions" Version="9.3.0" />
<PackageVersion Include="Glob" Version="1.1.9" />

View File

@@ -4,7 +4,7 @@ SharpCompress is a compression library in pure C# for .NET Framework 4.8, .NET 8
The major feature is support for non-seekable streams so large files can be processed on the fly (i.e. download stream).
**NEW:** All I/O operations now support async/await for improved performance and scalability. See the [USAGE.md](USAGE.md#async-examples) for examples.
**NEW:** All I/O operations now support async/await for improved performance and scalability. See the [USAGE.md](docs/USAGE.md#async-examples) for examples.
GitHub Actions Build -
[![SharpCompress](https://github.com/adamhathcock/sharpcompress/actions/workflows/dotnetcore.yml/badge.svg)](https://github.com/adamhathcock/sharpcompress/actions/workflows/dotnetcore.yml)
@@ -14,7 +14,7 @@ GitHub Actions Build -
Post Issues on Github!
Check the [Supported Formats](FORMATS.md) and [Basic Usage.](USAGE.md)
Check the [Supported Formats](docs/FORMATS.md) and [Basic Usage.](docs/USAGE.md)
## Recommended Formats

489
docs/API.md Normal file
View File

@@ -0,0 +1,489 @@
# API Quick Reference
Quick reference for commonly used SharpCompress APIs.
## Factory Methods
### Opening Archives
```csharp
// Auto-detect format
using (var reader = ReaderFactory.Open(stream))
{
// Works with Zip, Tar, GZip, Rar, 7Zip, etc.
}
// Specific format - Archive API
using (var archive = ZipArchive.Open("file.zip"))
using (var archive = TarArchive.Open("file.tar"))
using (var archive = RarArchive.Open("file.rar"))
using (var archive = SevenZipArchive.Open("file.7z"))
using (var archive = GZipArchive.Open("file.gz"))
// With options
var options = new ReaderOptions
{
Password = "password",
LeaveStreamOpen = true,
ArchiveEncoding = new ArchiveEncoding { Default = Encoding.GetEncoding(932) }
};
using (var archive = ZipArchive.Open("encrypted.zip", options))
```
### Creating Archives
```csharp
// Writer Factory
using (var writer = WriterFactory.Open(stream, ArchiveType.Zip, CompressionType.Deflate))
{
// Write entries
}
// Specific writer
using (var archive = ZipArchive.Create())
using (var archive = TarArchive.Create())
using (var archive = GZipArchive.Create())
// With options
var options = new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 9,
LeaveStreamOpen = false
};
using (var archive = ZipArchive.Create())
{
archive.SaveTo("output.zip", options);
}
```
---
## Archive API Methods
### Reading/Extracting
```csharp
using (var archive = ZipArchive.Open("file.zip"))
{
// Get all entries
IEnumerable<IEntry> entries = archive.Entries;
// Find specific entry
var entry = archive.Entries.FirstOrDefault(e => e.Key == "file.txt");
// Extract all
archive.WriteToDirectory(@"C:\output", new ExtractionOptions
{
ExtractFullPath = true,
Overwrite = true
});
// Extract single entry
var entry = archive.Entries.First();
entry.WriteToFile(@"C:\output\file.txt");
entry.WriteToFile(@"C:\output\file.txt", new ExtractionOptions { Overwrite = true });
// Get entry stream
using (var stream = entry.OpenEntryStream())
{
stream.CopyTo(outputStream);
}
}
// Async variants
await archive.WriteToDirectoryAsync(@"C:\output", options, cancellationToken);
using (var stream = await entry.OpenEntryStreamAsync(cancellationToken))
{
// ...
}
```
### Entry Properties
```csharp
foreach (var entry in archive.Entries)
{
string name = entry.Key; // Entry name/path
long size = entry.Size; // Uncompressed size
long compressedSize = entry.CompressedSize;
bool isDir = entry.IsDirectory;
DateTime? modTime = entry.LastModifiedTime;
CompressionType compression = entry.CompressionType;
}
```
### Creating Archives
```csharp
using (var archive = ZipArchive.Create())
{
// Add file
archive.AddEntry("file.txt", "C:\\source\\file.txt");
// Add multiple files
archive.AddAllFromDirectory("C:\\source");
archive.AddAllFromDirectory("C:\\source", "*.txt"); // Pattern
// Save to file
archive.SaveTo("output.zip", CompressionType.Deflate);
// Save to stream
archive.SaveTo(outputStream, new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 9,
LeaveStreamOpen = true
});
}
```
---
## Reader API Methods
### Forward-Only Reading
```csharp
using (var stream = File.OpenRead("file.zip"))
using (var reader = ReaderFactory.Open(stream))
{
while (reader.MoveToNextEntry())
{
IEntry entry = reader.Entry;
if (!entry.IsDirectory)
{
// Extract entry
reader.WriteEntryToDirectory(@"C:\output");
reader.WriteEntryToFile(@"C:\output\file.txt");
// Or get stream
using (var entryStream = reader.OpenEntryStream())
{
entryStream.CopyTo(outputStream);
}
}
}
}
// Async variants
while (await reader.MoveToNextEntryAsync())
{
await reader.WriteEntryToFileAsync(@"C:\output\" + reader.Entry.Key, cancellationToken);
}
// Async extraction
await reader.WriteAllToDirectoryAsync(@"C:\output",
new ExtractionOptions { ExtractFullPath = true, Overwrite = true },
cancellationToken);
```
---
## Writer API Methods
### Creating Archives (Streaming)
```csharp
using (var stream = File.Create("output.zip"))
using (var writer = WriterFactory.Open(stream, ArchiveType.Zip, CompressionType.Deflate))
{
// Write single file
using (var fileStream = File.OpenRead("source.txt"))
{
writer.Write("entry.txt", fileStream, DateTime.Now);
}
// Write directory
writer.WriteAll("C:\\source", "*", SearchOption.AllDirectories);
writer.WriteAll("C:\\source", "*.txt", SearchOption.TopDirectoryOnly);
// Async variants
using (var fileStream = File.OpenRead("source.txt"))
{
await writer.WriteAsync("entry.txt", fileStream, DateTime.Now, cancellationToken);
}
await writer.WriteAllAsync("C:\\source", "*", SearchOption.AllDirectories, cancellationToken);
}
```
---
## Common Options
### ReaderOptions
```csharp
var options = new ReaderOptions
{
Password = "password", // For encrypted archives
LeaveStreamOpen = true, // Don't close wrapped stream
ArchiveEncoding = new ArchiveEncoding // Custom character encoding
{
Default = Encoding.GetEncoding(932)
}
};
using (var archive = ZipArchive.Open("file.zip", options))
{
// ...
}
```
### WriterOptions
```csharp
var options = new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 9, // 0-9 for Deflate
LeaveStreamOpen = true, // Don't close stream
};
archive.SaveTo("output.zip", options);
```
### ExtractionOptions
```csharp
var options = new ExtractionOptions
{
ExtractFullPath = true, // Recreate directory structure
Overwrite = true, // Overwrite existing files
PreserveFileTime = true // Keep original timestamps
};
archive.WriteToDirectory(@"C:\output", options);
```
---
## Compression Types
### Available Compressions
```csharp
// For creating archives
CompressionType.None // No compression (store)
CompressionType.Deflate // DEFLATE (default for ZIP/GZip)
CompressionType.BZip2 // BZip2
CompressionType.LZMA // LZMA (for 7Zip, LZip, XZ)
CompressionType.PPMd // PPMd (for ZIP)
CompressionType.Rar // RAR compression (read-only)
// For Tar archives
// Use CompressionType in TarWriter constructor
using (var writer = TarWriter(stream, CompressionType.GZip)) // Tar.GZip
using (var writer = TarWriter(stream, CompressionType.BZip2)) // Tar.BZip2
```
### Archive Types
```csharp
ArchiveType.Zip
ArchiveType.Tar
ArchiveType.GZip
ArchiveType.BZip2
ArchiveType.Rar
ArchiveType.SevenZip
ArchiveType.XZ
ArchiveType.ZStandard
```
---
## Patterns & Examples
### Extract with Error Handling
```csharp
try
{
using (var archive = ZipArchive.Open("archive.zip",
new ReaderOptions { Password = "password" }))
{
archive.WriteToDirectory(@"C:\output", new ExtractionOptions
{
ExtractFullPath = true,
Overwrite = true
});
}
}
catch (PasswordRequiredException)
{
Console.WriteLine("Password required");
}
catch (InvalidArchiveException)
{
Console.WriteLine("Archive is invalid");
}
catch (SharpCompressException ex)
{
Console.WriteLine($"Error: {ex.Message}");
}
```
### Extract with Progress
```csharp
var progress = new Progress<ProgressReport>(report =>
{
Console.WriteLine($"Extracting {report.EntryPath}: {report.PercentComplete}%");
});
var options = new ReaderOptions { Progress = progress };
using (var archive = ZipArchive.Open("archive.zip", options))
{
archive.WriteToDirectory(@"C:\output");
}
```
### Async Extract with Cancellation
```csharp
var cts = new CancellationTokenSource();
cts.CancelAfter(TimeSpan.FromMinutes(5));
try
{
using (var archive = ZipArchive.Open("archive.zip"))
{
await archive.WriteToDirectoryAsync(@"C:\output",
new ExtractionOptions { ExtractFullPath = true, Overwrite = true },
cts.Token);
}
}
catch (OperationCanceledException)
{
Console.WriteLine("Extraction cancelled");
}
```
### Create with Custom Compression
```csharp
using (var archive = ZipArchive.Create())
{
archive.AddAllFromDirectory(@"D:\source");
// Fastest
archive.SaveTo("fast.zip", new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 1
});
// Balanced (default)
archive.SaveTo("normal.zip", CompressionType.Deflate);
// Best compression
archive.SaveTo("best.zip", new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 9
});
}
```
### Stream Processing (No File I/O)
```csharp
using (var outputStream = new MemoryStream())
using (var archive = ZipArchive.Create())
{
// Add content from memory
using (var contentStream = new MemoryStream(Encoding.UTF8.GetBytes("Hello")))
{
archive.AddEntry("file.txt", contentStream);
}
// Save to memory
archive.SaveTo(outputStream, CompressionType.Deflate);
// Get bytes
byte[] archiveBytes = outputStream.ToArray();
}
```
### Extract Specific Files
```csharp
using (var archive = ZipArchive.Open("archive.zip"))
{
var filesToExtract = new[] { "file1.txt", "file2.txt" };
foreach (var entry in archive.Entries.Where(e => filesToExtract.Contains(e.Key)))
{
entry.WriteToFile(@"C:\output\" + entry.Key);
}
}
```
### List Archive Contents
```csharp
using (var archive = ZipArchive.Open("archive.zip"))
{
foreach (var entry in archive.Entries)
{
if (entry.IsDirectory)
Console.WriteLine($"[DIR] {entry.Key}");
else
Console.WriteLine($"[FILE] {entry.Key} ({entry.Size} bytes)");
}
}
```
---
## Common Mistakes
### ✗ Wrong - Stream not disposed
```csharp
var stream = File.OpenRead("archive.zip");
var archive = ZipArchive.Open(stream);
archive.WriteToDirectory(@"C:\output");
// stream not disposed - leaked resource
```
### ✓ Correct - Using blocks
```csharp
using (var stream = File.OpenRead("archive.zip"))
using (var archive = ZipArchive.Open(stream))
{
archive.WriteToDirectory(@"C:\output");
}
// Both properly disposed
```
### ✗ Wrong - Mixing API styles
```csharp
// Loading entire archive then iterating
using (var archive = ZipArchive.Open("large.zip"))
{
var entries = archive.Entries.ToList(); // Loads all in memory
foreach (var e in entries)
{
e.WriteToFile(...); // Then extracts each
}
}
```
### ✓ Correct - Use Reader for large files
```csharp
// Streaming iteration
using (var stream = File.OpenRead("large.zip"))
using (var reader = ReaderFactory.Open(stream))
{
while (reader.MoveToNextEntry())
{
reader.WriteEntryToDirectory(@"C:\output");
}
}
```
---
## Related Documentation
- [USAGE.md](USAGE.md) - Complete code examples
- [FORMATS.md](FORMATS.md) - Supported formats
- [PERFORMANCE.md](PERFORMANCE.md) - API selection guide

659
docs/ARCHITECTURE.md Normal file
View File

@@ -0,0 +1,659 @@
# SharpCompress Architecture Guide
This guide explains the internal architecture and design patterns of SharpCompress for contributors.
## Overview
SharpCompress is organized into three main layers:
```
┌─────────────────────────────────────────┐
│ User-Facing APIs (Top Layer) │
│ Archive, Reader, Writer Factories │
├─────────────────────────────────────────┤
│ Format-Specific Implementations │
│ ZipArchive, TarReader, GZipWriter, │
│ RarArchive, SevenZipArchive, etc. │
├─────────────────────────────────────────┤
│ Compression & Crypto (Bottom Layer) │
│ Deflate, LZMA, BZip2, AES, CRC32 │
└─────────────────────────────────────────┘
```
---
## Directory Structure
### `src/SharpCompress/`
#### `Archives/` - Archive Implementations
Contains `IArchive` implementations for seekable, random-access APIs.
**Key Files:**
- `AbstractArchive.cs` - Base class for all archives
- `IArchive.cs` - Archive interface definition
- `ArchiveFactory.cs` - Factory for opening archives
- Format-specific: `ZipArchive.cs`, `TarArchive.cs`, `RarArchive.cs`, `SevenZipArchive.cs`, `GZipArchive.cs`
**Use Archive API when:**
- Stream is seekable (file, memory)
- Need random access to entries
- Archive fits in memory
- Simplicity is important
#### `Readers/` - Reader Implementations
Contains `IReader` implementations for forward-only, non-seekable APIs.
**Key Files:**
- `AbstractReader.cs` - Base reader class
- `IReader.cs` - Reader interface
- `ReaderFactory.cs` - Auto-detection factory
- `ReaderOptions.cs` - Configuration for readers
- Format-specific: `ZipReader.cs`, `TarReader.cs`, `GZipReader.cs`, `RarReader.cs`, etc.
**Use Reader API when:**
- Stream is non-seekable (network, pipe, compressed)
- Processing large files
- Memory is limited
- Forward-only processing is acceptable
#### `Writers/` - Writer Implementations
Contains `IWriter` implementations for forward-only writing.
**Key Files:**
- `AbstractWriter.cs` - Base writer class
- `IWriter.cs` - Writer interface
- `WriterFactory.cs` - Factory for creating writers
- `WriterOptions.cs` - Configuration for writers
- Format-specific: `ZipWriter.cs`, `TarWriter.cs`, `GZipWriter.cs`
#### `Factories/` - Format Detection
Factory classes for auto-detecting archive format and creating appropriate readers/writers.
**Key Files:**
- `Factory.cs` - Base factory class
- `IFactory.cs` - Factory interface
- Format-specific: `ZipFactory.cs`, `TarFactory.cs`, `RarFactory.cs`, etc.
**How It Works:**
1. `ReaderFactory.Open(stream)` probes stream signatures
2. Identifies format by magic bytes
3. Creates appropriate reader instance
4. Returns generic `IReader` interface
#### `Common/` - Shared Types
Common types, options, and enumerations used across formats.
**Key Files:**
- `IEntry.cs` - Entry interface (file within archive)
- `Entry.cs` - Entry implementation
- `ArchiveType.cs` - Enum for archive formats
- `CompressionType.cs` - Enum for compression methods
- `ArchiveEncoding.cs` - Character encoding configuration
- `ExtractionOptions.cs` - Extraction configuration
- Format-specific headers: `Zip/Headers/`, `Tar/Headers/`, `Rar/Headers/`, etc.
#### `Compressors/` - Compression Algorithms
Low-level compression streams implementing specific algorithms.
**Algorithms:**
- `Deflate/` - DEFLATE compression (Zip default)
- `BZip2/` - BZip2 compression
- `LZMA/` - LZMA compression (7Zip, XZ, LZip)
- `PPMd/` - Prediction by Partial Matching (Zip, 7Zip)
- `ZStandard/` - ZStandard compression (decompression only)
- `Xz/` - XZ format (decompression only)
- `Rar/` - RAR-specific unpacking
- `Arj/`, `Arc/`, `Ace/` - Legacy format decompression
- `Filters/` - BCJ/BCJ2 filters for executable compression
**Each Compressor:**
- Implements a `Stream` subclass
- Provides both compression and decompression
- Some are read-only (decompression only)
#### `Crypto/` - Encryption & Hashing
Cryptographic functions and stream wrappers.
**Key Files:**
- `Crc32Stream.cs` - CRC32 calculation wrapper
- `BlockTransformer.cs` - Block cipher transformations
- AES, PKWare, WinZip encryption implementations
#### `IO/` - Stream Utilities
Stream wrappers and utilities.
**Key Classes:**
- `SharpCompressStream` - Base stream class
- `ProgressReportingStream` - Progress tracking wrapper
- `MarkingBinaryReader` - Binary reader with position marks
- `BufferedSubStream` - Buffered read-only substream
- `ReadOnlySubStream` - Read-only view of parent stream
- `NonDisposingStream` - Prevents wrapped stream disposal
---
## Design Patterns
### 1. Factory Pattern
**Purpose:** Auto-detect format and create appropriate reader/writer.
**Example:**
```csharp
// User calls factory
using (var reader = ReaderFactory.Open(stream)) // Returns IReader
{
while (reader.MoveToNextEntry())
{
// Process entry
}
}
// Behind the scenes:
// 1. Factory.Open() probes stream signatures
// 2. Detects format (Zip, Tar, Rar, etc.)
// 3. Creates appropriate reader (ZipReader, TarReader, etc.)
// 4. Returns as generic IReader interface
```
**Files:**
- `src/SharpCompress/Factories/ReaderFactory.cs`
- `src/SharpCompress/Factories/WriterFactory.cs`
- `src/SharpCompress/Factories/ArchiveFactory.cs`
### 2. Strategy Pattern
**Purpose:** Encapsulate compression algorithms as swappable strategies.
**Example:**
```csharp
// Different compression strategies
CompressionType.Deflate // DEFLATE
CompressionType.BZip2 // BZip2
CompressionType.LZMA // LZMA
CompressionType.PPMd // PPMd
// Writer uses strategy pattern
var archive = ZipArchive.Create();
archive.SaveTo("output.zip", CompressionType.Deflate); // Use Deflate
archive.SaveTo("output.bz2", CompressionType.BZip2); // Use BZip2
```
**Files:**
- `src/SharpCompress/Compressors/` - Strategy implementations
### 3. Decorator Pattern
**Purpose:** Wrap streams with additional functionality.
**Example:**
```csharp
// Progress reporting decorator
var progressStream = new ProgressReportingStream(baseStream, progressReporter);
progressStream.Read(buffer, 0, buffer.Length); // Reports progress
// Non-disposing decorator
var nonDisposingStream = new NonDisposingStream(baseStream);
using (var compressor = new DeflateStream(nonDisposingStream))
{
// baseStream won't be disposed when compressor is disposed
}
```
**Files:**
- `src/SharpCompress/IO/ProgressReportingStream.cs`
- `src/SharpCompress/IO/NonDisposingStream.cs`
### 4. Template Method Pattern
**Purpose:** Define algorithm skeleton in base class, let subclasses fill details.
**Example:**
```csharp
// AbstractArchive defines common archive operations
public abstract class AbstractArchive : IArchive
{
// Template methods
public virtual void WriteToDirectory(string destinationDirectory, ExtractionOptions options)
{
// Common extraction logic
foreach (var entry in Entries)
{
// Call subclass method
entry.WriteToFile(destinationPath, options);
}
}
// Subclasses override format-specific details
protected abstract Entry CreateEntry(EntryData data);
}
```
**Files:**
- `src/SharpCompress/Archives/AbstractArchive.cs`
- `src/SharpCompress/Readers/AbstractReader.cs`
### 5. Iterator Pattern
**Purpose:** Provide sequential access to entries.
**Example:**
```csharp
// Archive API - provides collection
IEnumerable<IEntry> entries = archive.Entries;
foreach (var entry in entries)
{
// Random access - entries already in memory
}
// Reader API - provides iterator
IReader reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
// Forward-only iteration - one entry at a time
var entry = reader.Entry;
}
```
---
## Key Interfaces
### IArchive - Random Access API
```csharp
public interface IArchive : IDisposable
{
IEnumerable<IEntry> Entries { get; }
void WriteToDirectory(string destinationDirectory,
ExtractionOptions options = null);
IEntry FirstOrDefault(Func<IEntry, bool> predicate);
// ... format-specific methods
}
```
**Implementations:** `ZipArchive`, `TarArchive`, `RarArchive`, `SevenZipArchive`, `GZipArchive`
### IReader - Forward-Only API
```csharp
public interface IReader : IDisposable
{
IEntry Entry { get; }
bool MoveToNextEntry();
void WriteEntryToDirectory(string destinationDirectory,
ExtractionOptions options = null);
Stream OpenEntryStream();
// ... async variants
}
```
**Implementations:** `ZipReader`, `TarReader`, `RarReader`, `GZipReader`, etc.
### IWriter - Writing API
```csharp
public interface IWriter : IDisposable
{
void Write(string entryPath, Stream source,
DateTime? modificationTime = null);
void WriteAll(string sourceDirectory, string searchPattern,
SearchOption searchOption);
// ... async variants
}
```
**Implementations:** `ZipWriter`, `TarWriter`, `GZipWriter`
### IEntry - Archive Entry
```csharp
public interface IEntry
{
string Key { get; }
uint Size { get; }
uint CompressedSize { get; }
bool IsDirectory { get; }
DateTime? LastModifiedTime { get; }
CompressionType CompressionType { get; }
void WriteToFile(string fullPath, ExtractionOptions options = null);
void WriteToStream(Stream destinationStream);
Stream OpenEntryStream();
// ... async variants
}
```
---
## Adding Support for a New Format
### Step 1: Understand the Format
- Research format specification
- Understand compression/encryption used
- Study existing similar formats in codebase
### Step 2: Create Format Structure Classes
**Create:** `src/SharpCompress/Common/NewFormat/`
```csharp
// Headers and data structures
public class NewFormatHeader
{
public uint Magic { get; set; }
public ushort Version { get; set; }
// ... other fields
public static NewFormatHeader Read(BinaryReader reader)
{
// Deserialize from binary
}
}
public class NewFormatEntry
{
public string FileName { get; set; }
public uint CompressedSize { get; set; }
public uint UncompressedSize { get; set; }
// ... other fields
}
```
### Step 3: Create Archive Implementation
**Create:** `src/SharpCompress/Archives/NewFormat/NewFormatArchive.cs`
```csharp
public class NewFormatArchive : AbstractArchive
{
private NewFormatHeader _header;
private List<NewFormatEntry> _entries;
public static NewFormatArchive Open(Stream stream)
{
var archive = new NewFormatArchive();
archive._header = NewFormatHeader.Read(stream);
archive.LoadEntries(stream);
return archive;
}
public override IEnumerable<IEntry> Entries => _entries.Select(e => new Entry(e));
protected override Stream OpenEntryStream(Entry entry)
{
// Return decompressed stream for entry
}
// ... other abstract method implementations
}
```
### Step 4: Create Reader Implementation
**Create:** `src/SharpCompress/Readers/NewFormat/NewFormatReader.cs`
```csharp
public class NewFormatReader : AbstractReader
{
private NewFormatHeader _header;
private BinaryReader _reader;
public NewFormatReader(Stream stream)
{
_reader = new BinaryReader(stream);
_header = NewFormatHeader.Read(_reader);
}
public override bool MoveToNextEntry()
{
// Read next entry header
if (!_reader.BaseStream.CanRead) return false;
var entryData = NewFormatEntry.Read(_reader);
// ... set this.Entry
return entryData != null;
}
// ... other abstract method implementations
}
```
### Step 5: Create Factory
**Create:** `src/SharpCompress/Factories/NewFormatFactory.cs`
```csharp
public class NewFormatFactory : Factory, IArchiveFactory, IReaderFactory
{
// Archive format magic bytes (signature)
private static readonly byte[] NewFormatSignature = new byte[] { 0x4E, 0x46 }; // "NF"
public static NewFormatFactory Instance { get; } = new();
public IArchive CreateArchive(Stream stream)
=> NewFormatArchive.Open(stream);
public IReader CreateReader(Stream stream, ReaderOptions options)
=> new NewFormatReader(stream) { Options = options };
public bool Matches(Stream stream, ReadOnlySpan<byte> signature)
=> signature.StartsWith(NewFormatSignature);
}
```
### Step 6: Register Factory
**Update:** `src/SharpCompress/Factories/ArchiveFactory.cs`
```csharp
private static readonly IFactory[] Factories =
{
ZipFactory.Instance,
TarFactory.Instance,
RarFactory.Instance,
SevenZipFactory.Instance,
GZipFactory.Instance,
NewFormatFactory.Instance, // Add here
// ... other factories
};
```
### Step 7: Add Tests
**Create:** `tests/SharpCompress.Test/NewFormat/NewFormatTests.cs`
```csharp
public class NewFormatTests : TestBase
{
[Fact]
public void NewFormat_Extracts_Successfully()
{
var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "archive.newformat");
using (var archive = NewFormatArchive.Open(archivePath))
{
archive.WriteToDirectory(SCRATCH_FILES_PATH);
// Assert extraction
}
}
[Fact]
public void NewFormat_Reader_Works()
{
var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "archive.newformat");
using (var stream = File.OpenRead(archivePath))
using (var reader = new NewFormatReader(stream))
{
Assert.True(reader.MoveToNextEntry());
Assert.NotNull(reader.Entry);
}
}
}
```
### Step 8: Add Test Archives
Place test files in `tests/TestArchives/Archives/NewFormat/` directory.
### Step 9: Document
Update `docs/FORMATS.md` with format support information.
---
## Compression Algorithm Implementation
### Creating a New Compression Stream
**Example:** Creating `CustomStream` for a custom compression algorithm
```csharp
public class CustomStream : Stream
{
private readonly Stream _baseStream;
private readonly bool _leaveOpen;
public CustomStream(Stream baseStream, bool leaveOpen = false)
{
_baseStream = baseStream;
_leaveOpen = leaveOpen;
}
public override int Read(byte[] buffer, int offset, int count)
{
// Decompress data from _baseStream into buffer
// Return number of decompressed bytes
}
public override void Write(byte[] buffer, int offset, int count)
{
// Compress data from buffer into _baseStream
}
protected override void Dispose(bool disposing)
{
if (disposing && !_leaveOpen)
{
_baseStream?.Dispose();
}
base.Dispose(disposing);
}
}
```
---
## Stream Handling Best Practices
### Disposal Pattern
```csharp
// Correct: Nested using blocks
using (var fileStream = File.OpenRead("archive.zip"))
using (var archive = ZipArchive.Open(fileStream))
{
archive.WriteToDirectory(@"C:\output");
}
// Both archive and fileStream properly disposed
// Correct: Using with options
var options = new ReaderOptions { LeaveStreamOpen = true };
var stream = File.OpenRead("archive.zip");
using (var archive = ZipArchive.Open(stream, options))
{
archive.WriteToDirectory(@"C:\output");
}
stream.Dispose(); // Manually dispose if LeaveStreamOpen = true
```
### NonDisposingStream Wrapper
```csharp
// Prevent unwanted stream closure
var baseStream = File.OpenRead("data.bin");
var nonDisposing = new NonDisposingStream(baseStream);
using (var compressor = new DeflateStream(nonDisposing))
{
// Compressor won't close baseStream when disposed
}
// baseStream still usable
baseStream.Position = 0; // Works
baseStream.Dispose(); // Manual disposal
```
---
## Performance Considerations
### Memory Efficiency
1. **Avoid loading entire archive in memory** - Use Reader API for large files
2. **Process entries sequentially** - Especially for solid archives
3. **Use appropriate buffer sizes** - Larger buffers for network I/O
4. **Dispose streams promptly** - Free resources when done
### Algorithm Selection
1. **Archive API** - Fast for small archives with random access
2. **Reader API** - Efficient for large files or streaming
3. **Solid archives** - Sequential extraction much faster
4. **Compression levels** - Trade-off between speed and size
---
## Testing Guidelines
### Test Coverage
1. **Happy path** - Normal extraction works
2. **Edge cases** - Empty archives, single file, many files
3. **Corrupted data** - Handle gracefully
4. **Error cases** - Missing passwords, unsupported compression
5. **Async operations** - Both sync and async code paths
### Test Archives
- Use `tests/TestArchives/` for test data
- Create format-specific subdirectories
- Include encrypted, corrupted, and edge case archives
- Don't recreate existing archives
### Test Patterns
```csharp
[Fact]
public void Archive_Extraction_Works()
{
// Arrange
var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "test.zip");
// Act
using (var archive = ZipArchive.Open(testArchive))
{
archive.WriteToDirectory(SCRATCH_FILES_PATH);
}
// Assert
Assert.True(File.Exists(Path.Combine(SCRATCH_FILES_PATH, "file.txt")));
}
```
---
## Related Documentation
- [AGENTS.md](../AGENTS.md) - Development guidelines
- [FORMATS.md](FORMATS.md) - Supported formats

610
docs/ENCODING.md Normal file
View File

@@ -0,0 +1,610 @@
# SharpCompress Character Encoding Guide
This guide explains how SharpCompress handles character encoding for archive entries (filenames, comments, etc.).
## Overview
Most archive formats store filenames and metadata as bytes. SharpCompress must convert these bytes to strings using the appropriate character encoding.
**Common Problem:** Archives created on systems with non-UTF8 encodings (especially Japanese, Chinese systems) appear with corrupted filenames when extracted on systems that assume UTF8.
---
## ArchiveEncoding Class
### Basic Usage
```csharp
using SharpCompress.Common;
using SharpCompress.Readers;
// Configure encoding before opening archive
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding(932) // cp932 for Japanese
}
};
using (var archive = ZipArchive.Open("japanese.zip", options))
{
foreach (var entry in archive.Entries)
{
Console.WriteLine(entry.Key); // Now shows correct characters
}
}
```
### ArchiveEncoding Properties
| Property | Purpose |
|----------|---------|
| `Default` | Default encoding for filenames (fallback) |
| `CustomDecoder` | Custom decoding function for special cases |
### Setting for Different APIs
**Archive API:**
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding { Default = Encoding.GetEncoding(932) }
};
using (var archive = ZipArchive.Open("file.zip", options))
{
// Use archive with correct encoding
}
```
**Reader API:**
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding { Default = Encoding.GetEncoding(932) }
};
using (var stream = File.OpenRead("file.zip"))
using (var reader = ReaderFactory.Open(stream, options))
{
while (reader.MoveToNextEntry())
{
// Filenames decoded correctly
}
}
```
---
## Common Encodings
### Asian Encodings
#### cp932 (Japanese)
```csharp
// Windows-31J, Shift-JIS variant used on Japanese Windows
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding(932)
}
};
using (var archive = ZipArchive.Open("japanese.zip", options))
{
// Correctly decodes Japanese filenames
}
```
**When to use:**
- Archives from Japanese Windows systems
- Files with Japanese characters in names
#### gb2312 (Simplified Chinese)
```csharp
// Simplified Chinese
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("gb2312")
}
};
```
#### gbk (Extended Simplified Chinese)
```csharp
// Extended Simplified Chinese (more characters than gb2312)
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("gbk")
}
};
```
#### big5 (Traditional Chinese)
```csharp
// Traditional Chinese (Taiwan, Hong Kong)
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("big5")
}
};
```
#### euc-jp (Japanese, Unix)
```csharp
// Extended Unix Code for Japanese
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("eucjp")
}
};
```
#### euc-kr (Korean)
```csharp
// Extended Unix Code for Korean
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("euc-kr")
}
};
```
### Western European Encodings
#### iso-8859-1 (Latin-1)
```csharp
// Western European (includes accented characters)
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("iso-8859-1")
}
};
```
**When to use:**
- Archives from French, German, Spanish systems
- Files with accented characters (é, ñ, ü, etc.)
#### cp1252 (Windows-1252)
```csharp
// Windows Western European
// Very similar to iso-8859-1 but with additional printable characters
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("cp1252")
}
};
```
**When to use:**
- Archives from older Western European Windows systems
- Files with smart quotes and other Windows-specific characters
#### iso-8859-15 (Latin-9)
```csharp
// Western European with Euro symbol support
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("iso-8859-15")
}
};
```
### Cyrillic Encodings
#### cp1251 (Windows Cyrillic)
```csharp
// Russian, Serbian, Bulgarian, etc.
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("cp1251")
}
};
```
#### koi8-r (KOI8 Russian)
```csharp
// Russian (Unix standard)
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("koi8-r")
}
};
```
### UTF Encodings (Modern)
#### UTF-8 (Default)
```csharp
// Modern standard - usually correct for new archives
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.UTF8
}
};
```
#### UTF-16
```csharp
// Unicode - rarely used in archives
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.Unicode
}
};
```
---
## Encoding Auto-Detection
SharpCompress attempts to auto-detect encoding, but this isn't always reliable:
```csharp
// Auto-detection (default)
using (var archive = ZipArchive.Open("file.zip")) // Uses UTF8 by default
{
// May show corrupted characters if archive uses different encoding
}
// Explicit encoding (more reliable)
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding { Default = Encoding.GetEncoding(932) }
};
using (var archive = ZipArchive.Open("file.zip", options))
{
// Correct characters displayed
}
```
### When Manual Override is Needed
| Situation | Solution |
|-----------|----------|
| Archive shows corrupted characters | Specify the encoding explicitly |
| Archives from specific region | Use that region's encoding |
| Mixed encodings in archive | Use CustomDecoder |
| Testing with international files | Try different encodings |
---
## Custom Decoder
For complex scenarios where a single encoding isn't sufficient:
### Basic Custom Decoder
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
CustomDecoder = (data, offset, length) =>
{
// Custom decoding logic
var bytes = new byte[length];
Array.Copy(data, offset, bytes, 0, length);
// Try UTF8 first
try
{
return Encoding.UTF8.GetString(bytes);
}
catch
{
// Fallback to cp932 if UTF8 fails
return Encoding.GetEncoding(932).GetString(bytes);
}
}
}
};
using (var archive = ZipArchive.Open("mixed.zip", options))
{
foreach (var entry in archive.Entries)
{
Console.WriteLine(entry.Key); // Uses custom decoder
}
}
```
### Advanced: Detect Encoding by Content
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
CustomDecoder = DetectAndDecode
}
};
private static string DetectAndDecode(byte[] data, int offset, int length)
{
var bytes = new byte[length];
Array.Copy(data, offset, bytes, 0, length);
// Try UTF8 (most modern archives)
try
{
var str = Encoding.UTF8.GetString(bytes);
// Verify it decoded correctly (no replacement characters)
if (!str.Contains('\uFFFD'))
return str;
}
catch { }
// Try cp932 (Japanese)
try
{
var str = Encoding.GetEncoding(932).GetString(bytes);
if (!str.Contains('\uFFFD'))
return str;
}
catch { }
// Fallback to iso-8859-1 (always succeeds)
return Encoding.GetEncoding("iso-8859-1").GetString(bytes);
}
```
---
## Code Examples
### Extract Archive with Japanese Filenames
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding(932) // cp932
}
};
using (var archive = ZipArchive.Open("japanese_files.zip", options))
{
archive.WriteToDirectory(@"C:\output", new ExtractionOptions
{
ExtractFullPath = true,
Overwrite = true
});
}
// Files extracted with correct Japanese names
```
### Extract Archive with Western European Filenames
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("iso-8859-1")
}
};
using (var archive = ZipArchive.Open("french_files.zip", options))
{
archive.WriteToDirectory(@"C:\output");
}
// Accented characters (é, è, ê, etc.) display correctly
```
### Extract Archive with Chinese Filenames
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("gbk") // Simplified Chinese
}
};
using (var archive = ZipArchive.Open("chinese_files.zip", options))
{
archive.WriteToDirectory(@"C:\output");
}
```
### Extract Archive with Russian Filenames
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("cp1251") // Windows Cyrillic
}
};
using (var archive = ZipArchive.Open("russian_files.zip", options))
{
archive.WriteToDirectory(@"C:\output");
}
```
### Reader API with Encoding
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding(932)
}
};
using (var stream = File.OpenRead("japanese.zip"))
using (var reader = ReaderFactory.Open(stream, options))
{
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
Console.WriteLine(reader.Entry.Key); // Correct characters
reader.WriteEntryToDirectory(@"C:\output");
}
}
}
```
---
## Creating Archives with Correct Encoding
When creating archives, SharpCompress uses UTF8 by default (recommended):
```csharp
// Create with UTF8 (default, recommended)
using (var archive = ZipArchive.Create())
{
archive.AddAllFromDirectory(@"D:\my_files");
archive.SaveTo("output.zip", CompressionType.Deflate);
// Archives created with UTF8 encoding
}
```
If you need to create archives for systems that expect specific encodings:
```csharp
// Note: SharpCompress Writer API uses UTF8 for encoding
// To create archives with other encodings, consider:
// 1. Let users on those systems create archives
// 2. Use system tools (7-Zip, WinRAR) with desired encoding
// 3. Post-process archives if absolutely necessary
// For now, recommend modern UTF8-based archives
```
---
## Troubleshooting Encoding Issues
### Filenames Show Question Marks (?)
```
✗ Wrong encoding detected
test文件.txt → test???.txt
```
**Solution:** Specify correct encoding explicitly
```csharp
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
Default = Encoding.GetEncoding("gbk") // Try different encodings
}
};
```
### Filenames Show Replacement Character (￿)
```
✗ Invalid bytes for selected encoding
café.txt → caf￿.txt
```
**Solution:**
1. Try a different encoding (see Common Encodings table)
2. Use CustomDecoder with fallback encoding
3. Archive might be corrupted
### Mixed Encodings in Single Archive
```csharp
// Use CustomDecoder to handle mixed encodings
var options = new ReaderOptions
{
ArchiveEncoding = new ArchiveEncoding
{
CustomDecoder = (data, offset, length) =>
{
// Try multiple encodings in priority order
var bytes = new byte[length];
Array.Copy(data, offset, bytes, 0, length);
foreach (var encoding in new[]
{
Encoding.UTF8,
Encoding.GetEncoding(932),
Encoding.GetEncoding("iso-8859-1")
})
{
try
{
var str = encoding.GetString(bytes);
if (!str.Contains('\uFFFD'))
return str;
}
catch { }
}
// Final fallback
return Encoding.GetEncoding("iso-8859-1").GetString(bytes);
}
}
};
```
---
## Encoding Reference Table
| Encoding | Code | Use Case |
|----------|------|----------|
| UTF-8 | (default) | Modern archives, recommended |
| cp932 | 932 | Japanese Windows |
| gb2312 | "gb2312" | Simplified Chinese |
| gbk | "gbk" | Extended Simplified Chinese |
| big5 | "big5" | Traditional Chinese |
| iso-8859-1 | "iso-8859-1" | Western European |
| cp1252 | "cp1252" | Windows Western European |
| cp1251 | "cp1251" | Russian/Cyrillic |
| euc-jp | "euc-jp" | Japanese Unix |
| euc-kr | "euc-kr" | Korean |
---
## Best Practices
1. **Use UTF-8 for new archives** - Most modern systems support it
2. **Ask the archive creator** - When receiving archives with corrupted names
3. **Provide encoding options** - If your app handles user archives
4. **Document your assumption** - Tell users what encoding you're using
5. **Test with international files** - Before releasing production code
---
## Related Documentation
- [USAGE.md](USAGE.md#extract-zip-which-has-non-utf8-encoded-filenamycp932) - Usage examples

474
docs/PERFORMANCE.md Normal file
View File

@@ -0,0 +1,474 @@
# SharpCompress Performance Guide
This guide helps you optimize SharpCompress for performance in various scenarios.
## API Selection Guide
### Archive API vs Reader API
Choose the right API based on your use case:
| Aspect | Archive API | Reader API |
|--------|------------|-----------|
| **Stream Type** | Seekable only | Non-seekable OK |
| **Memory Usage** | All entries in memory | One entry at a time |
| **Random Access** | ✓ Yes | ✗ No |
| **Best For** | Small-to-medium archives | Large or streaming data |
| **Performance** | Fast for random access | Better for large files |
### Archive API (Fast for Random Access)
```csharp
// Use when:
// - Archive fits in memory
// - You need random access to entries
// - Stream is seekable (file, MemoryStream)
using (var archive = ZipArchive.Open("archive.zip"))
{
// Random access - all entries available
var specific = archive.Entries.FirstOrDefault(e => e.Key == "file.txt");
if (specific != null)
{
specific.WriteToFile(@"C:\output\file.txt");
}
}
```
**Performance Characteristics:**
- ✓ Instant entry lookup
- ✓ Parallel extraction possible
- ✗ Entire archive in memory
- ✗ Can't process while downloading
### Reader API (Best for Large Files)
```csharp
// Use when:
// - Processing large archives (>100 MB)
// - Streaming from network/pipe
// - Memory is constrained
// - Forward-only processing is acceptable
using (var stream = File.OpenRead("large.zip"))
using (var reader = ReaderFactory.Open(stream))
{
while (reader.MoveToNextEntry())
{
// Process one entry at a time
reader.WriteEntryToDirectory(@"C:\output");
}
}
```
**Performance Characteristics:**
- ✓ Minimal memory footprint
- ✓ Works with non-seekable streams
- ✓ Can process while downloading
- ✗ Forward-only (no random access)
- ✗ Entry lookup requires iteration
---
## Buffer Sizing
### Understanding Buffers
SharpCompress uses internal buffers for reading compressed data. Buffer size affects:
- **Speed:** Larger buffers = fewer I/O operations = faster
- **Memory:** Larger buffers = higher memory usage
### Recommended Buffer Sizes
| Scenario | Size | Notes |
|----------|------|-------|
| Embedded/IoT devices | 4-8 KB | Minimal memory usage |
| Memory-constrained | 16-32 KB | Conservative default |
| Standard use (default) | 64 KB | Recommended default |
| Large file streaming | 256 KB | Better throughput |
| High-speed SSD | 512 KB - 1 MB | Maximum throughput |
### How Buffer Size Affects Performance
```csharp
// SharpCompress manages buffers internally
// You can't directly set buffer size, but you can:
// 1. Use Stream.CopyTo with explicit buffer size
using (var entryStream = reader.OpenEntryStream())
using (var fileStream = File.Create(@"C:\output\file.txt"))
{
// 64 KB buffer (default)
entryStream.CopyTo(fileStream);
// Or specify larger buffer for faster copy
entryStream.CopyTo(fileStream, bufferSize: 262144); // 256 KB
}
// 2. Use custom buffer for writing
using (var entryStream = reader.OpenEntryStream())
using (var fileStream = File.Create(@"C:\output\file.txt"))
{
byte[] buffer = new byte[262144]; // 256 KB
int bytesRead;
while ((bytesRead = entryStream.Read(buffer, 0, buffer.Length)) > 0)
{
fileStream.Write(buffer, 0, bytesRead);
}
}
```
---
## Streaming Large Files
### Non-Seekable Stream Patterns
For processing archives from downloads or pipes:
```csharp
// Download stream (non-seekable)
using (var httpStream = await httpClient.GetStreamAsync(url))
using (var reader = ReaderFactory.Open(httpStream))
{
// Process entries as they arrive
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryToDirectory(@"C:\output");
}
}
}
```
**Performance Tips:**
- Don't try to buffer the entire stream
- Process entries immediately
- Use async APIs for better responsiveness
### Download-Then-Extract vs Streaming
Choose based on your constraints:
| Approach | When to Use |
|----------|------------|
| **Download then extract** | Moderate size, need random access |
| **Stream during download** | Large files, bandwidth limited, memory constrained |
```csharp
// Download then extract (requires disk space)
var archivePath = await DownloadFile(url, @"C:\temp\archive.zip");
using (var archive = ZipArchive.Open(archivePath))
{
archive.WriteToDirectory(@"C:\output");
}
// Stream during download (on-the-fly extraction)
using (var httpStream = await httpClient.GetStreamAsync(url))
using (var reader = ReaderFactory.Open(httpStream))
{
while (reader.MoveToNextEntry())
{
reader.WriteEntryToDirectory(@"C:\output");
}
}
```
---
## Solid Archive Optimization
### Why Solid Archives Are Slow
Solid archives (Rar, 7Zip) group files together in a single compressed stream:
```
Solid Archive Layout:
[Header] [Compressed Stream] [Footer]
├─ File1 compressed data
├─ File2 compressed data
├─ File3 compressed data
└─ File4 compressed data
```
Extracting File3 requires decompressing File1 and File2 first.
### Sequential vs Random Extraction
**Random Extraction (Slow):**
```csharp
using (var archive = RarArchive.Open("solid.rar"))
{
foreach (var entry in archive.Entries)
{
entry.WriteToFile(@"C:\output\" + entry.Key); // ✗ Slow!
// Each entry triggers full decompression from start
}
}
```
**Sequential Extraction (Fast):**
```csharp
using (var archive = RarArchive.Open("solid.rar"))
{
// Method 1: Use WriteToDirectory (recommended)
archive.WriteToDirectory(@"C:\output", new ExtractionOptions
{
ExtractFullPath = true,
Overwrite = true
});
// Method 2: Use ExtractAllEntries
archive.ExtractAllEntries();
// Method 3: Use Reader API (also sequential)
using (var reader = RarReader.Open(File.OpenRead("solid.rar")))
{
while (reader.MoveToNextEntry())
{
reader.WriteEntryToDirectory(@"C:\output");
}
}
}
```
**Performance Impact:**
- Random extraction: O(n²) - very slow for many files
- Sequential extraction: O(n) - 10-100x faster
### Best Practices for Solid Archives
1. **Always extract sequentially** when possible
2. **Use Reader API** for large solid archives
3. **Process entries in order** from the archive
4. **Consider using 7Zip command-line** for scripted extractions
---
## Compression Level Trade-offs
### Deflate/GZip Levels
```csharp
// Level 1 = Fastest, largest size
// Level 6 = Default (balanced)
// Level 9 = Slowest, best compression
// Write with different compression levels
using (var archive = ZipArchive.Create())
{
archive.AddAllFromDirectory(@"D:\data");
// Fast compression (level 1)
archive.SaveTo("fast.zip", new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 1
});
// Default compression (level 6)
archive.SaveTo("default.zip", CompressionType.Deflate);
// Best compression (level 9)
archive.SaveTo("best.zip", new WriterOptions(CompressionType.Deflate)
{
CompressionLevel = 9
});
}
```
**Speed vs Size:**
| Level | Speed | Size | Use Case |
|-------|-------|------|----------|
| 1 | 10x | 90% | Network, streaming |
| 6 | 1x | 75% | Default (good balance) |
| 9 | 0.1x | 65% | Archival, static storage |
### BZip2 Block Size
```csharp
// BZip2 block size affects memory and compression
// 100K to 900K (default 900K)
// Smaller block size = lower memory, faster
// Larger block size = better compression, slower
using (var archive = TarArchive.Create())
{
archive.AddAllFromDirectory(@"D:\data");
// These are preset in WriterOptions via CompressionLevel
archive.SaveTo("archive.tar.bz2", CompressionType.BZip2);
}
```
### LZMA Settings
LZMA compression is very powerful but memory-intensive:
```csharp
// LZMA (7Zip, .tar.lzma):
// - Dictionary size: 16 KB to 1 GB (default 32 MB)
// - Faster preset: smaller dictionary
// - Better compression: larger dictionary
// Preset via CompressionType
using (var archive = TarArchive.Create())
{
archive.AddAllFromDirectory(@"D:\data");
archive.SaveTo("archive.tar.xz", CompressionType.LZMA); // Default settings
}
```
---
## Async Performance
### When Async Helps
Async is beneficial when:
- **Long I/O operations** (network, slow disks)
- **UI responsiveness** needed (Windows Forms, WPF, Blazor)
- **Server applications** (ASP.NET, multiple concurrent operations)
```csharp
// Async extraction (non-blocking)
using (var archive = ZipArchive.Open("archive.zip"))
{
await archive.WriteToDirectoryAsync(
@"C:\output",
new ExtractionOptions { ExtractFullPath = true, Overwrite = true },
cancellationToken
);
}
// Thread can handle other work while I/O happens
```
### When Async Doesn't Help
Async doesn't improve performance for:
- **CPU-bound operations** (already fast)
- **Local SSD I/O** (I/O is fast enough)
- **Single-threaded scenarios** (no parallelism benefit)
```csharp
// Sync extraction (simpler, same performance on fast I/O)
using (var archive = ZipArchive.Open("archive.zip"))
{
archive.WriteToDirectory(
@"C:\output",
new ExtractionOptions { ExtractFullPath = true, Overwrite = true }
);
}
// Simple and fast - no async needed
```
### Cancellation Pattern
```csharp
var cts = new CancellationTokenSource();
// Cancel after 5 minutes
cts.CancelAfter(TimeSpan.FromMinutes(5));
try
{
using (var archive = ZipArchive.Open("archive.zip"))
{
await archive.WriteToDirectoryAsync(
@"C:\output",
new ExtractionOptions { ExtractFullPath = true, Overwrite = true },
cts.Token
);
}
}
catch (OperationCanceledException)
{
Console.WriteLine("Extraction cancelled");
// Clean up partial extraction if needed
}
```
---
## Practical Performance Tips
### 1. Choose the Right API
| Scenario | API | Why |
|----------|-----|-----|
| Small archives | Archive | Faster random access |
| Large archives | Reader | Lower memory |
| Streaming | Reader | Works on non-seekable streams |
| Download streams | Reader | Async extraction while downloading |
### 2. Batch Operations
```csharp
// ✗ Slow - opens each archive separately
foreach (var file in files)
{
using (var archive = ZipArchive.Open("archive.zip"))
{
archive.WriteToDirectory(@"C:\output");
}
}
// ✓ Better - process multiple entries at once
using (var archive = ZipArchive.Open("archive.zip"))
{
archive.WriteToDirectory(@"C:\output");
}
```
### 3. Profile Your Code
```csharp
var sw = Stopwatch.StartNew();
using (var archive = ZipArchive.Open("large.zip"))
{
archive.WriteToDirectory(@"C:\output");
}
sw.Stop();
Console.WriteLine($"Extraction took {sw.ElapsedMilliseconds}ms");
// Measure memory before/after
var beforeMem = GC.GetTotalMemory(true);
// ... do work ...
var afterMem = GC.GetTotalMemory(true);
Console.WriteLine($"Memory used: {(afterMem - beforeMem) / 1024 / 1024}MB");
```
---
## Troubleshooting Performance
### Extraction is Slow
1. **Check if solid archive** → Use sequential extraction
2. **Check API** → Reader API might be faster for large files
3. **Check compression level** → Higher levels are slower to decompress
4. **Check I/O** → Network drives are much slower than SSD
5. **Check buffer size** → May need larger buffers for network
### High Memory Usage
1. **Use Reader API** instead of Archive API
2. **Process entries immediately** rather than buffering
3. **Reduce compression level** if writing
4. **Check for memory leaks** in your code
### CPU Usage at 100%
1. **Normal for compression** - especially with high compression levels
2. **Consider lower level** for faster processing
3. **Reduce parallelism** if processing multiple archives
4. **Check if awaiting properly** in async code
---
## Related Documentation
- [PERFORMANCE.md](USAGE.md) - Usage examples with performance considerations
- [FORMATS.md](FORMATS.md) - Format-specific performance notes

View File

@@ -1,6 +1,6 @@
# SharpCompress Usage
## Async/Await Support
## Async/Await Support (Beta)
SharpCompress now provides full async/await support for all I/O operations. All `Read`, `Write`, and extraction operations have async equivalents ending in `Async` that accept an optional `CancellationToken`. This enables better performance and scalability for I/O-bound operations.
@@ -13,7 +13,7 @@ SharpCompress now provides full async/await support for all I/O operations. All
See [Async Examples](#async-examples) section below for usage patterns.
## Stream Rules (changed with 0.21)
## Stream Rules
When dealing with Streams, the rule should be that you don't close a stream you didn't create. This, in effect, should mean you should always put a Stream in a using block to dispose it.

View File

@@ -216,9 +216,9 @@
"net10.0": {
"Microsoft.NET.ILLink.Tasks": {
"type": "Direct",
"requested": "[10.0.1, )",
"resolved": "10.0.1",
"contentHash": "ISahzLHsHY7vrwqr2p1YWZ+gsxoBRtH7gWRDK8fDUst9pp2He0GiesaqEfeX0V8QMCJM3eNEHGGpnIcPjFo2NQ=="
"requested": "[10.0.0, )",
"resolved": "10.0.0",
"contentHash": "kICGrGYEzCNI3wPzfEXcwNHgTvlvVn9yJDhSdRK+oZQy4jvYH529u7O0xf5ocQKzOMjfS07+3z9PKRIjrFMJDA=="
},
"Microsoft.NETFramework.ReferenceAssemblies": {
"type": "Direct",

View File

@@ -0,0 +1,86 @@
using System.IO;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpCompress.Archives;
namespace SharpCompress.Performance;
/// <summary>
/// Benchmarks for Archive API operations across different formats.
/// Archive API is used for random access to entries with seekable streams.
/// </summary>
[MemoryDiagnoser]
public class ArchiveReadBenchmarks : BenchmarkBase
{
[Benchmark]
public void ZipArchiveRead()
{
var path = GetTestArchivePath("Zip.deflate.zip");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
[Benchmark]
public void TarArchiveRead()
{
var path = GetTestArchivePath("Tar.tar");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
[Benchmark]
public void TarGzArchiveRead()
{
var path = GetTestArchivePath("Tar.tar.gz");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
[Benchmark]
public void TarBz2ArchiveRead()
{
var path = GetTestArchivePath("Tar.tar.bz2");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
[Benchmark]
public void SevenZipArchiveRead()
{
var path = GetTestArchivePath("7Zip.LZMA2.7z");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
[Benchmark]
public void RarArchiveRead()
{
var path = GetTestArchivePath("Rar.rar");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
}

View File

@@ -0,0 +1,47 @@
using System.IO;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpCompress.Archives;
namespace SharpCompress.Performance;
/// <summary>
/// Benchmarks comparing current code against a baseline.
/// Use [Baseline] attribute to mark the reference benchmark.
/// </summary>
[MemoryDiagnoser]
[RankColumn]
public class BaselineComparisonBenchmarks : BenchmarkBase
{
/// <summary>
/// Baseline benchmark for Zip archive reading.
/// This serves as the reference point for comparison.
/// </summary>
[Benchmark(Baseline = true)]
public void ZipArchiveRead_Baseline()
{
var path = GetTestArchivePath("Zip.deflate.zip");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
/// <summary>
/// Current implementation benchmark for Zip archive reading.
/// BenchmarkDotNet will compare this against the baseline.
/// </summary>
[Benchmark]
public void ZipArchiveRead_Current()
{
var path = GetTestArchivePath("Zip.deflate.zip");
using var archive = ArchiveFactory.Open(path);
foreach (var entry in archive.Entries.Where(e => !e.IsDirectory))
{
using var stream = entry.OpenEntryStream();
stream.CopyTo(Stream.Null);
}
}
}

View File

@@ -0,0 +1,37 @@
using System;
using System.IO;
namespace SharpCompress.Performance;
/// <summary>
/// Base class for all benchmarks providing common setup for test archives path
/// </summary>
public class BenchmarkBase
{
protected readonly string TEST_ARCHIVES_PATH;
public BenchmarkBase()
{
var index = AppDomain.CurrentDomain.BaseDirectory.IndexOf(
"SharpCompress.Performance",
StringComparison.OrdinalIgnoreCase
);
if (index == -1)
{
throw new InvalidOperationException(
"Could not locate SharpCompress.Performance in the base directory path"
);
}
var path = AppDomain.CurrentDomain.BaseDirectory.Substring(0, index);
var solutionBasePath =
Path.GetDirectoryName(path)
?? throw new InvalidOperationException("Could not determine solution base path");
TEST_ARCHIVES_PATH = Path.Combine(solutionBasePath, "TestArchives", "Archives");
}
protected string GetTestArchivePath(string filename) =>
Path.Combine(TEST_ARCHIVES_PATH, filename);
}

View File

@@ -1,54 +1,16 @@
using System;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using SharpCompress.Archives;
using SharpCompress.Performance;
using SharpCompress.Readers;
using SharpCompress.Test;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Running;
var index = AppDomain.CurrentDomain.BaseDirectory.IndexOf(
"SharpCompress.Performance",
StringComparison.OrdinalIgnoreCase
);
var path = AppDomain.CurrentDomain.BaseDirectory.Substring(0, index);
var SOLUTION_BASE_PATH = Path.GetDirectoryName(path) ?? throw new ArgumentNullException();
namespace SharpCompress.Performance;
var TEST_ARCHIVES_PATH = Path.Combine(SOLUTION_BASE_PATH, "TestArchives", "Archives");
//using var _ = JetbrainsProfiler.Memory($"/Users/adam/temp/");
using (var __ = JetbrainsProfiler.Cpu($"/Users/adam/temp/"))
internal class Program
{
var testArchives = new[]
static void Main(string[] args)
{
"Rar.Audio_program.rar",
// Run all benchmarks in the assembly
var config = DefaultConfig.Instance;
//"64bitstream.zip.7z",
//"TarWithSymlink.tar.gz"
};
var arcs = testArchives.Select(a => Path.Combine(TEST_ARCHIVES_PATH, a)).ToArray();
for (int i = 0; i < 50; i++)
{
using var found = ArchiveFactory.Open(arcs[0]);
foreach (var entry in found.Entries.Where(entry => !entry.IsDirectory))
{
Console.WriteLine($"Extracting {entry.Key}");
using var entryStream = entry.OpenEntryStream();
entryStream.CopyTo(Stream.Null);
}
/*using var found = ReaderFactory.Open(arcs[0]);
while (found.MoveToNextEntry())
{
var entry = found.Entry;
if (entry.IsDirectory)
continue;
Console.WriteLine($"Extracting {entry.Key}");
found.WriteEntryTo(Stream.Null);
}*/
// BenchmarkRunner will find all classes with [Benchmark] attributes
BenchmarkSwitcher.FromAssembly(typeof(Program).Assembly).Run(args, config);
}
Console.WriteLine("Still running...");
}
await Task.Delay(500);

View File

@@ -0,0 +1,131 @@
# SharpCompress Performance Benchmarks
This project uses [BenchmarkDotNet](https://benchmarkdotnet.org/) to measure and track performance of SharpCompress archive operations.
## Running Benchmarks
### Run All Benchmarks
```bash
cd tests/SharpCompress.Performance
dotnet run -c Release
```
### Run Specific Benchmark Classes
```bash
# Run only Archive API benchmarks
dotnet run -c Release -- --filter "*ArchiveReadBenchmarks*"
# Run only Reader API benchmarks
dotnet run -c Release -- --filter "*ReaderBenchmarks*"
```
### Run Specific Benchmark Methods
```bash
# Run only Zip benchmarks
dotnet run -c Release -- --filter "*Zip*"
# Run a specific method
dotnet run -c Release -- --filter "ArchiveReadBenchmarks.ZipArchiveRead"
```
### Quick Dry Run (for testing)
```bash
dotnet run -c Release -- --job dry
```
## Benchmark Categories
### ArchiveReadBenchmarks
Tests the **Archive API** which provides random access to entries with seekable streams. Covers:
- Zip (deflate compression)
- Tar (uncompressed)
- Tar.gz (gzip compression)
- Tar.bz2 (bzip2 compression)
- 7Zip (LZMA2 compression)
- Rar
### ReaderBenchmarks
Tests the **Reader API** which provides forward-only streaming for non-seekable streams. Covers:
- Zip
- Tar
- Tar.gz
- Tar.bz2
- Rar
### WriteBenchmarks
Tests the **Writer API** for creating archives using forward-only writing. Covers:
- Zip (deflate compression)
- Tar (uncompressed)
- Tar.gz (gzip compression)
### BaselineComparisonBenchmarks
Example benchmark showing how to compare implementations using the `[Baseline]` attribute. The baseline benchmark serves as a reference point, and BenchmarkDotNet calculates the ratio of performance between baseline and other methods.
## Comparing Against Previous Versions
### Using Baseline Attribute
Mark one benchmark with `[Baseline = true]` and BenchmarkDotNet will show relative performance:
```csharp
[Benchmark(Baseline = true)]
public void MethodA() { /* ... */ }
[Benchmark]
public void MethodB() { /* ... */ }
```
Results will show ratios like "1.5x slower" or "0.8x faster" compared to the baseline.
### Using BenchmarkDotNet.Artifacts for Historical Comparison
BenchmarkDotNet saves results to `BenchmarkDotNet.Artifacts/results/`. You can:
1. Run benchmarks and save the results
2. Keep a snapshot of the results file
3. Compare new runs against saved results
### Using Different NuGet Versions (Advanced)
To compare against a published NuGet package:
1. Create a separate benchmark project referencing the NuGet package
2. Use BenchmarkDotNet's `[SimpleJob]` attribute with different runtimes
3. Reference both the local project and NuGet package in different jobs
## Interpreting Results
BenchmarkDotNet provides:
- **Mean**: Average execution time
- **Error**: Half of 99.9% confidence interval
- **StdDev**: Standard deviation of measurements
- **Allocated**: Memory allocated per operation
- **Rank**: Relative ranking (when using `[RankColumn]`)
- **Ratio**: Relative performance vs baseline (when using `[Baseline]`)
## Output Artifacts
Results are saved to `BenchmarkDotNet.Artifacts/results/`:
- `*.csv`: Raw data for further analysis
- `*-report.html`: HTML report with charts
- `*-report-github.md`: Markdown report for GitHub
- `*.log`: Detailed execution log
## Best Practices
1. **Always run in Release mode**: Debug builds have significant overhead
2. **Close other applications**: Minimize system noise during benchmarks
3. **Run multiple times**: Look for consistency across runs
4. **Use appropriate workload**: Ensure benchmarks run for at least 100ms
5. **Track trends**: Compare results over time to detect regressions
6. **Archive results**: Keep snapshots of benchmark results for historical comparison
## CI/CD Integration
Consider adding benchmarks to CI/CD to:
- Detect performance regressions automatically
- Track performance trends over time
- Compare PR performance against main branch
## Additional Resources
- [BenchmarkDotNet Documentation](https://benchmarkdotnet.org/articles/overview.html)
- [BenchmarkDotNet Configuration](https://benchmarkdotnet.org/articles/configs/configs.html)
- [BenchmarkDotNet Baseline](https://benchmarkdotnet.org/articles/features/baselines.html)

View File

@@ -0,0 +1,88 @@
using System.IO;
using BenchmarkDotNet.Attributes;
using SharpCompress.Readers;
namespace SharpCompress.Performance;
/// <summary>
/// Benchmarks for Reader API operations across different formats.
/// Reader API is used for forward-only streaming with non-seekable streams.
/// </summary>
[MemoryDiagnoser]
public class ReaderBenchmarks : BenchmarkBase
{
[Benchmark]
public void ZipReaderRead()
{
var path = GetTestArchivePath("Zip.deflate.zip");
using var stream = File.OpenRead(path);
using var reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryTo(Stream.Null);
}
}
}
[Benchmark]
public void TarReaderRead()
{
var path = GetTestArchivePath("Tar.tar");
using var stream = File.OpenRead(path);
using var reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryTo(Stream.Null);
}
}
}
[Benchmark]
public void TarGzReaderRead()
{
var path = GetTestArchivePath("Tar.tar.gz");
using var stream = File.OpenRead(path);
using var reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryTo(Stream.Null);
}
}
}
[Benchmark]
public void TarBz2ReaderRead()
{
var path = GetTestArchivePath("Tar.tar.bz2");
using var stream = File.OpenRead(path);
using var reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryTo(Stream.Null);
}
}
}
[Benchmark]
public void RarReaderRead()
{
var path = GetTestArchivePath("Rar.rar");
using var stream = File.OpenRead(path);
using var reader = ReaderFactory.Open(stream);
while (reader.MoveToNextEntry())
{
if (!reader.Entry.IsDirectory)
{
reader.WriteEntryTo(Stream.Null);
}
}
}
}

View File

@@ -4,6 +4,7 @@
<TargetFramework>net10.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="BenchmarkDotNet" />
<PackageReference Include="JetBrains.Profiler.SelfApi" />
<ProjectReference Include="..\..\src\SharpCompress\SharpCompress.csproj" />
</ItemGroup>

View File

@@ -0,0 +1,106 @@
using System.IO;
using System.Linq;
using BenchmarkDotNet.Attributes;
using SharpCompress.Common;
using SharpCompress.Writers;
namespace SharpCompress.Performance;
/// <summary>
/// Benchmarks for Writer operations.
/// Tests creating archives with different compression formats using forward-only Writer API.
/// </summary>
[MemoryDiagnoser]
public class WriteBenchmarks : BenchmarkBase
{
private string _tempOutputPath = null!;
private string[] _testFiles = null!;
[GlobalSetup]
public void Setup()
{
_tempOutputPath = Path.Combine(Path.GetTempPath(), Path.GetRandomFileName());
Directory.CreateDirectory(_tempOutputPath);
// Get some test files to compress
var originalPath = Path.Combine(Path.GetDirectoryName(TEST_ARCHIVES_PATH)!, "Original");
if (Directory.Exists(originalPath))
{
_testFiles = Directory.GetFiles(originalPath).Take(5).ToArray();
}
else
{
_testFiles = [];
}
}
[IterationCleanup]
public void IterationCleanup()
{
// Clean up created archives after each iteration to avoid file reuse affecting measurements
if (Directory.Exists(_tempOutputPath))
{
foreach (var file in Directory.GetFiles(_tempOutputPath))
{
File.Delete(file);
}
}
}
[GlobalCleanup]
public void Cleanup()
{
if (Directory.Exists(_tempOutputPath))
{
Directory.Delete(_tempOutputPath, true);
}
}
[Benchmark]
public void ZipWriterWrite()
{
var outputFile = Path.Combine(_tempOutputPath, "test.zip");
using var stream = File.Create(outputFile);
using var writer = WriterFactory.Open(
stream,
ArchiveType.Zip,
new WriterOptions(CompressionType.Deflate)
);
foreach (var file in _testFiles)
{
writer.Write(Path.GetFileName(file), file);
}
}
[Benchmark]
public void TarWriterWrite()
{
var outputFile = Path.Combine(_tempOutputPath, "test.tar");
using var stream = File.Create(outputFile);
using var writer = WriterFactory.Open(
stream,
ArchiveType.Tar,
new WriterOptions(CompressionType.None)
);
foreach (var file in _testFiles)
{
writer.Write(Path.GetFileName(file), file);
}
}
[Benchmark]
public void TarGzWriterWrite()
{
var outputFile = Path.Combine(_tempOutputPath, "test.tar.gz");
using var stream = File.Create(outputFile);
using var writer = WriterFactory.Open(
stream,
ArchiveType.Tar,
new WriterOptions(CompressionType.GZip)
);
foreach (var file in _testFiles)
{
writer.Write(Path.GetFileName(file), file);
}
}
}

View File

@@ -2,6 +2,24 @@
"version": 2,
"dependencies": {
"net10.0": {
"BenchmarkDotNet": {
"type": "Direct",
"requested": "[0.14.0, )",
"resolved": "0.14.0",
"contentHash": "eIPSDKi3oni734M1rt/XJAwGQQOIf9gLjRRKKJ0HuVy3vYd7gnmAIX1bTjzI9ZbAY/nPddgqqgM/TeBYitMCIg==",
"dependencies": {
"BenchmarkDotNet.Annotations": "0.14.0",
"CommandLineParser": "2.9.1",
"Gee.External.Capstone": "2.3.0",
"Iced": "1.17.0",
"Microsoft.CodeAnalysis.CSharp": "4.1.0",
"Microsoft.Diagnostics.Runtime": "2.2.332302",
"Microsoft.Diagnostics.Tracing.TraceEvent": "3.1.8",
"Microsoft.DotNet.PlatformAbstractions": "3.1.6",
"Perfolizer": "[0.3.17]",
"System.Management": "5.0.0"
}
},
"JetBrains.Profiler.SelfApi": {
"type": "Direct",
"requested": "[2.5.15, )",
@@ -37,6 +55,26 @@
"resolved": "17.14.15",
"contentHash": "mXQPJsbuUD2ydq4/ffd8h8tSOFCXec+2xJOVNCvXjuMOq/+5EKHq3D2m2MC2+nUaXeFMSt66VS/J4HdKBixgcw=="
},
"BenchmarkDotNet.Annotations": {
"type": "Transitive",
"resolved": "0.14.0",
"contentHash": "CUDCg6bgHrDzhjnA+IOBl5gAo8Y5hZ2YSs7MBXrYMlMKpBZqrD5ez0537uDveOkcf+YWAoK+S4sMcuWPbIz8bw=="
},
"CommandLineParser": {
"type": "Transitive",
"resolved": "2.9.1",
"contentHash": "OE0sl1/sQ37bjVsPKKtwQlWDgqaxWgtme3xZz7JssWUzg5JpMIyHgCTY9MVMxOg48fJ1AgGT3tgdH5m/kQ5xhA=="
},
"Gee.External.Capstone": {
"type": "Transitive",
"resolved": "2.3.0",
"contentHash": "2ap/rYmjtzCOT8hxrnEW/QeiOt+paD8iRrIcdKX0cxVwWLFa1e+JDBNeECakmccXrSFeBQuu5AV8SNkipFMMMw=="
},
"Iced": {
"type": "Transitive",
"resolved": "1.17.0",
"contentHash": "8x+HCVTl/HHTGpscH3vMBhV8sknN/muZFw9s3TsI8SA6+c43cOTCi2+jE4KsU8pNLbJ++iF2ZFcpcXHXtDglnw=="
},
"JetBrains.FormatRipper": {
"type": "Transitive",
"resolved": "2.4.0",
@@ -63,6 +101,118 @@
"resolved": "8.0.0",
"contentHash": "bZKfSIKJRXLTuSzLudMFte/8CempWjVamNUR5eHJizsy+iuOuO/k2gnh7W0dHJmYY0tBf+gUErfluCv5mySAOQ=="
},
"Microsoft.CodeAnalysis.Analyzers": {
"type": "Transitive",
"resolved": "3.3.3",
"contentHash": "j/rOZtLMVJjrfLRlAMckJLPW/1rze9MT1yfWqSIbUPGRu1m1P0fuo9PmqapwsmePfGB5PJrudQLvmUOAMF0DqQ=="
},
"Microsoft.CodeAnalysis.Common": {
"type": "Transitive",
"resolved": "4.1.0",
"contentHash": "bNzTyxP3iD5FPFHfVDl15Y6/wSoI7e3MeV0lOaj9igbIKTjgrmuw6LoVJ06jUNFA7+KaDC/OIsStWl/FQJz6sQ==",
"dependencies": {
"Microsoft.CodeAnalysis.Analyzers": "3.3.3"
}
},
"Microsoft.CodeAnalysis.CSharp": {
"type": "Transitive",
"resolved": "4.1.0",
"contentHash": "sbu6kDGzo9bfQxuqWpeEE7I9P30bSuZEnpDz9/qz20OU6pm79Z63+/BsAzO2e/R/Q97kBrpj647wokZnEVr97w==",
"dependencies": {
"Microsoft.CodeAnalysis.Common": "[4.1.0]"
}
},
"Microsoft.Diagnostics.NETCore.Client": {
"type": "Transitive",
"resolved": "0.2.251802",
"contentHash": "bqnYl6AdSeboeN4v25hSukK6Odm6/54E3Y2B8rBvgqvAW0mF8fo7XNRVE2DMOG7Rk0fiuA079QIH28+V+W1Zdg==",
"dependencies": {
"Microsoft.Bcl.AsyncInterfaces": "1.1.0",
"Microsoft.Extensions.Logging": "2.1.1"
}
},
"Microsoft.Diagnostics.Runtime": {
"type": "Transitive",
"resolved": "2.2.332302",
"contentHash": "Hp84ivxSKIMTBzYSATxmUsm3YSXHWivcwiRRbsydGmqujMUK8BAueLN0ssAVEOkOBmh0vjUBhrq7YcroT7VCug==",
"dependencies": {
"Microsoft.Diagnostics.NETCore.Client": "0.2.251802"
}
},
"Microsoft.Diagnostics.Tracing.TraceEvent": {
"type": "Transitive",
"resolved": "3.1.8",
"contentHash": "kl3UMrZKSeSEYZ8rt/GjLUQToREjgQABqfg6PzQBmSlYHTZOKE9ePEOS2xptROQ9SVvngg3QGX51TIT11iZ0wA=="
},
"Microsoft.DotNet.PlatformAbstractions": {
"type": "Transitive",
"resolved": "3.1.6",
"contentHash": "jek4XYaQ/PGUwDKKhwR8K47Uh1189PFzMeLqO83mXrXQVIpARZCcfuDedH50YDTepBkfijCZN5U/vZi++erxtg=="
},
"Microsoft.Extensions.Configuration": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "LjVKO6P2y52c5ZhTLX/w8zc5H4Y3J/LJsgqTBj49TtFq/hAtVNue/WA0F6/7GMY90xhD7K0MDZ4qpOeWXbLvzg==",
"dependencies": {
"Microsoft.Extensions.Configuration.Abstractions": "2.1.1"
}
},
"Microsoft.Extensions.Configuration.Abstractions": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "VfuZJNa0WUshZ/+8BFZAhwFKiKuu/qOUCFntfdLpHj7vcRnsGHqd3G2Hse78DM+pgozczGM63lGPRLmy+uhUOA==",
"dependencies": {
"Microsoft.Extensions.Primitives": "2.1.1"
}
},
"Microsoft.Extensions.Configuration.Binder": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "fcLCTS03poWE4v9tSNBr3pWn0QwGgAn1vzqHXlXgvqZeOc7LvQNzaWcKRQZTdEc3+YhQKwMsOtm3VKSA2aWQ8w==",
"dependencies": {
"Microsoft.Extensions.Configuration": "2.1.1"
}
},
"Microsoft.Extensions.DependencyInjection.Abstractions": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "MgYpU5cwZohUMKKg3sbPhvGG+eAZ/59E9UwPwlrUkyXU+PGzqwZg9yyQNjhxuAWmoNoFReoemeCku50prYSGzA=="
},
"Microsoft.Extensions.Logging": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "hh+mkOAQDTp6XH80xJt3+wwYVzkbwYQl9XZRCz4Um0JjP/o7N9vHM3rZ6wwwtr+BBe/L6iBO2sz0px6OWBzqZQ==",
"dependencies": {
"Microsoft.Extensions.Configuration.Binder": "2.1.1",
"Microsoft.Extensions.DependencyInjection.Abstractions": "2.1.1",
"Microsoft.Extensions.Logging.Abstractions": "2.1.1",
"Microsoft.Extensions.Options": "2.1.1"
}
},
"Microsoft.Extensions.Logging.Abstractions": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "XRzK7ZF+O6FzdfWrlFTi1Rgj2080ZDsd46vzOjadHUB0Cz5kOvDG8vI7caa5YFrsHQpcfn0DxtjS4E46N4FZsA=="
},
"Microsoft.Extensions.Options": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "V7lXCU78lAbzaulCGFKojcCyG8RTJicEbiBkPJjFqiqXwndEBBIehdXRMWEVU3UtzQ1yDvphiWUL9th6/4gJ7w==",
"dependencies": {
"Microsoft.Extensions.DependencyInjection.Abstractions": "2.1.1",
"Microsoft.Extensions.Primitives": "2.1.1"
}
},
"Microsoft.Extensions.Primitives": {
"type": "Transitive",
"resolved": "2.1.1",
"contentHash": "scJ1GZNIxMmjpENh0UZ8XCQ6vzr/LzeF9WvEA51Ix2OQGAs9WPgPu8ABVUdvpKPLuor/t05gm6menJK3PwqOXg=="
},
"Microsoft.NETCore.Platforms": {
"type": "Transitive",
"resolved": "5.0.0",
"contentHash": "VyPlqzH2wavqquTcYpkIIAQ6WdenuKoFN0BdYBbCWsclXacSOHNQn66Gt4z5NBqEYW0FAPm5rlvki9ZiCij5xQ=="
},
"Microsoft.NETFramework.ReferenceAssemblies.net461": {
"type": "Transitive",
"resolved": "1.0.3",
@@ -73,8 +223,33 @@
"resolved": "8.0.0",
"contentHash": "dk9JPxTCIevS75HyEQ0E4OVAFhB2N+V9ShCXf8Q6FkUQZDkgLI12y679Nym1YqsiSysuQskT7Z+6nUf3yab6Vw=="
},
"Perfolizer": {
"type": "Transitive",
"resolved": "0.3.17",
"contentHash": "FQgtCoF2HFwvzKWulAwBS5BGLlh8pgbrJtOp47jyBwh2CW16juVtacN1azOA2BqdrJXkXTNLNRMo7ZlHHiuAnA=="
},
"System.CodeDom": {
"type": "Transitive",
"resolved": "5.0.0",
"contentHash": "JPJArwA1kdj8qDAkY2XGjSWoYnqiM7q/3yRNkt6n28Mnn95MuEGkZXUbPBf7qc3IjwrGY5ttQon7yqHZyQJmOQ=="
},
"System.Management": {
"type": "Transitive",
"resolved": "5.0.0",
"contentHash": "MF1CHaRcC+MLFdnDthv4/bKWBZnlnSpkGqa87pKukQefgEdwtb9zFW6zs0GjPp73qtpYYg4q6PEKbzJbxCpKfw==",
"dependencies": {
"Microsoft.NETCore.Platforms": "5.0.0",
"System.CodeDom": "5.0.0"
}
},
"sharpcompress": {
"type": "Project"
},
"Microsoft.Bcl.AsyncInterfaces": {
"type": "CentralTransitive",
"requested": "[10.0.0, )",
"resolved": "1.1.0",
"contentHash": "1Am6l4Vpn3/K32daEqZI+FFr96OlZkgwK2LcT3pZ2zWubR5zTPW3/FkO1Rat9kb7oQOa4rxgl9LJHc5tspCWfg=="
}
}
}

View File

@@ -92,7 +92,7 @@ public class TarReaderAsyncTests : ReaderTests
}
var destinationFileName = Path.Combine(destdir, file.NotNull());
using var fs = File.Create(destinationFileName);
using var fs = File.OpenWrite(destinationFileName);
await entryStream.CopyToAsync(fs);
}
}

View File

@@ -55,7 +55,7 @@ public class TestBase : IAsyncDisposable
Directory.CreateDirectory(SCRATCH2_FILES_PATH);
}
//akways use async dispose since we have I/O and sync Dispose doesn't wait when using xunit
//always use async dispose since we have I/O and sync Dispose doesn't wait when using xunit
public async ValueTask DisposeAsync()
{
await Task.CompletedTask;