Compare commits

...

3 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
4a6e5232ae Add opt-in multi-threading support with SupportsMultiThreadedExtraction flag
- Added IArchive.SupportsMultiThreadedExtraction property to indicate if multi-threading is supported
- Added ReaderOptions.EnableMultiThreadedExtraction option to opt-in to multi-threading
- Updated SeekableZipFilePart, TarFilePart, and SeekableFilePart to check the flag
- Added test to verify multi-threading flag behavior
- Multi-threading is now disabled by default for backward compatibility

Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-18 16:27:59 +00:00
copilot-swe-agent[bot]
3e23a6e5a6 Add multi-threading support for file-based archives - sync test passing
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-18 16:09:54 +00:00
copilot-swe-agent[bot]
e0a43e9727 Initial plan 2026-01-18 15:56:08 +00:00
9 changed files with 640 additions and 11 deletions

View File

@@ -145,6 +145,19 @@ public abstract class AbstractArchive<TEntry, TVolume> : IArchive, IAsyncArchive
/// </summary>
public virtual bool IsEncrypted => false;
/// <summary>
/// Indicates whether entries of this archive may be extracted concurrently.
/// The value is true only when all of the following hold:
/// 1. The archive has a source stream that is in file mode (opened from a FileInfo or file path, not a stream)
/// 2. ReaderOptions.EnableMultiThreadedExtraction has been switched on
/// 3. The archive is not SOLID (SOLID archives should use sequential extraction)
/// </summary>
public virtual bool SupportsMultiThreadedExtraction =>
    _sourceStream is { IsFileMode: true }
    && ReaderOptions.EnableMultiThreadedExtraction
    && !IsSolid;
/// <summary>
/// The archive can find all the parts of the archive needed to fully extract the archive. This forces the parsing of the entire archive.
/// </summary>

View File

@@ -44,4 +44,12 @@ public interface IArchive : IDisposable
/// Returns whether the archive is encrypted.
/// </summary>
bool IsEncrypted { get; }
/// <summary>
/// Returns whether multi-threaded extraction is supported for this archive.
/// Multi-threading is opt-in: it is supported when the archive is opened from a FileInfo or
/// file path (not a stream), ReaderOptions.EnableMultiThreadedExtraction has been set, and
/// the format supports random access (e.g., Zip, Tar, Rar).
/// SOLID archives (some Rar, all 7Zip) should use sequential extraction for best performance.
/// </summary>
bool SupportsMultiThreadedExtraction { get; }
}

View File

@@ -1,6 +1,7 @@
using System.IO;
using SharpCompress.Common.Rar;
using SharpCompress.Common.Rar.Headers;
using SharpCompress.IO;
namespace SharpCompress.Archives.Rar;
@@ -24,6 +25,76 @@ internal class SeekableFilePart : RarFilePart
internal override Stream GetCompressedStream()
{
Stream streamToUse;
// If the stream is a SourceStream in file mode with multi-threading enabled,
// create an independent stream to support concurrent extraction
if (
_stream is SourceStream sourceStream
&& sourceStream.IsFileMode
&& sourceStream.ReaderOptions.EnableMultiThreadedExtraction
)
{
var independentStream = sourceStream.CreateIndependentStream(0);
if (independentStream is not null)
{
streamToUse = independentStream;
streamToUse.Position = FileHeader.DataStartPosition;
if (FileHeader.R4Salt != null)
{
var cryptKey = new CryptKey3(_password!);
return new RarCryptoWrapper(streamToUse, FileHeader.R4Salt, cryptKey);
}
if (FileHeader.Rar5CryptoInfo != null)
{
var cryptKey = new CryptKey5(_password!, FileHeader.Rar5CryptoInfo);
return new RarCryptoWrapper(
streamToUse,
FileHeader.Rar5CryptoInfo.Salt,
cryptKey
);
}
return streamToUse;
}
}
// Check if the stream wraps a FileStream
Stream? underlyingStream = _stream;
if (_stream is IStreamStack streamStack)
{
underlyingStream = streamStack.BaseStream();
}
if (underlyingStream is FileStream fileStream)
{
// Create a new independent stream from the file
streamToUse = new FileStream(
fileStream.Name,
FileMode.Open,
FileAccess.Read,
FileShare.Read
);
streamToUse.Position = FileHeader.DataStartPosition;
if (FileHeader.R4Salt != null)
{
var cryptKey = new CryptKey3(_password!);
return new RarCryptoWrapper(streamToUse, FileHeader.R4Salt, cryptKey);
}
if (FileHeader.Rar5CryptoInfo != null)
{
var cryptKey = new CryptKey5(_password!, FileHeader.Rar5CryptoInfo);
return new RarCryptoWrapper(streamToUse, FileHeader.Rar5CryptoInfo.Salt, cryptKey);
}
return streamToUse;
}
// Fall back to existing behavior for stream-based sources
_stream.Position = FileHeader.DataStartPosition;
if (FileHeader.R4Salt != null)

View File

@@ -1,5 +1,6 @@
using System.IO;
using SharpCompress.Common.Tar.Headers;
using SharpCompress.IO;
namespace SharpCompress.Common.Tar;
@@ -20,8 +21,45 @@ internal sealed class TarFilePart : FilePart
internal override Stream GetCompressedStream()
{
if (_seekableStream != null)
if (_seekableStream is not null)
{
// If the seekable stream is a SourceStream in file mode with multi-threading enabled,
// create an independent stream to support concurrent extraction
if (
_seekableStream is SourceStream sourceStream
&& sourceStream.IsFileMode
&& sourceStream.ReaderOptions.EnableMultiThreadedExtraction
)
{
var independentStream = sourceStream.CreateIndependentStream(0);
if (independentStream is not null)
{
independentStream.Position = Header.DataStartPosition ?? 0;
return new TarReadOnlySubStream(independentStream, Header.Size);
}
}
// Check if the seekable stream wraps a FileStream
Stream? underlyingStream = _seekableStream;
if (_seekableStream is IStreamStack streamStack)
{
underlyingStream = streamStack.BaseStream();
}
if (underlyingStream is FileStream fileStream)
{
// Create a new independent stream from the file
var independentStream = new FileStream(
fileStream.Name,
FileMode.Open,
FileAccess.Read,
FileShare.Read
);
independentStream.Position = Header.DataStartPosition ?? 0;
return new TarReadOnlySubStream(independentStream, Header.Size);
}
// Fall back to existing behavior for stream-based sources
_seekableStream.Position = Header.DataStartPosition ?? 0;
return new TarReadOnlySubStream(_seekableStream, Header.Size);
}

View File

@@ -2,13 +2,16 @@ using System.IO;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.IO;
namespace SharpCompress.Common.Zip;
internal class SeekableZipFilePart : ZipFilePart
{
private bool _isLocalHeaderLoaded;
private volatile bool _isLocalHeaderLoaded;
private readonly SeekableZipHeaderFactory _headerFactory;
private readonly object _headerLock = new();
private readonly SemaphoreSlim _asyncHeaderSemaphore = new(1, 1);
internal SeekableZipFilePart(
SeekableZipHeaderFactory headerFactory,
@@ -21,8 +24,14 @@ internal class SeekableZipFilePart : ZipFilePart
{
if (!_isLocalHeaderLoaded)
{
LoadLocalHeader();
_isLocalHeaderLoaded = true;
lock (_headerLock)
{
if (!_isLocalHeaderLoaded)
{
LoadLocalHeader();
_isLocalHeaderLoaded = true;
}
}
}
return base.GetCompressedStream();
}
@@ -33,22 +42,173 @@ internal class SeekableZipFilePart : ZipFilePart
{
if (!_isLocalHeaderLoaded)
{
await LoadLocalHeaderAsync(cancellationToken);
_isLocalHeaderLoaded = true;
await _asyncHeaderSemaphore.WaitAsync(cancellationToken);
try
{
if (!_isLocalHeaderLoaded)
{
await LoadLocalHeaderAsync(cancellationToken);
_isLocalHeaderLoaded = true;
}
}
finally
{
_asyncHeaderSemaphore.Release();
}
}
return await base.GetCompressedStreamAsync(cancellationToken);
}
private void LoadLocalHeader() =>
Header = _headerFactory.GetLocalHeader(BaseStream, (DirectoryEntryHeader)Header);
private void LoadLocalHeader()
{
// Use an independent stream for loading the header if multi-threading is enabled
Stream streamToUse = BaseStream;
bool disposeStream = false;
private async ValueTask LoadLocalHeaderAsync(CancellationToken cancellationToken = default) =>
Header = await _headerFactory.GetLocalHeaderAsync(BaseStream, (DirectoryEntryHeader)Header);
if (
BaseStream is SourceStream sourceStream
&& sourceStream.IsFileMode
&& sourceStream.ReaderOptions.EnableMultiThreadedExtraction
)
{
var independentStream = sourceStream.CreateIndependentStream(0);
if (independentStream is not null)
{
streamToUse = independentStream;
disposeStream = true;
}
}
else
{
// Check if BaseStream wraps a FileStream
Stream? underlyingStream = BaseStream;
if (BaseStream is IStreamStack streamStack)
{
underlyingStream = streamStack.BaseStream();
}
if (underlyingStream is FileStream fileStream)
{
streamToUse = new FileStream(
fileStream.Name,
FileMode.Open,
FileAccess.Read,
FileShare.Read
);
disposeStream = true;
}
}
try
{
Header = _headerFactory.GetLocalHeader(streamToUse, (DirectoryEntryHeader)Header);
}
finally
{
if (disposeStream)
{
streamToUse.Dispose();
}
}
}
/// <summary>
/// Asynchronously replaces the directory entry header with the local header read from the archive.
/// When possible the header is read through a temporary, privately owned stream so that the
/// shared BaseStream is left untouched; that temporary stream is disposed before returning.
/// </summary>
/// <param name="cancellationToken">Accepted for API symmetry; it is not forwarded to the header factory here.</param>
private async ValueTask LoadLocalHeaderAsync(CancellationToken cancellationToken = default)
{
    // Non-null only when this method opened a stream of its own and must dispose it.
    Stream? ownedStream = null;

    if (
        BaseStream is SourceStream source
        && source.IsFileMode
        && source.ReaderOptions.EnableMultiThreadedExtraction
    )
    {
        // May yield null, in which case the shared BaseStream is used below.
        ownedStream = source.CreateIndependentStream(0);
    }
    else
    {
        // Look through a stream stack to see whether a FileStream sits underneath.
        Stream? innermost = BaseStream;
        if (BaseStream is IStreamStack stack)
        {
            innermost = stack.BaseStream();
        }

        if (innermost is FileStream backingFile)
        {
            ownedStream = new FileStream(
                backingFile.Name,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read
            );
        }
    }

    try
    {
        Header = await _headerFactory.GetLocalHeaderAsync(
            ownedStream ?? BaseStream,
            (DirectoryEntryHeader)Header
        );
    }
    finally
    {
        ownedStream?.Dispose();
    }
}
/// <summary>
/// Returns a stream positioned at the start of this entry's data.
/// A new independent stream is returned when BaseStream is a file-mode SourceStream with
/// multi-threaded extraction enabled, or when a FileStream is found underneath BaseStream;
/// otherwise the shared BaseStream itself is repositioned and returned.
/// </summary>
/// <returns>A stream whose Position equals Header.DataStartPosition.</returns>
protected override Stream CreateBaseStream()
{
    // Resolve the offset once, up front, so a missing data start position still fails
    // before any stream is opened.
    var dataStart = Header.DataStartPosition.NotNull();

    // The shared BaseStream is deliberately NOT repositioned here: when an independent
    // stream is handed out below, seeking the shared stream is unnecessary and lets
    // concurrent callers race on its position.

    // If BaseStream is a SourceStream in file mode with multi-threading enabled,
    // create an independent stream to support concurrent extraction
    if (
        BaseStream is SourceStream sourceStream
        && sourceStream.IsFileMode
        && sourceStream.ReaderOptions.EnableMultiThreadedExtraction
    )
    {
        // Create a new independent stream for this entry
        var independentStream = sourceStream.CreateIndependentStream(0);
        if (independentStream is not null)
        {
            independentStream.Position = dataStart;
            return independentStream;
        }
    }

    // Check if BaseStream wraps a FileStream (for multi-volume archives)
    Stream? underlyingStream = BaseStream;
    if (BaseStream is IStreamStack streamStack)
    {
        underlyingStream = streamStack.BaseStream();
    }

    if (underlyingStream is FileStream fileStream)
    {
        // Create a new independent stream from the file
        var independentStream = new FileStream(
            fileStream.Name,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read
        );
        independentStream.Position = dataStart;
        return independentStream;
    }

    // Fall back to existing behavior for stream-based sources: only here is the shared
    // stream itself repositioned.
    BaseStream.Position = dataStart;
    return BaseStream;
}
}

View File

@@ -98,6 +98,30 @@ public class SourceStream : Stream, IStreamStack
private Stream Current => _streams[_stream];
/// <summary>
/// Creates an independent stream for the specified volume index.
/// This allows multiple threads to read from different positions concurrently.
/// Only works when IsFileMode is true.
/// The returned stream is not tracked by this instance, so the caller is responsible for
/// disposing it.
/// </summary>
/// <param name="volumeIndex">The zero-based volume index to create a stream for</param>
/// <returns>
/// A new independent FileStream, or null if not in file mode, the index is negative,
/// or the volume doesn't exist
/// </returns>
public Stream? CreateIndependentStream(int volumeIndex)
{
    // A negative index can never name a volume. Without this guard LoadStream's
    // "count <= index" loop would be skipped and the _files indexer below would throw
    // instead of honouring the documented null result.
    if (!IsFileMode || volumeIndex < 0)
    {
        return null;
    }

    // Ensure the volume is loaded
    // NOTE(review): LoadStream appears to populate shared state when the volume has not
    // been loaded yet - confirm it is safe to reach from concurrent extraction threads.
    if (!LoadStream(volumeIndex))
    {
        return null;
    }

    // Create a new independent stream from the FileInfo
    return _files[volumeIndex].OpenRead();
}
public bool LoadStream(int index) //ensure all parts to id are loaded
{
while (_streams.Count <= index)

View File

@@ -28,4 +28,12 @@ public class ReaderOptions : OptionsBase
/// When set, progress updates will be reported as entries are extracted.
/// </summary>
public IProgress<ProgressReport>? Progress { get; set; }
/// <summary>
/// Enable multi-threaded extraction support when the archive is opened from a FileInfo or file path.
/// When enabled, multiple threads can extract different entries concurrently by creating
/// independent file streams. This is only effective for archives opened from files, not streams.
/// Use IArchive.SupportsMultiThreadedExtraction to check whether concurrent extraction is
/// actually available for a given archive.
/// Default is false for backward compatibility.
/// </summary>
public bool EnableMultiThreadedExtraction { get; set; }
}

View File

@@ -0,0 +1,115 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using SharpCompress.Archives.Tar;
using SharpCompress.Common;
using Xunit;
namespace SharpCompress.Test.Tar;
/// <summary>
/// Exercises concurrent extraction of Tar entries from an archive opened from a file with
/// multi-threaded extraction enabled. Extracted files are compared byte-for-byte against a
/// sequential reference read so that data corruption caused by streams sharing a position
/// is detected, not just missing files.
/// </summary>
public class TarMultiThreadTests : TestBase
{
    // Upper bound on the number of entries extracted concurrently by each test.
    private const int MaxEntries = 5;

    [Fact]
    public void Tar_Archive_Concurrent_Extraction_From_FileInfo()
    {
        // Test concurrent extraction of multiple entries from a Tar archive opened from FileInfo
        var fileInfo = new FileInfo(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar"));
        var expected = ReadExpectedContents(fileInfo);

        using var archive = TarArchive.OpenArchive(fileInfo, CreateMultiThreadedOptions());

        // Verify multi-threading is supported
        Assert.True(archive.SupportsMultiThreadedExtraction);

        var entries = archive.Entries.Where(e => !e.IsDirectory).Take(MaxEntries).ToList();
        // Guard against the test passing vacuously on an archive without file entries
        Assert.NotEmpty(entries);

        // Extract multiple entries concurrently
        var tasks = new List<Task>();
        var outputFiles = new Dictionary<string, string>();
        foreach (var entry in entries)
        {
            var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);
            outputFiles[entry.Key!] = outputFile;
            tasks.Add(
                Task.Run(() =>
                {
                    using var entryStream = entry.OpenEntryStream();
                    CopyToFile(entryStream, outputFile);
                })
            );
        }

        Task.WaitAll(tasks.ToArray());

        AssertExtractedContents(expected, outputFiles);
    }

    [Fact]
    public async Task Tar_Archive_Concurrent_Extraction_From_FileInfo_Async()
    {
        // Test concurrent async extraction of multiple entries from a Tar archive opened from FileInfo
        var fileInfo = new FileInfo(Path.Combine(TEST_ARCHIVES_PATH, "Tar.tar"));
        var expected = ReadExpectedContents(fileInfo);

        using var archive = TarArchive.OpenArchive(fileInfo, CreateMultiThreadedOptions());

        // Verify multi-threading is supported
        Assert.True(archive.SupportsMultiThreadedExtraction);

        var entries = archive.Entries.Where(e => !e.IsDirectory).Take(MaxEntries).ToList();
        Assert.NotEmpty(entries);

        // Extract multiple entries concurrently
        var tasks = new List<Task>();
        var outputFiles = new Dictionary<string, string>();
        foreach (var entry in entries)
        {
            var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);
            outputFiles[entry.Key!] = outputFile;
            tasks.Add(
                Task.Run(async () =>
                {
                    using var entryStream = await entry.OpenEntryStreamAsync();
                    await CopyToFileAsync(entryStream, outputFile);
                })
            );
        }

        await Task.WhenAll(tasks);

        AssertExtractedContents(expected, outputFiles);
    }

    // Builds reader options with multi-threaded extraction switched on.
    private static SharpCompress.Readers.ReaderOptions CreateMultiThreadedOptions() =>
        new SharpCompress.Readers.ReaderOptions { EnableMultiThreadedExtraction = true };

    // Reads the first MaxEntries file entries one after another through a separate archive
    // instance opened with default options, giving a reference to compare concurrent output with.
    private static Dictionary<string, byte[]> ReadExpectedContents(FileInfo archiveFile)
    {
        using var reference = TarArchive.OpenArchive(
            archiveFile,
            new SharpCompress.Readers.ReaderOptions()
        );

        var contents = new Dictionary<string, byte[]>();
        foreach (var entry in reference.Entries.Where(e => !e.IsDirectory).Take(MaxEntries))
        {
            using var entryStream = entry.OpenEntryStream();
            using var buffer = new MemoryStream();
            entryStream.CopyTo(buffer);
            contents[entry.Key!] = buffer.ToArray();
        }

        return contents;
    }

    // Copies an entry stream to the given path, creating the target directory when needed.
    private static void CopyToFile(Stream source, string outputFile)
    {
        var dir = Path.GetDirectoryName(outputFile);
        if (dir != null)
        {
            Directory.CreateDirectory(dir);
        }

        using var fileStream = File.Create(outputFile);
        source.CopyTo(fileStream);
    }

    // Asynchronous counterpart of CopyToFile.
    private static async Task CopyToFileAsync(Stream source, string outputFile)
    {
        var dir = Path.GetDirectoryName(outputFile);
        if (dir != null)
        {
            Directory.CreateDirectory(dir);
        }

        using var fileStream = File.Create(outputFile);
        await source.CopyToAsync(fileStream);
    }

    // Verifies every extracted file exists and matches the sequentially read reference bytes.
    private static void AssertExtractedContents(
        Dictionary<string, byte[]> expected,
        Dictionary<string, string> outputFiles
    )
    {
        foreach (var pair in outputFiles)
        {
            Assert.True(File.Exists(pair.Value), $"File {pair.Value} should exist");
            Assert.Equal(expected[pair.Key], File.ReadAllBytes(pair.Value));
        }
    }
}

View File

@@ -0,0 +1,192 @@
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using SharpCompress.Archives.Zip;
using SharpCompress.Common;
using Xunit;
namespace SharpCompress.Test.Zip;
/// <summary>
/// Exercises extraction of Zip entries with and without multi-threaded extraction enabled.
/// The concurrent tests compare each extracted file byte-for-byte against a sequential
/// reference read so that data corruption caused by streams sharing a position is detected,
/// not just missing files.
/// </summary>
public class ZipMultiThreadTests : TestBase
{
    // Upper bound on the number of entries extracted concurrently by each test.
    private const int MaxEntries = 5;

    [Fact]
    public void Zip_Archive_Without_MultiThreading_Enabled()
    {
        // Test that extraction still works when multi-threading is NOT enabled
        var fileInfo = new FileInfo(Path.Combine(TEST_ARCHIVES_PATH, "Zip.none.zip"));

        // Default options - multi-threading disabled
        using var archive = ZipArchive.OpenArchive(fileInfo);

        // Verify multi-threading is NOT supported
        Assert.False(archive.SupportsMultiThreadedExtraction);

        var entry = archive.Entries.First(e => !e.IsDirectory);
        var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);

        using (var entryStream = entry.OpenEntryStream())
        {
            CopyToFile(entryStream, outputFile);
        }

        Assert.True(File.Exists(outputFile));
    }

    [Fact]
    public void Zip_Archive_Concurrent_Extraction_From_FileInfo()
    {
        // Test concurrent extraction of multiple entries from a Zip archive opened from FileInfo
        var fileInfo = new FileInfo(Path.Combine(TEST_ARCHIVES_PATH, "Zip.none.zip"));
        var expected = ReadExpectedContents(fileInfo);

        using var archive = ZipArchive.OpenArchive(fileInfo, CreateMultiThreadedOptions());

        // Verify multi-threading is supported
        Assert.True(archive.SupportsMultiThreadedExtraction);

        var entries = archive.Entries.Where(e => !e.IsDirectory).Take(MaxEntries).ToList();
        // Guard against the test passing vacuously on an archive without file entries
        Assert.NotEmpty(entries);

        // Extract multiple entries concurrently
        var tasks = new List<Task>();
        var outputFiles = new Dictionary<string, string>();
        foreach (var entry in entries)
        {
            var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);
            outputFiles[entry.Key!] = outputFile;
            tasks.Add(
                Task.Run(() =>
                {
                    using var entryStream = entry.OpenEntryStream();
                    CopyToFile(entryStream, outputFile);
                })
            );
        }

        Task.WaitAll(tasks.ToArray());

        AssertExtractedContents(expected, outputFiles);
    }

    [Fact]
    public async Task Zip_Archive_Concurrent_Extraction_From_FileInfo_Async()
    {
        // Test concurrent async extraction of multiple entries from a Zip archive opened from FileInfo
        var fileInfo = new FileInfo(Path.Combine(TEST_ARCHIVES_PATH, "Zip.none.zip"));
        var expected = ReadExpectedContents(fileInfo);

        using var archive = ZipArchive.OpenArchive(fileInfo, CreateMultiThreadedOptions());

        // Verify multi-threading is supported
        Assert.True(archive.SupportsMultiThreadedExtraction);

        var entries = archive.Entries.Where(e => !e.IsDirectory).Take(MaxEntries).ToList();
        Assert.NotEmpty(entries);

        // Extract multiple entries concurrently
        var tasks = new List<Task>();
        var outputFiles = new Dictionary<string, string>();
        foreach (var entry in entries)
        {
            var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);
            outputFiles[entry.Key!] = outputFile;
            tasks.Add(
                Task.Run(async () =>
                {
                    using var entryStream = await entry.OpenEntryStreamAsync();
                    await CopyToFileAsync(entryStream, outputFile);
                })
            );
        }

        await Task.WhenAll(tasks);

        AssertExtractedContents(expected, outputFiles);
    }

    [Fact]
    public void Zip_Archive_Concurrent_Extraction_From_Path()
    {
        // Test concurrent extraction when opening from path (should use FileInfo internally)
        var testArchive = Path.Combine(TEST_ARCHIVES_PATH, "Zip.none.zip");
        var expected = ReadExpectedContents(new FileInfo(testArchive));

        using var archive = ZipArchive.OpenArchive(testArchive, CreateMultiThreadedOptions());

        // Opening from a path is expected to behave like opening from a FileInfo
        Assert.True(archive.SupportsMultiThreadedExtraction);

        var entries = archive.Entries.Where(e => !e.IsDirectory).Take(MaxEntries).ToList();
        Assert.NotEmpty(entries);

        // Extract multiple entries concurrently
        var tasks = new List<Task>();
        var outputFiles = new Dictionary<string, string>();
        foreach (var entry in entries)
        {
            var outputFile = Path.Combine(SCRATCH_FILES_PATH, entry.Key!);
            outputFiles[entry.Key!] = outputFile;
            tasks.Add(
                Task.Run(() =>
                {
                    using var entryStream = entry.OpenEntryStream();
                    CopyToFile(entryStream, outputFile);
                })
            );
        }

        Task.WaitAll(tasks.ToArray());

        AssertExtractedContents(expected, outputFiles);
    }

    // Builds reader options with multi-threaded extraction switched on.
    private static SharpCompress.Readers.ReaderOptions CreateMultiThreadedOptions() =>
        new SharpCompress.Readers.ReaderOptions { EnableMultiThreadedExtraction = true };

    // Reads the first MaxEntries file entries one after another through a separate archive
    // instance opened with default options, giving a reference to compare concurrent output with.
    private static Dictionary<string, byte[]> ReadExpectedContents(FileInfo archiveFile)
    {
        using var reference = ZipArchive.OpenArchive(archiveFile);

        var contents = new Dictionary<string, byte[]>();
        foreach (var entry in reference.Entries.Where(e => !e.IsDirectory).Take(MaxEntries))
        {
            using var entryStream = entry.OpenEntryStream();
            using var buffer = new MemoryStream();
            entryStream.CopyTo(buffer);
            contents[entry.Key!] = buffer.ToArray();
        }

        return contents;
    }

    // Copies an entry stream to the given path, creating the target directory when needed.
    private static void CopyToFile(Stream source, string outputFile)
    {
        var dir = Path.GetDirectoryName(outputFile);
        if (dir != null)
        {
            Directory.CreateDirectory(dir);
        }

        using var fileStream = File.Create(outputFile);
        source.CopyTo(fileStream);
    }

    // Asynchronous counterpart of CopyToFile.
    private static async Task CopyToFileAsync(Stream source, string outputFile)
    {
        var dir = Path.GetDirectoryName(outputFile);
        if (dir != null)
        {
            Directory.CreateDirectory(dir);
        }

        using var fileStream = File.Create(outputFile);
        await source.CopyToAsync(fileStream);
    }

    // Verifies every extracted file exists and matches the sequentially read reference bytes.
    private static void AssertExtractedContents(
        Dictionary<string, byte[]> expected,
        Dictionary<string, string> outputFiles
    )
    {
        foreach (var pair in outputFiles)
        {
            Assert.True(File.Exists(pair.Value), $"File {pair.Value} should exist");
            Assert.Equal(expected[pair.Key], File.ReadAllBytes(pair.Value));
        }
    }
}