mirror of
https://github.com/adamhathcock/sharpcompress.git
synced 2026-02-04 05:25:00 +00:00
Compare commits
10 Commits
0.44.2
...
copilot/ad
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3ce90ae94 | ||
|
|
130e169862 | ||
|
|
0dc63223ab | ||
|
|
9058645fea | ||
|
|
7339567880 | ||
|
|
8c6d914004 | ||
|
|
d9c9612b8f | ||
|
|
a35089900f | ||
|
|
ac4bcd0fe3 | ||
|
|
0ac6b46379 |
@@ -25,7 +25,7 @@
|
||||
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Decompress | SevenZipArchive | N/A | N/A |
|
||||
|
||||
1. SOLID Rars are only supported in the RarReader API.
|
||||
2. Zip format supports pkware and WinzipAES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported but only with seekable streams as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
|
||||
2. Zip format supports pkware and WinzipAES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported but only with seekable streams as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. SOZip (Seek-Optimized ZIP) detection is supported for reading. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
|
||||
3. The Tar format requires a file size in the header. If no size is specified to the TarWriter and the stream is not seekable, then an exception will be thrown.
|
||||
4. The 7Zip format doesn't allow for reading as a forward-only stream so 7Zip is only supported through the Archive API. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
|
||||
5. LZip has no support for extra data like the file name or timestamp. There is a default filename used when looking at the entry Key on the archive.
|
||||
|
||||
@@ -15,6 +15,10 @@ internal enum ExtraDataType : ushort
|
||||
UnicodePathExtraField = 0x7075,
|
||||
Zip64ExtendedInformationExtraField = 0x0001,
|
||||
UnixTimeExtraField = 0x5455,
|
||||
|
||||
// SOZip (Seek-Optimized ZIP) extra field
|
||||
// Used to link a main file to its SOZip index file
|
||||
SOZip = 0x564B,
|
||||
}
|
||||
|
||||
internal class ExtraData
|
||||
@@ -233,6 +237,44 @@ internal sealed class UnixTimeExtraField : ExtraData
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Extra-field record used by SOZip (Seek-Optimized ZIP). Its payload is one
/// little-endian unsigned 64-bit value: the position inside the ZIP archive of
/// the local header that belongs to the entry's index file.
/// </summary>
internal sealed class SOZipExtraField : ExtraData
{
    // Size in bytes of the payload: a single unsigned 64-bit offset.
    private const int PayloadLength = sizeof(ulong);

    public SOZipExtraField(ExtraDataType type, ushort length, byte[] dataBytes)
        : base(type, length, dataBytes) { }

    /// <summary>
    /// Offset of the SOZip index entry's local header within the archive.
    /// Evaluates to 0 when the payload is missing or shorter than eight bytes.
    /// </summary>
    internal ulong IndexOffset
    {
        get
        {
            var payload = DataBytes;
            return payload is not null && payload.Length >= PayloadLength
                ? BinaryPrimitives.ReadUInt64LittleEndian(payload)
                : 0UL;
        }
    }

    /// <summary>
    /// Builds a SOZip extra field whose payload encodes <paramref name="indexOffset"/>.
    /// </summary>
    /// <param name="indexOffset">Offset of the index file's local entry header</param>
    /// <returns>A freshly allocated <see cref="SOZipExtraField"/></returns>
    public static SOZipExtraField Create(ulong indexOffset)
    {
        var payload = new byte[PayloadLength];
        BinaryPrimitives.WriteUInt64LittleEndian(payload, indexOffset);
        return new SOZipExtraField(ExtraDataType.SOZip, PayloadLength, payload);
    }
}
|
||||
|
||||
internal static class LocalEntryHeaderExtraFactory
|
||||
{
|
||||
internal static ExtraData Create(ExtraDataType type, ushort length, byte[] extraData) =>
|
||||
@@ -246,6 +288,7 @@ internal static class LocalEntryHeaderExtraFactory
|
||||
ExtraDataType.Zip64ExtendedInformationExtraField =>
|
||||
new Zip64ExtendedInformationExtraField(type, length, extraData),
|
||||
ExtraDataType.UnixTimeExtraField => new UnixTimeExtraField(type, length, extraData),
|
||||
ExtraDataType.SOZip => new SOZipExtraField(type, length, extraData),
|
||||
_ => new ExtraData(type, length, extraData),
|
||||
};
|
||||
}
|
||||
|
||||
150
src/SharpCompress/Common/Zip/SOZip/SOZipDeflateStream.cs
Normal file
150
src/SharpCompress/Common/Zip/SOZip/SOZipDeflateStream.cs
Normal file
@@ -0,0 +1,150 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using SharpCompress.Compressors;
|
||||
using SharpCompress.Compressors.Deflate;
|
||||
|
||||
namespace SharpCompress.Common.Zip.SOZip;
|
||||
|
||||
/// <summary>
/// Write-only Deflate stream that requests a flush of the compressor every
/// <see cref="ChunkSize"/> uncompressed bytes and records, for each such point,
/// how many compressed bytes had reached the underlying stream. The recorded
/// offsets are the raw material for a SOZip index.
/// </summary>
internal sealed class SOZipDeflateStream : Stream
{
    // The SOZip index in this code base stores the chunk size as (size / 1024) - 1
    // in a 16-bit field, so only multiples of 1024 up to 64 MiB can be described.
    private const int ChunkSizeGranularity = 1024;
    private const int MaxChunkSize = 65536 * ChunkSizeGranularity;

    private readonly DeflateStream _deflateStream;
    private readonly Stream _baseStream;
    private readonly uint _chunkSize;
    private readonly List<ulong> _compressedOffsets = new();
    private readonly long _baseOffset;
    private long _uncompressedBytesWritten;
    private long _nextSyncPoint;
    private bool _disposed;

    /// <summary>
    /// Creates a new SOZip Deflate stream.
    /// </summary>
    /// <param name="baseStream">
    /// The underlying stream to write to. Its <see cref="Stream.Position"/> is read
    /// to compute compressed offsets.
    /// </param>
    /// <param name="compressionLevel">The compression level</param>
    /// <param name="chunkSize">
    /// Number of uncompressed bytes between flush points. Must be a positive
    /// multiple of 1024 no larger than 64 MiB.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="chunkSize"/> is not a positive multiple of 1024, or exceeds 64 MiB
    /// </exception>
    public SOZipDeflateStream(Stream baseStream, CompressionLevel compressionLevel, int chunkSize)
    {
        if (baseStream is null)
        {
            throw new ArgumentNullException(nameof(baseStream));
        }

        // A chunk size of zero would make Write() loop forever (the next sync point
        // would never advance); other invalid sizes cannot be stored in the index.
        if (
            chunkSize <= 0
            || chunkSize > MaxChunkSize
            || chunkSize % ChunkSizeGranularity != 0
        )
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkSize),
                chunkSize,
                "SOZip chunk size must be a positive multiple of 1024 bytes, at most 64 MiB"
            );
        }

        _baseStream = baseStream;
        _chunkSize = (uint)chunkSize;
        _baseOffset = baseStream.Position;
        _nextSyncPoint = chunkSize;

        // The first chunk starts at the very beginning of the compressed data.
        _compressedOffsets.Add(0);

        _deflateStream = new DeflateStream(baseStream, CompressionMode.Compress, compressionLevel);
    }

    /// <summary>
    /// Gets a copy of the compressed offsets recorded so far (relative to the
    /// position the base stream had when this stream was created).
    /// </summary>
    public ulong[] CompressedOffsets => _compressedOffsets.ToArray();

    /// <summary>
    /// Gets the total number of uncompressed bytes written
    /// </summary>
    public ulong UncompressedBytesWritten => (ulong)_uncompressedBytesWritten;

    /// <summary>
    /// Gets the number of bytes the base stream has advanced since construction
    /// </summary>
    public ulong CompressedBytesWritten => (ulong)(_baseStream.Position - _baseOffset);

    /// <summary>
    /// Gets the chunk size being used
    /// </summary>
    public uint ChunkSize => _chunkSize;

    public override bool CanRead => false;

    public override bool CanSeek => false;

    public override bool CanWrite => !_disposed && _deflateStream.CanWrite;

    public override long Length => throw new NotSupportedException();

    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }

    public override void Flush() => _deflateStream.Flush();

    public override int Read(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException();

    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

    public override void SetLength(long value) => throw new NotSupportedException();

    /// <summary>
    /// Compresses <paramref name="count"/> bytes, splitting the write so that a
    /// flush point is recorded each time a chunk boundary is crossed.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The stream has been disposed</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(SOZipDeflateStream));
        }

        var remaining = count;
        var currentOffset = offset;

        while (remaining > 0)
        {
            // Never exceeds the chunk size, so the narrowing cast is safe.
            var bytesUntilSync = (int)(_nextSyncPoint - _uncompressedBytesWritten);

            if (bytesUntilSync <= 0)
            {
                // The boundary is handled lazily: the flush happens just before the
                // first byte of the next chunk, so a file that ends exactly on a
                // boundary gets no trailing offset.
                PerformSyncFlush();
                continue;
            }

            // Write up to, but not past, the next chunk boundary.
            var bytesToWrite = Math.Min(remaining, bytesUntilSync);
            _deflateStream.Write(buffer, currentOffset, bytesToWrite);

            _uncompressedBytesWritten += bytesToWrite;
            currentOffset += bytesToWrite;
            remaining -= bytesToWrite;
        }
    }

    // Requests a sync flush from the compressor, records where the compressed
    // output stands, and schedules the next boundary.
    private void PerformSyncFlush()
    {
        // NOTE(review): a sync flush by itself does not reset the deflate history
        // window; the SOZip specification appears to require a full flush at chunk
        // boundaries for chunks to be independently decodable. Also confirm that
        // DeflateStream.Flush() actually honours FlushMode. Behaviour left unchanged.
        var originalFlushMode = _deflateStream.FlushMode;
        _deflateStream.FlushMode = FlushType.Sync;
        _deflateStream.Flush();
        _deflateStream.FlushMode = originalFlushMode;

        // Record the compressed offset for this flush point.
        var compressedOffset = (ulong)(_baseStream.Position - _baseOffset);
        _compressedOffsets.Add(compressedOffset);

        _nextSyncPoint += _chunkSize;
    }

    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (disposing)
        {
            _deflateStream.Dispose();
        }

        base.Dispose(disposing);
    }
}
|
||||
367
src/SharpCompress/Common/Zip/SOZip/SOZipIndex.cs
Normal file
367
src/SharpCompress/Common/Zip/SOZip/SOZipIndex.cs
Normal file
@@ -0,0 +1,367 @@
|
||||
using System;
|
||||
using System.Buffers.Binary;
|
||||
using System.IO;
|
||||
|
||||
namespace SharpCompress.Common.Zip.SOZip;
|
||||
|
||||
/// <summary>
/// Represents a SOZip (Seek-Optimized ZIP) index that enables random access
/// within DEFLATE-compressed files by storing offsets to flush points.
/// </summary>
/// <remarks>
/// Serialized layout, as read and written by this class (all integers little-endian):
/// 4-byte magic, 1-byte version, 1 reserved byte, 2-byte encoded chunk size
/// ((size / 1024) - 1), 8-byte uncompressed size, 8-byte compressed size,
/// 4-byte offset count, then one 8-byte compressed offset per chunk.
/// </remarks>
[CLSCompliant(false)]
public sealed class SOZipIndex
{
    /// <summary>
    /// SOZip index file magic number. Written little-endian, so the bytes on disk
    /// are 0x53 0x4F 0x5A 0x6F ("SOZo").
    /// </summary>
    public const uint SOZIP_MAGIC = 0x6F5A4F53;

    /// <summary>
    /// Index format version written by <see cref="Write"/> and required by <see cref="Read(Stream)"/>
    /// </summary>
    public const byte SOZIP_VERSION = 1;

    /// <summary>
    /// Index file extension suffix
    /// </summary>
    public const string INDEX_EXTENSION = ".sozip.idx";

    /// <summary>
    /// Default chunk size in bytes (32KB)
    /// </summary>
    public const uint DEFAULT_CHUNK_SIZE = 32768;

    // The chunk size is stored in units of 1024 bytes, minus one, in 16 bits.
    private const uint ChunkSizeUnit = 1024;
    private const uint MaxEncodableChunkSize = (ushort.MaxValue + 1u) * ChunkSizeUnit;

    // Upper bound accepted for the offset count; larger values cannot be held in an array.
    private const uint MaxOffsetCount = 0x7FFFFFC7;

    // Cap on how many offsets are pre-allocated before any of them has been read,
    // so a forged count cannot trigger a huge allocation.
    private const uint MaxOffsetPreallocation = 4096;

    /// <summary>
    /// The version of the SOZip index format
    /// </summary>
    public byte Version { get; private set; }

    /// <summary>
    /// Size of each uncompressed chunk in bytes
    /// </summary>
    public uint ChunkSize { get; private set; }

    /// <summary>
    /// Total uncompressed size of the file
    /// </summary>
    public ulong UncompressedSize { get; private set; }

    /// <summary>
    /// Total compressed size of the file
    /// </summary>
    public ulong CompressedSize { get; private set; }

    /// <summary>
    /// Number of offset entries in the index
    /// </summary>
    public uint OffsetCount { get; private set; }

    /// <summary>
    /// Array of compressed offsets for each chunk
    /// </summary>
    public ulong[] CompressedOffsets { get; private set; } = Array.Empty<ulong>();

    /// <summary>
    /// Creates a new empty SOZip index (all sizes zero, no offsets)
    /// </summary>
    public SOZipIndex() { }

    /// <summary>
    /// Creates a new SOZip index with specified parameters
    /// </summary>
    /// <param name="chunkSize">Size of each uncompressed chunk</param>
    /// <param name="uncompressedSize">Total uncompressed size</param>
    /// <param name="compressedSize">Total compressed size</param>
    /// <param name="compressedOffsets">Array of compressed offsets (stored by reference, not copied)</param>
    /// <exception cref="ArgumentNullException"><paramref name="compressedOffsets"/> is null</exception>
    public SOZipIndex(
        uint chunkSize,
        ulong uncompressedSize,
        ulong compressedSize,
        ulong[] compressedOffsets
    )
    {
        if (compressedOffsets is null)
        {
            throw new ArgumentNullException(nameof(compressedOffsets));
        }

        Version = SOZIP_VERSION;
        ChunkSize = chunkSize;
        UncompressedSize = uncompressedSize;
        CompressedSize = compressedSize;
        OffsetCount = (uint)compressedOffsets.Length;
        CompressedOffsets = compressedOffsets;
    }

    /// <summary>
    /// Reads a SOZip index from a stream
    /// </summary>
    /// <param name="stream">The stream containing the index data</param>
    /// <returns>A parsed SOZipIndex instance</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null</exception>
    /// <exception cref="InvalidDataException">If the stream doesn't contain valid SOZip index data</exception>
    public static SOZipIndex Read(Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        var index = new SOZipIndex();
        Span<byte> buf8 = stackalloc byte[8];
        var buf4 = buf8.Slice(0, 4);
        var buf2 = buf8.Slice(0, 2);

        // Magic number
        if (!TryReadExactly(stream, buf4))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read magic number");
        }

        var magic = BinaryPrimitives.ReadUInt32LittleEndian(buf4);
        if (magic != SOZIP_MAGIC)
        {
            throw new InvalidDataException(
                $"Invalid SOZip index: magic number mismatch (expected 0x{SOZIP_MAGIC:X8}, got 0x{magic:X8})"
            );
        }

        // Version
        var versionByte = stream.ReadByte();
        if (versionByte < 0)
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read version");
        }
        index.Version = (byte)versionByte;

        if (index.Version != SOZIP_VERSION)
        {
            throw new InvalidDataException(
                $"Unsupported SOZip index version: {index.Version} (expected {SOZIP_VERSION})"
            );
        }

        // Reserved byte; its value is ignored. If the stream ends here the
        // chunk-size read below reports the truncation.
        stream.ReadByte();

        // Chunk size is stored as (actual_size / 1024) - 1
        if (!TryReadExactly(stream, buf2))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read chunk size");
        }
        var chunkSizeEncoded = BinaryPrimitives.ReadUInt16LittleEndian(buf2);
        index.ChunkSize = ((uint)chunkSizeEncoded + 1) * ChunkSizeUnit;

        // Uncompressed size
        if (!TryReadExactly(stream, buf8))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read uncompressed size");
        }
        index.UncompressedSize = BinaryPrimitives.ReadUInt64LittleEndian(buf8);

        // Compressed size
        if (!TryReadExactly(stream, buf8))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read compressed size");
        }
        index.CompressedSize = BinaryPrimitives.ReadUInt64LittleEndian(buf8);

        // Offset count
        if (!TryReadExactly(stream, buf4))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read offset count");
        }
        var offsetCount = BinaryPrimitives.ReadUInt32LittleEndian(buf4);
        if (offsetCount > MaxOffsetCount)
        {
            throw new InvalidDataException(
                $"Invalid SOZip index: offset count {offsetCount} is too large"
            );
        }

        // The count comes from the file and is not trusted: storage grows only as
        // offsets are actually read, so a forged count fails on truncation instead
        // of forcing a multi-gigabyte allocation up front.
        var offsets = new System.Collections.Generic.List<ulong>(
            (int)Math.Min(offsetCount, MaxOffsetPreallocation)
        );
        for (uint i = 0; i < offsetCount; i++)
        {
            if (!TryReadExactly(stream, buf8))
            {
                throw new InvalidDataException($"Invalid SOZip index: unable to read offset {i}");
            }
            offsets.Add(BinaryPrimitives.ReadUInt64LittleEndian(buf8));
        }

        index.OffsetCount = offsetCount;
        index.CompressedOffsets = offsets.ToArray();
        return index;
    }

    /// <summary>
    /// Reads a SOZip index from a byte array
    /// </summary>
    /// <param name="data">The byte array containing the index data</param>
    /// <returns>A parsed SOZipIndex instance</returns>
    public static SOZipIndex Read(byte[] data)
    {
        using var stream = new MemoryStream(data);
        return Read(stream);
    }

    /// <summary>
    /// Writes this SOZip index to a stream
    /// </summary>
    /// <param name="stream">The stream to write to</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null</exception>
    /// <exception cref="InvalidOperationException">
    /// <see cref="ChunkSize"/> cannot be represented in the 16-bit encoded field
    /// (it is zero, not a multiple of 1024, or larger than 64 MiB)
    /// </exception>
    public void Write(Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        // Refuse to emit an index whose chunk size would be silently truncated or
        // wrapped by the (size / 1024) - 1 encoding.
        if (
            ChunkSize == 0
            || ChunkSize > MaxEncodableChunkSize
            || ChunkSize % ChunkSizeUnit != 0
        )
        {
            throw new InvalidOperationException(
                $"SOZip chunk size {ChunkSize} cannot be encoded: it must be a non-zero multiple of {ChunkSizeUnit} no larger than {MaxEncodableChunkSize}"
            );
        }

        Span<byte> buf8 = stackalloc byte[8];

        // Magic number
        BinaryPrimitives.WriteUInt32LittleEndian(buf8, SOZIP_MAGIC);
        stream.Write(buf8.Slice(0, 4));

        // Version, then the reserved byte
        stream.WriteByte(SOZIP_VERSION);
        stream.WriteByte(0);

        // Chunk size (encoded as (size/1024)-1)
        var chunkSizeEncoded = (ushort)((ChunkSize / ChunkSizeUnit) - 1);
        BinaryPrimitives.WriteUInt16LittleEndian(buf8, chunkSizeEncoded);
        stream.Write(buf8.Slice(0, 2));

        // Uncompressed size
        BinaryPrimitives.WriteUInt64LittleEndian(buf8, UncompressedSize);
        stream.Write(buf8);

        // Compressed size
        BinaryPrimitives.WriteUInt64LittleEndian(buf8, CompressedSize);
        stream.Write(buf8);

        // Offset count
        BinaryPrimitives.WriteUInt32LittleEndian(buf8, OffsetCount);
        stream.Write(buf8.Slice(0, 4));

        // Offsets
        foreach (var offset in CompressedOffsets)
        {
            BinaryPrimitives.WriteUInt64LittleEndian(buf8, offset);
            stream.Write(buf8);
        }
    }

    /// <summary>
    /// Converts this SOZip index to a byte array
    /// </summary>
    /// <returns>Byte array containing the serialized index</returns>
    public byte[] ToByteArray()
    {
        using var stream = new MemoryStream();
        Write(stream);
        return stream.ToArray();
    }

    /// <summary>
    /// Gets the index of the chunk that contains the specified uncompressed offset
    /// </summary>
    /// <param name="uncompressedOffset">The uncompressed byte offset</param>
    /// <returns>The chunk index</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The offset is negative or not smaller than <see cref="UncompressedSize"/>
    /// </exception>
    /// <exception cref="InvalidOperationException"><see cref="ChunkSize"/> is zero</exception>
    public int GetChunkIndex(long uncompressedOffset)
    {
        if (uncompressedOffset < 0 || (ulong)uncompressedOffset >= UncompressedSize)
        {
            throw new ArgumentOutOfRangeException(
                nameof(uncompressedOffset),
                "Offset is out of range"
            );
        }

        if (ChunkSize == 0)
        {
            // Avoid a bare DivideByZeroException for an index built with chunk size 0.
            throw new InvalidOperationException("SOZip index has a chunk size of zero");
        }

        return (int)((ulong)uncompressedOffset / ChunkSize);
    }

    /// <summary>
    /// Gets the compressed offset for the specified chunk index
    /// </summary>
    /// <param name="chunkIndex">The chunk index</param>
    /// <returns>The compressed byte offset for the start of the chunk</returns>
    /// <exception cref="ArgumentOutOfRangeException">The chunk index has no entry in <see cref="CompressedOffsets"/></exception>
    public ulong GetCompressedOffset(int chunkIndex)
    {
        if (chunkIndex < 0 || chunkIndex >= CompressedOffsets.Length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkIndex),
                "Chunk index is out of range"
            );
        }

        return CompressedOffsets[chunkIndex];
    }

    /// <summary>
    /// Gets the uncompressed offset for the start of the specified chunk
    /// </summary>
    /// <param name="chunkIndex">The chunk index</param>
    /// <returns>The uncompressed byte offset for the start of the chunk</returns>
    /// <exception cref="ArgumentOutOfRangeException">The chunk index has no entry in <see cref="CompressedOffsets"/></exception>
    public ulong GetUncompressedOffset(int chunkIndex)
    {
        if (chunkIndex < 0 || chunkIndex >= CompressedOffsets.Length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkIndex),
                "Chunk index is out of range"
            );
        }

        return (ulong)chunkIndex * ChunkSize;
    }

    /// <summary>
    /// Gets the name of the SOZip index file for a given entry name
    /// </summary>
    /// <param name="entryName">The main entry name</param>
    /// <returns>
    /// The entry's file name prefixed with '.' and suffixed with
    /// <see cref="INDEX_EXTENSION"/>, in the same directory, using '/' separators
    /// </returns>
    public static string GetIndexFileName(string entryName)
    {
        var directory = Path.GetDirectoryName(entryName);
        var fileName = Path.GetFileName(entryName);

        // The index file is hidden (prefixed with .)
        var indexFileName = $".{fileName}{INDEX_EXTENSION}";

        if (string.IsNullOrEmpty(directory))
        {
            return indexFileName;
        }

        return Path.Combine(directory, indexFileName).Replace('\\', '/');
    }

    /// <summary>
    /// Checks if a file name is a SOZip index file
    /// </summary>
    /// <param name="fileName">The file name to check</param>
    /// <returns>
    /// True if the last path component has the form ".&lt;name&gt;.sozip.idx" with a
    /// non-empty &lt;name&gt;
    /// </returns>
    public static bool IsIndexFile(string fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return false;
        }

        var name = Path.GetFileName(fileName);

        // The length test rejects the bare ".sozip.idx", where the leading dot and the
        // extension's dot are the same character and no main file name remains.
        return name.Length > INDEX_EXTENSION.Length + 1
            && name.StartsWith(".", StringComparison.Ordinal)
            && name.EndsWith(INDEX_EXTENSION, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Gets the main file name from a SOZip index file name
    /// </summary>
    /// <param name="indexFileName">The index file name</param>
    /// <returns>The main file name, or null if not a valid index file</returns>
    public static string? GetMainFileName(string indexFileName)
    {
        if (!IsIndexFile(indexFileName))
        {
            return null;
        }

        var directory = Path.GetDirectoryName(indexFileName);
        var name = Path.GetFileName(indexFileName);

        // Remove leading '.' and trailing '.sozip.idx'
        var mainName = name.Substring(1, name.Length - 1 - INDEX_EXTENSION.Length);

        if (string.IsNullOrEmpty(directory))
        {
            return mainName;
        }

        return Path.Combine(directory, mainName).Replace('\\', '/');
    }

    // Fills the whole buffer from the stream, tolerating short reads.
    // Returns false if the stream ends before the buffer is full.
    private static bool TryReadExactly(Stream stream, Span<byte> buffer)
    {
        while (!buffer.IsEmpty)
        {
            var read = stream.Read(buffer);
            if (read <= 0)
            {
                return false;
            }
            buffer = buffer.Slice(read);
        }

        return true;
    }
}
|
||||
@@ -2,6 +2,7 @@ using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using SharpCompress.Common.Zip.Headers;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
|
||||
namespace SharpCompress.Common.Zip;
|
||||
|
||||
@@ -11,7 +12,7 @@ public class ZipEntry : Entry
|
||||
|
||||
internal ZipEntry(ZipFilePart? filePart)
|
||||
{
|
||||
if (filePart == null)
|
||||
if (filePart is null)
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -88,4 +89,24 @@ public class ZipEntry : Entry
|
||||
public override int? Attrib => (int?)_filePart?.Header.ExternalFileAttributes;
|
||||
|
||||
public string? Comment => _filePart?.Header.Comment;
|
||||
|
||||
/// <summary>
/// Reports whether this entry's header carries an extra field of type
/// <see cref="ExtraDataType.SOZip"/>, i.e. whether the entry is marked as
/// SOZip (Seek-Optimized ZIP). Entries without a file part report false.
/// </summary>
public bool IsSozip
{
    get
    {
        var part = _filePart;
        return part is not null
            && part.Header.Extra.Any(field => field.Type == ExtraDataType.SOZip);
    }
}
|
||||
|
||||
/// <summary>
/// Reports whether this entry's key names a SOZip index file, as decided by
/// <see cref="SOZipIndex.IsIndexFile"/>. Entries with no key report false.
/// </summary>
public bool IsSozipIndexFile
{
    get
    {
        var entryKey = Key;
        return entryKey is not null && SOZipIndex.IsIndexFile(entryKey);
    }
}
|
||||
|
||||
/// <summary>
/// The first <see cref="SOZipExtraField"/> found among the header's extra
/// fields, or null when there is no file part or no such field.
/// </summary>
internal SOZipExtraField? SOZipExtra
{
    get
    {
        if (_filePart is null)
        {
            return null;
        }

        return _filePart.Header.Extra.OfType<SOZipExtraField>().FirstOrDefault();
    }
}
|
||||
}
|
||||
|
||||
@@ -34,6 +34,7 @@ internal class ZipCentralDirectoryEntry
|
||||
internal ulong Decompressed { get; set; }
|
||||
internal ushort Zip64HeaderOffset { get; set; }
|
||||
internal ulong HeaderOffset { get; }
|
||||
// Exposes the entry's file name to other members of the assembly.
internal string FileName
{
    get { return fileName; }
}
|
||||
|
||||
internal uint Write(Stream outputStream)
|
||||
{
|
||||
|
||||
@@ -8,6 +8,7 @@ using System.Threading.Tasks;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Common.Zip;
|
||||
using SharpCompress.Common.Zip.Headers;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
using SharpCompress.Compressors;
|
||||
using SharpCompress.Compressors.BZip2;
|
||||
using SharpCompress.Compressors.Deflate;
|
||||
@@ -27,12 +28,19 @@ public class ZipWriter : AbstractWriter
|
||||
private long streamPosition;
|
||||
private PpmdProperties? ppmdProps;
|
||||
private readonly bool isZip64;
|
||||
private readonly bool enableSOZip;
|
||||
private readonly int sozipChunkSize;
|
||||
private readonly long sozipMinFileSize;
|
||||
|
||||
public ZipWriter(Stream destination, ZipWriterOptions zipWriterOptions)
|
||||
: base(ArchiveType.Zip, zipWriterOptions)
|
||||
{
|
||||
zipComment = zipWriterOptions.ArchiveComment ?? string.Empty;
|
||||
isZip64 = zipWriterOptions.UseZip64;
|
||||
enableSOZip = zipWriterOptions.EnableSOZip;
|
||||
sozipChunkSize = zipWriterOptions.SOZipChunkSize;
|
||||
sozipMinFileSize = zipWriterOptions.SOZipMinFileSize;
|
||||
|
||||
if (destination.CanSeek)
|
||||
{
|
||||
streamPosition = destination.Position;
|
||||
@@ -117,12 +125,21 @@ public class ZipWriter : AbstractWriter
|
||||
|
||||
var headersize = (uint)WriteHeader(entryPath, options, entry, useZip64);
|
||||
streamPosition += headersize;
|
||||
|
||||
// Determine if SOZip should be used for this entry
|
||||
var useSozip =
|
||||
(options.EnableSOZip ?? enableSOZip)
|
||||
&& compression == ZipCompressionMethod.Deflate
|
||||
&& OutputStream.CanSeek;
|
||||
|
||||
return new ZipWritingStream(
|
||||
this,
|
||||
OutputStream.NotNull(),
|
||||
entry,
|
||||
compression,
|
||||
options.CompressionLevel ?? compressionLevel
|
||||
options.CompressionLevel ?? compressionLevel,
|
||||
useSozip,
|
||||
useSozip ? sozipChunkSize : 0
|
||||
);
|
||||
}
|
||||
|
||||
@@ -304,6 +321,64 @@ public class ZipWriter : AbstractWriter
|
||||
OutputStream.Write(intBuf);
|
||||
}
|
||||
|
||||
// Emits the hidden SOZip index entry that accompanies dataEntry: serializes the
// chunk table collected by sozipStream, writes it as a stored (uncompressed)
// entry at the current stream position, and registers it in the central directory.
private void WriteSozipIndexFile(
    ZipCentralDirectoryEntry dataEntry,
    SOZipDeflateStream sozipStream
)
{
    var hiddenName = SOZipIndex.GetIndexFileName(dataEntry.FileName);

    // Serialize the offsets and sizes gathered while compressing the data entry.
    var payload = new SOZipIndex(
        sozipStream.ChunkSize,
        sozipStream.UncompressedBytesWritten,
        sozipStream.CompressedBytesWritten,
        sozipStream.CompressedOffsets
    ).ToByteArray();
    var payloadLength = payload.Length;

    // CRC-32 over the serialized index.
    var checksum = new CRC32();
    checksum.SlurpBlock(payload, 0, payloadLength);
    var payloadCrc = (uint)checksum.Crc32Result;

    // Central-directory record for the index; its local header starts here.
    var centralEntry = new ZipCentralDirectoryEntry(
        ZipCompressionMethod.None,
        hiddenName,
        (ulong)streamPosition,
        WriterOptions.ArchiveEncoding
    )
    {
        ModificationTime = DateTime.Now,
    };

    // Local file header, then the raw index bytes.
    var storedOptions = new ZipWriterEntryOptions { CompressionType = CompressionType.None };
    streamPosition += (uint)WriteHeader(hiddenName, storedOptions, centralEntry, isZip64);
    OutputStream.Write(payload, 0, payloadLength);

    centralEntry.Crc = payloadCrc;
    centralEntry.Compressed = (ulong)payloadLength;
    centralEntry.Decompressed = (ulong)payloadLength;

    var endOfIndex = streamPosition + payloadLength;

    if (OutputStream.CanSeek)
    {
        // Go back into the local header (14 bytes in) to patch CRC and sizes,
        // then return to the end of the index data.
        OutputStream.Position = (long)(centralEntry.HeaderOffset + 14);
        WriteFooter(payloadCrc, (uint)payloadLength, (uint)payloadLength);
        OutputStream.Position = endOfIndex;
    }

    streamPosition = endOfIndex;
    entries.Add(centralEntry);
}
|
||||
|
||||
private void WriteEndRecord(ulong size)
|
||||
{
|
||||
var zip64EndOfCentralDirectoryNeeded =
|
||||
@@ -385,7 +460,10 @@ public class ZipWriter : AbstractWriter
|
||||
private readonly ZipWriter writer;
|
||||
private readonly ZipCompressionMethod zipCompressionMethod;
|
||||
private readonly int compressionLevel;
|
||||
private readonly bool useSozip;
|
||||
private readonly int sozipChunkSize;
|
||||
private SharpCompressStream? counting;
|
||||
private SOZipDeflateStream? sozipStream;
|
||||
private ulong decompressed;
|
||||
|
||||
// Flag to prevent throwing exceptions on Dispose
|
||||
@@ -397,7 +475,9 @@ public class ZipWriter : AbstractWriter
|
||||
Stream originalStream,
|
||||
ZipCentralDirectoryEntry entry,
|
||||
ZipCompressionMethod zipCompressionMethod,
|
||||
int compressionLevel
|
||||
int compressionLevel,
|
||||
bool useSozip = false,
|
||||
int sozipChunkSize = 0
|
||||
)
|
||||
{
|
||||
this.writer = writer;
|
||||
@@ -406,6 +486,8 @@ public class ZipWriter : AbstractWriter
|
||||
this.entry = entry;
|
||||
this.zipCompressionMethod = zipCompressionMethod;
|
||||
this.compressionLevel = compressionLevel;
|
||||
this.useSozip = useSozip;
|
||||
this.sozipChunkSize = sozipChunkSize;
|
||||
writeStream = GetWriteStream(originalStream);
|
||||
}
|
||||
|
||||
@@ -435,6 +517,15 @@ public class ZipWriter : AbstractWriter
|
||||
}
|
||||
case ZipCompressionMethod.Deflate:
|
||||
{
|
||||
if (useSozip && sozipChunkSize > 0)
|
||||
{
|
||||
sozipStream = new SOZipDeflateStream(
|
||||
counting,
|
||||
(CompressionLevel)compressionLevel,
|
||||
sozipChunkSize
|
||||
);
|
||||
return sozipStream;
|
||||
}
|
||||
return new DeflateStream(
|
||||
counting,
|
||||
CompressionMode.Compress,
|
||||
@@ -581,7 +672,18 @@ public class ZipWriter : AbstractWriter
|
||||
writer.WriteFooter(entry.Crc, compressedvalue, decompressedvalue);
|
||||
writer.streamPosition += (long)entry.Compressed + 16;
|
||||
}
|
||||
|
||||
writer.entries.Add(entry);
|
||||
|
||||
// Write SOZip index file if SOZip was used and file meets minimum size
|
||||
if (
|
||||
useSozip
|
||||
&& sozipStream is not null
|
||||
&& entry.Decompressed >= (ulong)writer.sozipMinFileSize
|
||||
)
|
||||
{
|
||||
writer.WriteSozipIndexFile(entry, sozipStream);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -49,4 +49,11 @@ public class ZipWriterEntryOptions
|
||||
/// This option is not supported with non-seekable streams.
|
||||
/// </summary>
|
||||
public bool? EnableZip64 { get; set; }
|
||||
|
||||
/// <summary>
/// Per-entry switch for SOZip (Seek-Optimized ZIP).
/// Leave as null to inherit the archive-level setting; set to true or false
/// to override it for this entry only.
/// SOZip is only applied to Deflate-compressed entries written to a seekable stream.
/// </summary>
public bool? EnableSOZip { get; set; }
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
using SharpCompress.Compressors.Deflate;
|
||||
using D = SharpCompress.Compressors.Deflate;
|
||||
|
||||
@@ -24,6 +25,9 @@ public class ZipWriterOptions : WriterOptions
|
||||
{
|
||||
UseZip64 = writerOptions.UseZip64;
|
||||
ArchiveComment = writerOptions.ArchiveComment;
|
||||
EnableSOZip = writerOptions.EnableSOZip;
|
||||
SOZipChunkSize = writerOptions.SOZipChunkSize;
|
||||
SOZipMinFileSize = writerOptions.SOZipMinFileSize;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,4 +84,27 @@ public class ZipWriterOptions : WriterOptions
|
||||
/// are less than 4GiB in length.
|
||||
/// </summary>
|
||||
public bool UseZip64 { get; set; }
|
||||
|
||||
/// <summary>
/// Archive-wide default for SOZip (Seek-Optimized ZIP) on Deflate-compressed entries.
/// When on, entries that reach the minimum size get a companion index file
/// that permits random access into their compressed data.
/// Has no effect unless the output stream is seekable.
/// </summary>
public bool EnableSOZip { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// The chunk size, in bytes, used when building a SOZip index.
|
||||
/// Expected to be a multiple of 1024 bytes; this property itself performs no validation.
/// Initialized to <see cref="SOZipIndex.DEFAULT_CHUNK_SIZE"/> (documented as 32 KiB, 32768 bytes).
|
||||
/// Smaller chunks allow for finer-grained random access but result
|
||||
/// in larger index files and slightly less efficient compression.
|
||||
/// </summary>
|
||||
public int SOZipChunkSize { get; set; } = (int)SOZipIndex.DEFAULT_CHUNK_SIZE;
|
||||
|
||||
/// <summary>
|
||||
/// Minimum uncompressed file size, in bytes, for SOZip optimization.
|
||||
/// Files smaller than this size will not have SOZip index files created.
|
||||
/// Initialized to 1 MiB (1048576 bytes).
|
||||
/// </summary>
|
||||
public long SOZipMinFileSize { get; set; } = 1048576;
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@ using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
using SharpCompress.Readers;
|
||||
using Xunit;
|
||||
|
||||
@@ -45,7 +46,7 @@ public class TestBase : IDisposable
|
||||
|
||||
public void Dispose() => Directory.Delete(SCRATCH_BASE_PATH, true);
|
||||
|
||||
public void VerifyFiles()
|
||||
public void VerifyFiles(bool skipSoIndexes = false)
|
||||
{
|
||||
if (UseExtensionInsteadOfNameToVerify)
|
||||
{
|
||||
@@ -53,7 +54,7 @@ public class TestBase : IDisposable
|
||||
}
|
||||
else
|
||||
{
|
||||
VerifyFilesByName();
|
||||
VerifyFilesByName(skipSoIndexes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,10 +73,23 @@ public class TestBase : IDisposable
|
||||
}
|
||||
}
|
||||
|
||||
protected void VerifyFilesByName()
|
||||
private void VerifyFilesByName(bool skipSoIndexes)
|
||||
{
|
||||
var extracted = Directory
|
||||
.EnumerateFiles(SCRATCH_FILES_PATH, "*.*", SearchOption.AllDirectories)
|
||||
.Where(x =>
|
||||
{
|
||||
if (
|
||||
skipSoIndexes
|
||||
&& Path.GetFileName(x)
|
||||
.EndsWith(SOZipIndex.INDEX_EXTENSION, StringComparison.OrdinalIgnoreCase)
|
||||
)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
})
|
||||
.ToLookup(path => path.Substring(SCRATCH_FILES_PATH.Length));
|
||||
var original = Directory
|
||||
.EnumerateFiles(ORIGINAL_FILES_PATH, "*.*", SearchOption.AllDirectories)
|
||||
|
||||
257
tests/SharpCompress.Test/Zip/SOZipReaderTests.cs
Normal file
257
tests/SharpCompress.Test/Zip/SOZipReaderTests.cs
Normal file
@@ -0,0 +1,257 @@
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
using SharpCompress.Archives.Zip;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
using SharpCompress.Readers.Zip;
|
||||
using SharpCompress.Test.Mocks;
|
||||
using SharpCompress.Writers;
|
||||
using SharpCompress.Writers.Zip;
|
||||
using Xunit;
|
||||
|
||||
namespace SharpCompress.Test.Zip;
|
||||
|
||||
public class SoZipReaderTests : TestBase
|
||||
{
|
||||
// Streams a plain Deflate archive through a forward-only reader and checks that
// no entry is flagged as SOZip-optimized or as a SOZip index file.
[Fact]
public async Task SOZip_Reader_RegularZip_NoSozipEntries()
{
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.deflate.zip");

    using Stream forwardOnly = new ForwardOnlyStream(File.OpenRead(archivePath));
    using var zipReader = ZipReader.Open(forwardOnly);

    while (true)
    {
        if (!await zipReader.MoveToNextEntryAsync())
        {
            break;
        }

        var current = zipReader.Entry;

        // Neither SOZip flag may be set on an ordinary zip entry.
        Assert.False(current.IsSozip, $"Entry {current.Key} should not be SOZip");
        Assert.False(
            current.IsSozipIndexFile,
            $"Entry {current.Key} should not be a SOZip index file"
        );
    }
}
|
||||
|
||||
// Opens a plain Deflate archive through the archive API and checks that no entry
// is flagged as SOZip-optimized or as a SOZip index file.
[Fact]
public void SOZip_Archive_RegularZip_NoSozipEntries()
{
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.deflate.zip");

    using (Stream fileStream = File.OpenRead(archivePath))
    using (var zipArchive = ZipArchive.Open(fileStream))
    {
        foreach (var candidate in zipArchive.Entries)
        {
            // Neither SOZip flag may be set on an ordinary zip entry.
            Assert.False(candidate.IsSozip, $"Entry {candidate.Key} should not be SOZip");
            Assert.False(
                candidate.IsSozipIndexFile,
                $"Entry {candidate.Key} should not be a SOZip index file"
            );
        }
    }
}
|
||||
|
||||
// Reads the Zip.sozip.zip fixture through the archive API and verifies its layout:
// one stored SOZip index entry with a parseable payload, the 5000-byte data file,
// and the small file that carries no index.
[Fact]
public void SOZip_Archive_ReadSOZipFile()
{
    var fixturePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.sozip.zip");
    using Stream fixture = File.OpenRead(fixturePath);
    using var zipArchive = ZipArchive.Open(fixture);

    // Expected layout: data.txt, .data.txt.sozip.idx and small.txt.
    var all = zipArchive.Entries.ToList();
    Assert.Equal(3, all.Count);

    // Exactly one entry is a SOZip index, named after data.txt.
    var indexEntries = all.Where(e => e.IsSozipIndexFile).ToList();
    Assert.Single(indexEntries);
    var indexEntry = indexEntries[0];
    Assert.Equal(".data.txt.sozip.idx", indexEntry.Key);

    // The index entry is stored, not compressed.
    Assert.Equal(CompressionType.None, indexEntry.CompressionType);

    // The index payload must parse and describe a non-empty file.
    {
        using var rawIndex = indexEntry.OpenEntryStream();
        using var buffer = new MemoryStream();
        rawIndex.CopyTo(buffer);
        var payload = buffer.ToArray();

        var parsed = SOZipIndex.Read(payload);
        Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
        Assert.Equal(1024u, parsed.ChunkSize); // The fixture is written with SOZipChunkSize = 1024
        Assert.True(parsed.UncompressedSize > 0);
        Assert.True(parsed.OffsetCount > 0);
    }

    // data.txt decompresses to 5000 'A' characters.
    var dataEntry = all.First(e => e.Key == "data.txt");
    {
        using var dataStream = dataEntry.OpenEntryStream();
        using var dataText = new StreamReader(dataStream);
        var body = dataText.ReadToEnd();
        Assert.Equal(5000, body.Length);
        Assert.True(body.All(ch => ch == 'A'));
    }

    // small.txt is a regular entry with its literal content.
    var smallEntry = all.First(e => e.Key == "small.txt");
    Assert.False(smallEntry.IsSozipIndexFile);
    {
        using var smallStream = smallEntry.OpenEntryStream();
        using var smallText = new StreamReader(smallStream);
        var body = smallText.ReadToEnd();
        Assert.Equal("Small content", body);
    }
}
|
||||
|
||||
// Streams the Zip.sozip.zip fixture through a forward-only reader and verifies
// that all three expected entries are seen, that only the index entry is flagged
// as a SOZip index file, and that the data and index payloads are readable.
[Fact]
public async Task SOZip_Reader_ReadSOZipFile()
{
    var fixturePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.sozip.zip");
    using Stream forwardOnly = new ForwardOnlyStream(File.OpenRead(fixturePath));
    using var zipReader = ZipReader.Open(forwardOnly);

    bool sawData = false;
    bool sawIndex = false;
    bool sawSmall = false;

    while (await zipReader.MoveToNextEntryAsync())
    {
        switch (zipReader.Entry.Key)
        {
            case "data.txt":
            {
                sawData = true;
                Assert.False(zipReader.Entry.IsSozipIndexFile);

                // The payload is 5000 'A' characters.
                using var payload = zipReader.OpenEntryStream();
                using var text = new StreamReader(payload);
                var body = text.ReadToEnd();
                Assert.Equal(5000, body.Length);
                Assert.True(body.All(ch => ch == 'A'));
                break;
            }
            case ".data.txt.sozip.idx":
            {
                sawIndex = true;
                Assert.True(zipReader.Entry.IsSozipIndexFile);

                // The index payload must parse as a SOZip index.
                using var rawIndex = zipReader.OpenEntryStream();
                using var buffer = new MemoryStream();
                await rawIndex.CopyToAsync(buffer);
                var indexPayload = buffer.ToArray();

                var parsed = SOZipIndex.Read(indexPayload);
                Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
                break;
            }
            case "small.txt":
            {
                sawSmall = true;
                Assert.False(zipReader.Entry.IsSozipIndexFile);
                break;
            }
        }
    }

    Assert.True(sawData, "data.txt entry not found");
    Assert.True(sawIndex, ".data.txt.sozip.idx entry not found");
    Assert.True(sawSmall, "small.txt entry not found");
}
|
||||
|
||||
// Writes an in-memory zip holding a regular file plus an entry whose name follows
// the SOZip index pattern, then checks through the archive API that only the
// latter is reported as a SOZip index file.
[Fact]
public void SOZip_Archive_DetectsIndexFileByName()
{
    using var zipBytes = new MemoryStream();

    var writerOptions = new ZipWriterOptions(CompressionType.Deflate)
    {
        LeaveStreamOpen = true,
    };

    using (var zipWriter = WriterFactory.Open(zipBytes, ArchiveType.Zip, writerOptions))
    {
        // Regular entry.
        zipWriter.Write("test.txt", new MemoryStream(Encoding.UTF8.GetBytes("Hello World")));

        // Entry that is a SOZip index by name pattern.
        var fakeIndex = new SOZipIndex(
            chunkSize: 32768,
            uncompressedSize: 100,
            compressedSize: 50,
            compressedOffsets: new ulong[] { 0 }
        );
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(fakeIndex.ToByteArray()));
    }

    // Rewind so the archive can be read back from the start.
    zipBytes.Position = 0;

    using var zipArchive = ZipArchive.Open(zipBytes);
    var all = zipArchive.Entries.ToList();

    Assert.Equal(2, all.Count);

    var plain = all.First(e => e.Key == "test.txt");
    Assert.False(plain.IsSozipIndexFile);
    Assert.False(plain.IsSozip); // No SOZip extra field

    var indexLike = all.First(e => e.Key == ".test.txt.sozip.idx");
    Assert.True(indexLike.IsSozipIndexFile);
}
|
||||
|
||||
// Writes an in-memory zip holding a regular file plus an entry whose name follows
// the SOZip index pattern, then checks through a forward-only reader that only
// the latter is reported as a SOZip index file.
[Fact]
public async Task SOZip_Reader_DetectsIndexFileByName()
{
    using var zipBytes = new MemoryStream();

    var writerOptions = new ZipWriterOptions(CompressionType.Deflate)
    {
        LeaveStreamOpen = true,
    };

    using (var zipWriter = WriterFactory.Open(zipBytes, ArchiveType.Zip, writerOptions))
    {
        // Regular entry.
        zipWriter.Write("test.txt", new MemoryStream(Encoding.UTF8.GetBytes("Hello World")));

        // Entry that is a SOZip index by name pattern.
        var fakeIndex = new SOZipIndex(
            chunkSize: 32768,
            uncompressedSize: 100,
            compressedSize: 50,
            compressedOffsets: new ulong[] { 0 }
        );
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(fakeIndex.ToByteArray()));
    }

    // Rewind so the archive can be read back from the start.
    zipBytes.Position = 0;

    using Stream forwardOnly = new ForwardOnlyStream(zipBytes);
    using var zipReader = ZipReader.Open(forwardOnly);

    bool sawRegular = false;
    bool sawIndex = false;

    while (await zipReader.MoveToNextEntryAsync())
    {
        switch (zipReader.Entry.Key)
        {
            case "test.txt":
                sawRegular = true;
                Assert.False(zipReader.Entry.IsSozipIndexFile);
                Assert.False(zipReader.Entry.IsSozip);
                break;

            case ".test.txt.sozip.idx":
                sawIndex = true;
                Assert.True(zipReader.Entry.IsSozipIndexFile);
                break;
        }
    }

    Assert.True(sawRegular, "Regular entry not found");
    Assert.True(sawIndex, "Index entry not found");
}
|
||||
}
|
||||
358
tests/SharpCompress.Test/Zip/SoZipWriterTests.cs
Normal file
358
tests/SharpCompress.Test/Zip/SoZipWriterTests.cs
Normal file
@@ -0,0 +1,358 @@
|
||||
using System;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using SharpCompress.Archives.Zip;
|
||||
using SharpCompress.Common;
|
||||
using SharpCompress.Common.Zip.SOZip;
|
||||
using SharpCompress.Readers;
|
||||
using SharpCompress.Writers;
|
||||
using SharpCompress.Writers.Zip;
|
||||
using Xunit;
|
||||
|
||||
namespace SharpCompress.Test.Zip;
|
||||
|
||||
public class SoZipWriterTests : TestBase
|
||||
{
|
||||
// Serializes a SOZipIndex to bytes, parses it back, and checks that every field
// survives the round trip.
[Fact]
public void SOZipIndex_RoundTrip()
{
    ulong[] expectedOffsets = { 0, 1024, 2048, 3072 };
    var source = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 100000,
        compressedSize: 50000,
        compressedOffsets: expectedOffsets
    );

    // Serialize, then deserialize.
    var serialized = source.ToByteArray();
    var roundTripped = SOZipIndex.Read(serialized);

    // Header fields.
    Assert.Equal(SOZipIndex.SOZIP_VERSION, roundTripped.Version);
    Assert.Equal(32768u, roundTripped.ChunkSize);
    Assert.Equal(100000ul, roundTripped.UncompressedSize);
    Assert.Equal(50000ul, roundTripped.CompressedSize);

    // Offset table.
    Assert.Equal(4u, roundTripped.OffsetCount);
    Assert.Equal(expectedOffsets, roundTripped.CompressedOffsets);
}
|
||||
|
||||
// Parsing four zero bytes must fail with InvalidDataException whose message
// mentions the magic number mismatch.
[Fact]
public void SOZipIndex_Read_InvalidMagic_ThrowsException()
{
    // A freshly allocated array is all zeros, i.e. { 0x00, 0x00, 0x00, 0x00 }.
    var zeroedHeader = new byte[4];

    var thrown = Assert.Throws<InvalidDataException>(() => SOZipIndex.Read(zeroedHeader));

    Assert.Contains("magic number mismatch", thrown.Message);
}
|
||||
|
||||
// Checks that GetChunkIndex maps uncompressed positions to chunk numbers for a
// five-chunk index with 32768-byte chunks.
[Fact]
public void SOZipIndex_GetChunkIndex()
{
    var fiveChunks = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 163840, // 5 * 32768
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    // First chunk: positions 0 through 32767.
    Assert.Equal(0, fiveChunks.GetChunkIndex(0));
    Assert.Equal(0, fiveChunks.GetChunkIndex(32767));

    // Positions that sit exactly on a chunk boundary.
    Assert.Equal(1, fiveChunks.GetChunkIndex(32768));
    Assert.Equal(2, fiveChunks.GetChunkIndex(65536));

    // Last byte of the file lies in the last chunk.
    Assert.Equal(4, fiveChunks.GetChunkIndex(163839));
}
|
||||
|
||||
// Checks that GetCompressedOffset returns, for each chunk number, the offset that
// was supplied for it at construction time.
[Fact]
public void SOZipIndex_GetCompressedOffset()
{
    var fiveChunks = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 163840,
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    Assert.Equal(0ul, fiveChunks.GetCompressedOffset(0));
    Assert.Equal(1000ul, fiveChunks.GetCompressedOffset(1));
    Assert.Equal(2000ul, fiveChunks.GetCompressedOffset(2));
    Assert.Equal(3000ul, fiveChunks.GetCompressedOffset(3));
    Assert.Equal(4000ul, fiveChunks.GetCompressedOffset(4));
}
|
||||
|
||||
// Checks that GetUncompressedOffset returns chunk number times chunk size
// (32768) for each of the five chunks.
[Fact]
public void SOZipIndex_GetUncompressedOffset()
{
    var fiveChunks = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 163840,
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    Assert.Equal(0ul, fiveChunks.GetUncompressedOffset(0));
    Assert.Equal(32768ul, fiveChunks.GetUncompressedOffset(1)); // 1 * 32768
    Assert.Equal(65536ul, fiveChunks.GetUncompressedOffset(2)); // 2 * 32768
    Assert.Equal(98304ul, fiveChunks.GetUncompressedOffset(3)); // 3 * 32768
    Assert.Equal(131072ul, fiveChunks.GetUncompressedOffset(4)); // 4 * 32768
}
|
||||
|
||||
// Checks that GetIndexFileName places the hidden ".<name>.sozip.idx" index next
// to the main file, keeping any directory prefix.
[Fact]
public void SOZipIndex_GetIndexFileName()
{
    var cases = new (string MainName, string ExpectedIndexName)[]
    {
        ("file.txt", ".file.txt.sozip.idx"),
        ("dir/file.txt", "dir/.file.txt.sozip.idx"),
        ("a/b/file.txt", "a/b/.file.txt.sozip.idx"),
    };

    foreach (var (mainName, expectedIndexName) in cases)
    {
        Assert.Equal(expectedIndexName, SOZipIndex.GetIndexFileName(mainName));
    }
}
|
||||
|
||||
// Checks which names IsIndexFile accepts (leading dot plus ".sozip.idx" suffix)
// and which it rejects, including empty and null input.
[Fact]
public void SOZipIndex_IsIndexFile()
{
    string[] indexNames =
    {
        ".file.txt.sozip.idx",
        "dir/.file.txt.sozip.idx",
        ".test.sozip.idx",
    };

    string[] otherNames =
    {
        "file.txt",
        "file.sozip.idx", // Missing leading dot
        ".file.txt", // Missing .sozip.idx
        "",
        null!,
    };

    foreach (var name in indexNames)
    {
        Assert.True(SOZipIndex.IsIndexFile(name));
    }

    foreach (var name in otherNames)
    {
        Assert.False(SOZipIndex.IsIndexFile(name));
    }
}
|
||||
|
||||
// Checks that GetMainFileName recovers the main file name from an index name and
// returns null for names that are not SOZip index names.
[Fact]
public void SOZipIndex_GetMainFileName()
{
    var cases = new (string IndexName, string ExpectedMainName)[]
    {
        (".file.txt.sozip.idx", "file.txt"),
        ("dir/.file.txt.sozip.idx", "dir/file.txt"),
        (".test.sozip.idx", "test"),
    };

    foreach (var (indexName, expectedMainName) in cases)
    {
        Assert.Equal(expectedMainName, SOZipIndex.GetMainFileName(indexName));
    }

    // Names that are not index names yield null.
    Assert.Null(SOZipIndex.GetMainFileName("file.txt"));
    Assert.Null(SOZipIndex.GetMainFileName(""));
}
|
||||
|
||||
// Writes an in-memory zip holding a regular file plus an entry named like a SOZip
// index, then checks that the archive API flags only the latter as an index file.
[Fact]
public void ZipEntry_IsSozipIndexFile_Detection()
{
    using var zipBytes = new MemoryStream();

    var writerOptions = new ZipWriterOptions(CompressionType.Deflate)
    {
        LeaveStreamOpen = true,
    };

    using (var zipWriter = WriterFactory.Open(zipBytes, ArchiveType.Zip, writerOptions))
    {
        // Regular entry.
        zipWriter.Write("test.txt", new MemoryStream(Encoding.UTF8.GetBytes("Hello World")));

        // Entry with a SOZip index name pattern.
        var fakeIndex = new SOZipIndex(
            chunkSize: 32768,
            uncompressedSize: 100,
            compressedSize: 50,
            compressedOffsets: new ulong[] { 0 }
        );
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(fakeIndex.ToByteArray()));
    }

    // Rewind so the archive can be read back from the start.
    zipBytes.Position = 0;

    using var zipArchive = ZipArchive.Open(zipBytes);
    var all = zipArchive.Entries.ToList();

    Assert.Equal(2, all.Count);

    var plain = all.First(e => e.Key == "test.txt");
    Assert.False(plain.IsSozipIndexFile);
    Assert.False(plain.IsSozip); // No SOZip extra field

    var indexLike = all.First(e => e.Key == ".test.txt.sozip.idx");
    Assert.True(indexLike.IsSozipIndexFile);
}
|
||||
|
||||
// Checks the SOZip-related defaults of a freshly constructed ZipWriterOptions.
[Fact]
public void ZipWriterOptions_SOZipDefaults()
{
    const long OneMebibyte = 1024L * 1024L; // 1048576

    var defaults = new ZipWriterOptions(CompressionType.Deflate);

    // SOZip is off unless explicitly requested.
    Assert.False(defaults.EnableSOZip);
    Assert.Equal((int)SOZipIndex.DEFAULT_CHUNK_SIZE, defaults.SOZipChunkSize);
    Assert.Equal(OneMebibyte, defaults.SOZipMinFileSize); // 1MB
}
|
||||
|
||||
// A freshly constructed ZipWriterEntryOptions leaves EnableSOZip unset (null).
[Fact]
public void ZipWriterEntryOptions_SOZipDefaults()
{
    var entryDefaults = new ZipWriterEntryOptions();

    Assert.Null(entryDefaults.EnableSOZip);
}
|
||||
|
||||
// End-to-end check: packs every file under ORIGINAL_FILES_PATH into a SOZip
// archive, validates each generated index entry, extracts the archive with the
// generic reader, and compares the extracted files against the originals
// (index files are skipped in that comparison).
[Fact]
public void SOZip_RoundTrip_CompressAndDecompress()
{
    var archivePath = Path.Combine(SCRATCH2_FILES_PATH, "test.sozip.zip");

    // --- Write phase ---
    using (var output = File.Create(archivePath))
    {
        var writerOptions = new ZipWriterOptions(CompressionType.Deflate)
        {
            EnableSOZip = true,
            SOZipMinFileSize = 1024, // 1KB to ensure test files qualify
            LeaveStreamOpen = false,
        };

        using var zipWriter = new ZipWriter(output, writerOptions);

        var sourceFiles = Directory.GetFiles(
            ORIGINAL_FILES_PATH,
            "*",
            SearchOption.AllDirectories
        );
        foreach (var sourceFile in sourceFiles)
        {
            // Entry key: path relative to the originals root, with forward slashes.
            var relative = sourceFile.Substring(ORIGINAL_FILES_PATH.Length + 1);
            var entryKey = relative.Replace('\\', '/');

            using var input = File.OpenRead(sourceFile);
            zipWriter.Write(entryKey, input, new ZipWriterEntryOptions());
        }
    }

    Assert.True(File.Exists(archivePath));

    // --- Inspect phase: every index entry must be stored, parseable, and paired ---
    using (var archiveFile = File.OpenRead(archivePath))
    {
        using var zipArchive = ZipArchive.Open(archiveFile);

        var all = zipArchive.Entries.ToList();
        Assert.NotEmpty(all);

        var indexEntries = all.Where(e => e.IsSozipIndexFile).ToList();
        Assert.NotEmpty(indexEntries);

        foreach (var indexEntry in indexEntries)
        {
            // Index entries are stored, not compressed.
            Assert.Equal(CompressionType.None, indexEntry.CompressionType);

            using var rawIndex = indexEntry.OpenEntryStream();
            using var buffer = new MemoryStream();
            rawIndex.CopyTo(buffer);
            var payload = buffer.ToArray();

            // The payload must at least be long enough to hold the first 4 bytes.
            Assert.True(payload.Length >= 4, $"Index file too small: {payload.Length} bytes");

            // The payload must parse and describe a non-empty file.
            var parsed = SOZipIndex.Read(payload);
            Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
            Assert.True(parsed.ChunkSize > 0);
            Assert.True(parsed.UncompressedSize > 0);
            Assert.True(parsed.OffsetCount > 0);

            // Each index must sit next to the data file it describes.
            var mainName = SOZipIndex.GetMainFileName(indexEntry.Key!);
            Assert.NotNull(mainName);
            Assert.Contains(all, e => e.Key == mainName);
        }
    }

    // --- Extract phase ---
    using (var archiveFile = File.OpenRead(archivePath))
    {
        using var genericReader = ReaderFactory.Open(archiveFile);
        var extraction = new ExtractionOptions { ExtractFullPath = true };
        genericReader.WriteAllToDirectory(SCRATCH_FILES_PATH, extraction);
    }

    // Compare extracted files with the originals, ignoring SOZip index files.
    VerifyFiles(true);
}
|
||||
|
||||
// Builds a small SOZip archive with the same content and writer settings as the
// Zip.sozip.zip fixture (5000 'A' bytes in data.txt, a short small.txt, 1024-byte
// chunks, 100-byte minimum size) and validates its layout.
//
// The archive is written to the scratch directory, not to TEST_ARCHIVES_PATH.
// Creating the file inside the test-archives directory on every run would rewrite
// a committed fixture and could collide with the tests that open
// TEST_ARCHIVES_PATH/Zip.sozip.zip for reading while this test truncates it
// (test classes may be executed in parallel by the test runner).
[Fact]
public void CreateSOZipTestArchive()
{
    var archivePath = Path.Combine(SCRATCH2_FILES_PATH, "Zip.sozip.zip");

    using (var stream = File.Create(archivePath))
    {
        var options = new ZipWriterOptions(CompressionType.Deflate)
        {
            EnableSOZip = true,
            SOZipMinFileSize = 100, // Low threshold to ensure test content is optimized
            SOZipChunkSize = 1024, // Small chunks for testing
            LeaveStreamOpen = false,
        };

        using var writer = new ZipWriter(stream, options);

        // Create test content that's large enough to create multiple chunks
        var largeContent = new string('A', 5000); // 5KB of 'A's

        // Write a file with enough data to be SOZip-optimized
        writer.Write(
            "data.txt",
            new MemoryStream(Encoding.UTF8.GetBytes(largeContent)),
            new ZipWriterEntryOptions()
        );

        // Write a smaller file that won't be SOZip-optimized
        writer.Write(
            "small.txt",
            new MemoryStream(Encoding.UTF8.GetBytes("Small content")),
            new ZipWriterEntryOptions()
        );
    }

    // Validate the archive was created
    Assert.True(File.Exists(archivePath));

    // Validate it's a valid SOZip archive
    using (var stream = File.OpenRead(archivePath))
    {
        using var archive = ZipArchive.Open(stream);
        var entries = archive.Entries.ToList();

        // Should have data file, small file, and index file
        Assert.Equal(3, entries.Count);

        // Verify we have one SOZip index file
        var indexFiles = entries.Where(e => e.IsSozipIndexFile).ToList();
        Assert.Single(indexFiles);

        // Verify the index file is named after data.txt
        var indexEntry = indexFiles.First();
        Assert.Equal(".data.txt.sozip.idx", indexEntry.Key);

        // Verify the data file can be read back unchanged
        var dataEntry = entries.First(e => e.Key == "data.txt");
        using var dataStream = dataEntry.OpenEntryStream();
        using var reader = new StreamReader(dataStream);
        var content = reader.ReadToEnd();
        Assert.Equal(5000, content.Length);
        Assert.True(content.All(c => c == 'A'));
    }
}
|
||||
}
|
||||
BIN
tests/TestArchives/Archives/Zip.sozip.zip
Normal file
BIN
tests/TestArchives/Archives/Zip.sozip.zip
Normal file
Binary file not shown.
Reference in New Issue
Block a user