Compare commits

...

10 Commits

Author SHA1 Message Date
copilot-swe-agent[bot]
b3ce90ae94 Remove foo.zip and add Zip.sozip.zip test archive with tests
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-06 13:36:21 +00:00
Adam Hathcock
130e169862 Merge remote-tracking branch 'origin/master' into copilot/add-so-optimized-zip-support
# Conflicts:
#	Directory.Packages.props
#	FORMATS.md
#	build/packages.lock.json
2026-01-06 13:22:45 +00:00
copilot-swe-agent[bot]
0dc63223ab Merge master branch and resolve FORMATS.md conflict
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2026-01-05 17:21:38 +00:00
Adam Hathcock
9058645fea sozip writing and validation 2025-11-27 10:49:19 +00:00
copilot-swe-agent[bot]
7339567880 Fix SOZip tests to work correctly with ZipReader and ZipArchive
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-11-27 08:18:27 +00:00
Adam Hathcock
8c6d914004 reader tests don't pass or make sense 2025-11-26 15:11:03 +00:00
copilot-swe-agent[bot]
d9c9612b8f Update documentation for SOZip support
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-11-26 08:31:49 +00:00
copilot-swe-agent[bot]
a35089900f Add SOZip detection in ZipEntry and additional tests
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-11-26 08:27:16 +00:00
copilot-swe-agent[bot]
ac4bcd0fe3 Add SOZip index data structure and basic tests
Co-authored-by: adamhathcock <527620+adamhathcock@users.noreply.github.com>
2025-11-26 08:23:29 +00:00
copilot-swe-agent[bot]
0ac6b46379 Initial plan 2025-11-26 08:12:38 +00:00
13 changed files with 1354 additions and 7 deletions

View File

@@ -25,7 +25,7 @@
| 7Zip (4) | LZMA, LZMA2, BZip2, PPMd, BCJ, BCJ2, Deflate | Decompress | SevenZipArchive | N/A | N/A |
1. SOLID Rars are only supported in the RarReader API.
2. Zip format supports pkware and WinzipAES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported but only with seekable streams as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
2. Zip format supports pkware and WinzipAES encryption. However, encrypted LZMA is not supported. Zip64 reading/writing is supported but only with seekable streams as the Zip spec doesn't support Zip64 data in post data descriptors. Deflate64 is only supported for reading. SOZip (Seek-Optimized ZIP) entries and their index files are detected when reading, and ZipWriter can optionally write SOZip index files for Deflate entries on seekable streams. See [Zip Format Notes](#zip-format-notes) for details on multi-volume archives and streaming behavior.
3. The Tar format requires a file size in the header. If no size is specified to the TarWriter and the stream is not seekable, then an exception will be thrown.
4. The 7Zip format doesn't allow for reading as a forward-only stream so 7Zip is only supported through the Archive API. See [7Zip Format Notes](#7zip-format-notes) for details on async extraction behavior.
5. LZip has no support for extra data like the file name or timestamp. There is a default filename used when looking at the entry Key on the archive.

View File

@@ -15,6 +15,10 @@ internal enum ExtraDataType : ushort
UnicodePathExtraField = 0x7075,
Zip64ExtendedInformationExtraField = 0x0001,
UnixTimeExtraField = 0x5455,
// SOZip (Seek-Optimized ZIP) extra field
// Used to link a main file to its SOZip index file
SOZip = 0x564B,
}
internal class ExtraData
@@ -233,6 +237,44 @@ internal sealed class UnixTimeExtraField : ExtraData
}
}
/// <summary>
/// Extra field carried by a SOZip (Seek-Optimized ZIP) data entry. Its payload is a
/// little-endian 64-bit offset pointing at the local header of the matching index entry
/// inside the same archive.
/// </summary>
internal sealed class SOZipExtraField : ExtraData
{
    public SOZipExtraField(ExtraDataType type, ushort length, byte[] dataBytes)
        : base(type, length, dataBytes) { }

    /// <summary>
    /// Gets the archive offset of the index entry's local header, or 0 when the payload
    /// is missing or shorter than eight bytes.
    /// </summary>
    internal ulong IndexOffset =>
        DataBytes is { Length: >= 8 } payload
            ? BinaryPrimitives.ReadUInt64LittleEndian(payload)
            : 0;

    /// <summary>
    /// Builds a SOZip extra field whose payload encodes <paramref name="indexOffset"/>.
    /// </summary>
    /// <param name="indexOffset">Offset of the index entry's local header within the archive.</param>
    /// <returns>A new <see cref="SOZipExtraField"/> with an eight-byte payload.</returns>
    public static SOZipExtraField Create(ulong indexOffset)
    {
        var payload = new byte[sizeof(ulong)];
        BinaryPrimitives.WriteUInt64LittleEndian(payload, indexOffset);
        return new SOZipExtraField(ExtraDataType.SOZip, (ushort)payload.Length, payload);
    }
}
internal static class LocalEntryHeaderExtraFactory
{
internal static ExtraData Create(ExtraDataType type, ushort length, byte[] extraData) =>
@@ -246,6 +288,7 @@ internal static class LocalEntryHeaderExtraFactory
ExtraDataType.Zip64ExtendedInformationExtraField =>
new Zip64ExtendedInformationExtraField(type, length, extraData),
ExtraDataType.UnixTimeExtraField => new UnixTimeExtraField(type, length, extraData),
ExtraDataType.SOZip => new SOZipExtraField(type, length, extraData),
_ => new ExtraData(type, length, extraData),
};
}

View File

@@ -0,0 +1,150 @@
using System;
using System.Collections.Generic;
using System.IO;
using SharpCompress.Compressors;
using SharpCompress.Compressors.Deflate;
namespace SharpCompress.Common.Zip.SOZip;
/// <summary>
/// A write-only Deflate stream that requests a flush every <see cref="ChunkSize"/>
/// uncompressed bytes and records the compressed offset reached at each of those points,
/// so that a SOZip index can be built afterwards.
/// </summary>
internal sealed class SOZipDeflateStream : Stream
{
    private readonly DeflateStream _deflateStream;
    private readonly Stream _baseStream;
    private readonly uint _chunkSize;
    private readonly List<ulong> _compressedOffsets = new();

    // Position of the base stream when compression started; recorded offsets are relative to it.
    private readonly long _baseOffset;
    private long _uncompressedBytesWritten;

    // Uncompressed byte count at which the next flush is due.
    private long _nextSyncPoint;
    private bool _disposed;

    /// <summary>
    /// Creates a new SOZip Deflate stream.
    /// </summary>
    /// <param name="baseStream">
    /// The underlying stream to write compressed data to. Its <see cref="Stream.Position"/>
    /// is read to compute the recorded offsets.
    /// </param>
    /// <param name="compressionLevel">The compression level.</param>
    /// <param name="chunkSize">Number of uncompressed bytes between flush points; must be positive.</param>
    /// <exception cref="ArgumentNullException"><paramref name="baseStream"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    public SOZipDeflateStream(Stream baseStream, CompressionLevel compressionLevel, int chunkSize)
    {
        if (baseStream is null)
        {
            throw new ArgumentNullException(nameof(baseStream));
        }

        // A chunk size of 0 would never advance the sync point (Write would spin forever),
        // and a negative one would wrap to a huge value when stored as uint.
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkSize),
                chunkSize,
                "Chunk size must be a positive number of bytes."
            );
        }

        _baseStream = baseStream;
        _chunkSize = (uint)chunkSize;
        _baseOffset = baseStream.Position;
        _nextSyncPoint = chunkSize;

        // The first chunk always starts at the beginning of the compressed data.
        _compressedOffsets.Add(0);

        _deflateStream = new DeflateStream(baseStream, CompressionMode.Compress, compressionLevel);
    }

    /// <summary>
    /// Gets a copy of the compressed offsets recorded so far (relative to the start of the
    /// compressed data). A new array is allocated on every call.
    /// </summary>
    public ulong[] CompressedOffsets => _compressedOffsets.ToArray();

    /// <summary>
    /// Gets the total number of uncompressed bytes written.
    /// </summary>
    public ulong UncompressedBytesWritten => (ulong)_uncompressedBytesWritten;

    /// <summary>
    /// Gets the number of bytes the base stream has advanced since this stream was created.
    /// </summary>
    public ulong CompressedBytesWritten => (ulong)(_baseStream.Position - _baseOffset);

    /// <summary>
    /// Gets the chunk size being used.
    /// </summary>
    public uint ChunkSize => _chunkSize;

    public override bool CanRead => false;

    public override bool CanSeek => false;

    public override bool CanWrite => !_disposed && _deflateStream.CanWrite;

    public override long Length => throw new NotSupportedException();

    public override long Position
    {
        get => throw new NotSupportedException();
        set => throw new NotSupportedException();
    }

    public override void Flush() => _deflateStream.Flush();

    public override int Read(byte[] buffer, int offset, int count) =>
        throw new NotSupportedException();

    public override long Seek(long offset, SeekOrigin origin) => throw new NotSupportedException();

    public override void SetLength(long value) => throw new NotSupportedException();

    /// <summary>
    /// Compresses <paramref name="count"/> bytes, splitting the input at chunk boundaries
    /// so that a flush is requested between chunks.
    /// </summary>
    /// <exception cref="ObjectDisposedException">The stream has been disposed.</exception>
    public override void Write(byte[] buffer, int offset, int count)
    {
        if (_disposed)
        {
            throw new ObjectDisposedException(nameof(SOZipDeflateStream));
        }

        var remaining = count;
        var currentOffset = offset;

        while (remaining > 0)
        {
            // Never larger than the chunk size, so the narrowing cast is safe.
            var bytesUntilSync = (int)(_nextSyncPoint - _uncompressedBytesWritten);

            if (bytesUntilSync <= 0)
            {
                // The boundary is only acted on once more data arrives, so input that
                // ends exactly on a chunk boundary does not record a trailing offset.
                PerformSyncFlush();
                continue;
            }

            var bytesToWrite = Math.Min(remaining, bytesUntilSync);
            _deflateStream.Write(buffer, currentOffset, bytesToWrite);

            _uncompressedBytesWritten += bytesToWrite;
            currentOffset += bytesToWrite;
            remaining -= bytesToWrite;
        }
    }

    /// <summary>
    /// Requests a flush from the compressor, records the compressed offset reached, and
    /// schedules the next sync point one chunk further on.
    /// </summary>
    private void PerformSyncFlush()
    {
        // NOTE(review): confirm that DeflateStream.Flush() really emits a flush block under
        // the temporarily selected FlushMode, and whether SOZip needs a full flush (which
        // resets the dictionary) rather than a sync flush for chunks to be independently
        // decodable. Behaviour is left unchanged here.
        var originalFlushMode = _deflateStream.FlushMode;
        _deflateStream.FlushMode = FlushType.Sync;
        _deflateStream.Flush();
        _deflateStream.FlushMode = originalFlushMode;

        var compressedOffset = (ulong)(_baseStream.Position - _baseOffset);
        _compressedOffsets.Add(compressedOffset);

        _nextSyncPoint += _chunkSize;
    }

    /// <summary>
    /// Disposes the inner Deflate stream (once) when called from <c>Dispose()</c>.
    /// </summary>
    protected override void Dispose(bool disposing)
    {
        if (_disposed)
        {
            return;
        }

        _disposed = true;

        if (disposing)
        {
            _deflateStream.Dispose();
        }

        base.Dispose(disposing);
    }
}

View File

@@ -0,0 +1,367 @@
using System;
using System.Buffers.Binary;
using System.IO;
namespace SharpCompress.Common.Zip.SOZip;
/// <summary>
/// Represents a SOZip (Seek-Optimized ZIP) index that enables random access
/// within DEFLATE-compressed files by storing offsets to sync flush points.
/// </summary>
/// <remarks>
/// The serialized layout handled by this class is: magic (4 bytes), version (1 byte),
/// reserved (1 byte), encoded chunk size (2 bytes), uncompressed size (8 bytes),
/// compressed size (8 bytes), offset count (4 bytes), then one 8-byte offset per chunk.
/// All multi-byte values are little-endian.
/// </remarks>
[CLSCompliant(false)]
public sealed class SOZipIndex
{
    /// <summary>
    /// SOZip index file magic number: "SOZo" (0x534F5A6F)
    /// </summary>
    public const uint SOZIP_MAGIC = 0x6F5A4F53; // "SOZo" little-endian

    /// <summary>
    /// Current SOZip specification version
    /// </summary>
    public const byte SOZIP_VERSION = 1;

    /// <summary>
    /// Index file extension suffix
    /// </summary>
    public const string INDEX_EXTENSION = ".sozip.idx";

    /// <summary>
    /// Default chunk size in bytes (32KB)
    /// </summary>
    public const uint DEFAULT_CHUNK_SIZE = 32768;

    // The chunk size is serialized as (size / 1024) - 1 in a 16-bit field, so only
    // multiples of 1 KiB between 1 KiB and 64 MiB can be represented.
    private const uint ChunkSizeUnit = 1024;
    private const uint MaxChunkSize = (ushort.MaxValue + 1u) * ChunkSizeUnit;

    // Upper bound on the offsets array allocated before any offset has actually been read.
    private const int InitialOffsetCapacity = 4096;

    /// <summary>
    /// The version of the SOZip index format
    /// </summary>
    public byte Version { get; private set; }

    /// <summary>
    /// Size of each uncompressed chunk in bytes
    /// </summary>
    public uint ChunkSize { get; private set; }

    /// <summary>
    /// Total uncompressed size of the file
    /// </summary>
    public ulong UncompressedSize { get; private set; }

    /// <summary>
    /// Total compressed size of the file
    /// </summary>
    public ulong CompressedSize { get; private set; }

    /// <summary>
    /// Number of offset entries in the index
    /// </summary>
    public uint OffsetCount { get; private set; }

    /// <summary>
    /// Array of compressed offsets for each chunk
    /// </summary>
    public ulong[] CompressedOffsets { get; private set; } = Array.Empty<ulong>();

    /// <summary>
    /// Creates a new empty SOZip index
    /// </summary>
    public SOZipIndex() { }

    /// <summary>
    /// Creates a new SOZip index with specified parameters
    /// </summary>
    /// <param name="chunkSize">Size of each uncompressed chunk</param>
    /// <param name="uncompressedSize">Total uncompressed size</param>
    /// <param name="compressedSize">Total compressed size</param>
    /// <param name="compressedOffsets">Array of compressed offsets</param>
    /// <exception cref="ArgumentNullException"><paramref name="compressedOffsets"/> is null.</exception>
    public SOZipIndex(
        uint chunkSize,
        ulong uncompressedSize,
        ulong compressedSize,
        ulong[] compressedOffsets
    )
    {
        if (compressedOffsets is null)
        {
            throw new ArgumentNullException(nameof(compressedOffsets));
        }

        Version = SOZIP_VERSION;
        ChunkSize = chunkSize;
        UncompressedSize = uncompressedSize;
        CompressedSize = compressedSize;
        OffsetCount = (uint)compressedOffsets.Length;
        CompressedOffsets = compressedOffsets;
    }

    /// <summary>
    /// Reads a SOZip index from a stream
    /// </summary>
    /// <param name="stream">The stream containing the index data</param>
    /// <returns>A parsed SOZipIndex instance</returns>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="InvalidDataException">If the stream doesn't contain valid SOZip index data</exception>
    public static SOZipIndex Read(Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        var index = new SOZipIndex();
        Span<byte> buf8 = stackalloc byte[8];
        var buf4 = buf8.Slice(0, 4);
        var buf2 = buf8.Slice(0, 2);

        // Magic number
        if (!TryReadFully(stream, buf4))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read magic number");
        }
        var magic = BinaryPrimitives.ReadUInt32LittleEndian(buf4);
        if (magic != SOZIP_MAGIC)
        {
            throw new InvalidDataException(
                $"Invalid SOZip index: magic number mismatch (expected 0x{SOZIP_MAGIC:X8}, got 0x{magic:X8})"
            );
        }

        // Version
        var versionByte = stream.ReadByte();
        if (versionByte < 0)
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read version");
        }
        index.Version = (byte)versionByte;
        if (index.Version != SOZIP_VERSION)
        {
            throw new InvalidDataException(
                $"Unsupported SOZip index version: {index.Version} (expected {SOZIP_VERSION})"
            );
        }

        // Reserved byte (padding); its value is ignored but it must be present.
        if (stream.ReadByte() < 0)
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read reserved byte");
        }

        // Chunk size is stored as (actual_size / 1024) - 1
        if (!TryReadFully(stream, buf2))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read chunk size");
        }
        var chunkSizeEncoded = BinaryPrimitives.ReadUInt16LittleEndian(buf2);
        index.ChunkSize = ((uint)chunkSizeEncoded + 1) * ChunkSizeUnit;

        // Uncompressed size
        if (!TryReadFully(stream, buf8))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read uncompressed size");
        }
        index.UncompressedSize = BinaryPrimitives.ReadUInt64LittleEndian(buf8);

        // Compressed size
        if (!TryReadFully(stream, buf8))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read compressed size");
        }
        index.CompressedSize = BinaryPrimitives.ReadUInt64LittleEndian(buf8);

        // Offset count
        if (!TryReadFully(stream, buf4))
        {
            throw new InvalidDataException("Invalid SOZip index: unable to read offset count");
        }
        var offsetCount = BinaryPrimitives.ReadUInt32LittleEndian(buf4);

        // The count comes from the file, so it must not drive an allocation unchecked.
        if (offsetCount > int.MaxValue)
        {
            throw new InvalidDataException(
                $"Invalid SOZip index: offset count {offsetCount} is too large"
            );
        }
        if (stream.CanSeek)
        {
            var remainingBytes = stream.Length - stream.Position;
            if ((long)offsetCount * sizeof(ulong) > remainingBytes)
            {
                throw new InvalidDataException(
                    $"Invalid SOZip index: offset count {offsetCount} exceeds the available data"
                );
            }
        }

        // Start small and grow as offsets are actually read, so a bogus count on a
        // non-seekable stream cannot force a huge up-front allocation.
        var count = (int)offsetCount;
        var offsets = new ulong[Math.Min(count, InitialOffsetCapacity)];
        for (var i = 0; i < count; i++)
        {
            if (!TryReadFully(stream, buf8))
            {
                throw new InvalidDataException($"Invalid SOZip index: unable to read offset {i}");
            }
            if (i == offsets.Length)
            {
                Array.Resize(ref offsets, (int)Math.Min((long)offsets.Length * 2, count));
            }
            offsets[i] = BinaryPrimitives.ReadUInt64LittleEndian(buf8);
        }

        index.OffsetCount = offsetCount;
        index.CompressedOffsets = offsets;
        return index;
    }

    /// <summary>
    /// Reads a SOZip index from a byte array
    /// </summary>
    /// <param name="data">The byte array containing the index data</param>
    /// <returns>A parsed SOZipIndex instance</returns>
    public static SOZipIndex Read(byte[] data)
    {
        using var stream = new MemoryStream(data);
        return Read(stream);
    }

    /// <summary>
    /// Writes this SOZip index to a stream
    /// </summary>
    /// <param name="stream">The stream to write to</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    /// <exception cref="InvalidOperationException">
    /// <see cref="ChunkSize"/> cannot be represented in the index header.
    /// </exception>
    public void Write(Stream stream)
    {
        if (stream is null)
        {
            throw new ArgumentNullException(nameof(stream));
        }

        // Without this check the encoding below would silently wrap or truncate and the
        // index would describe a different chunk size than the one actually used.
        if (ChunkSize < ChunkSizeUnit || ChunkSize > MaxChunkSize || ChunkSize % ChunkSizeUnit != 0)
        {
            throw new InvalidOperationException(
                $"SOZip chunk size {ChunkSize} cannot be encoded: it must be a multiple of {ChunkSizeUnit} between {ChunkSizeUnit} and {MaxChunkSize} bytes"
            );
        }

        Span<byte> buf8 = stackalloc byte[8];

        // Magic number
        BinaryPrimitives.WriteUInt32LittleEndian(buf8, SOZIP_MAGIC);
        stream.Write(buf8.Slice(0, 4));

        // Version, then the reserved padding byte
        stream.WriteByte(SOZIP_VERSION);
        stream.WriteByte(0);

        // Chunk size (encoded as (size/1024)-1)
        var chunkSizeEncoded = (ushort)((ChunkSize / ChunkSizeUnit) - 1);
        BinaryPrimitives.WriteUInt16LittleEndian(buf8, chunkSizeEncoded);
        stream.Write(buf8.Slice(0, 2));

        // Uncompressed size
        BinaryPrimitives.WriteUInt64LittleEndian(buf8, UncompressedSize);
        stream.Write(buf8);

        // Compressed size
        BinaryPrimitives.WriteUInt64LittleEndian(buf8, CompressedSize);
        stream.Write(buf8);

        // Offset count
        BinaryPrimitives.WriteUInt32LittleEndian(buf8, OffsetCount);
        stream.Write(buf8.Slice(0, 4));

        // Offsets
        foreach (var offset in CompressedOffsets)
        {
            BinaryPrimitives.WriteUInt64LittleEndian(buf8, offset);
            stream.Write(buf8);
        }
    }

    /// <summary>
    /// Converts this SOZip index to a byte array
    /// </summary>
    /// <returns>Byte array containing the serialized index</returns>
    public byte[] ToByteArray()
    {
        using var stream = new MemoryStream();
        Write(stream);
        return stream.ToArray();
    }

    /// <summary>
    /// Gets the index of the chunk that contains the specified uncompressed offset
    /// </summary>
    /// <param name="uncompressedOffset">The uncompressed byte offset</param>
    /// <returns>The chunk index</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The offset is negative or not smaller than <see cref="UncompressedSize"/>.
    /// </exception>
    public int GetChunkIndex(long uncompressedOffset)
    {
        if (uncompressedOffset < 0 || (ulong)uncompressedOffset >= UncompressedSize)
        {
            throw new ArgumentOutOfRangeException(
                nameof(uncompressedOffset),
                "Offset is out of range"
            );
        }

        return (int)((ulong)uncompressedOffset / ChunkSize);
    }

    /// <summary>
    /// Gets the compressed offset for the specified chunk index
    /// </summary>
    /// <param name="chunkIndex">The chunk index</param>
    /// <returns>The compressed byte offset for the start of the chunk</returns>
    /// <exception cref="ArgumentOutOfRangeException">The chunk index is not a valid index.</exception>
    public ulong GetCompressedOffset(int chunkIndex)
    {
        if (chunkIndex < 0 || chunkIndex >= CompressedOffsets.Length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkIndex),
                "Chunk index is out of range"
            );
        }

        return CompressedOffsets[chunkIndex];
    }

    /// <summary>
    /// Gets the uncompressed offset for the start of the specified chunk
    /// </summary>
    /// <param name="chunkIndex">The chunk index</param>
    /// <returns>The uncompressed byte offset for the start of the chunk</returns>
    /// <exception cref="ArgumentOutOfRangeException">The chunk index is not a valid index.</exception>
    public ulong GetUncompressedOffset(int chunkIndex)
    {
        if (chunkIndex < 0 || chunkIndex >= CompressedOffsets.Length)
        {
            throw new ArgumentOutOfRangeException(
                nameof(chunkIndex),
                "Chunk index is out of range"
            );
        }

        return (ulong)chunkIndex * ChunkSize;
    }

    /// <summary>
    /// Gets the name of the SOZip index file for a given entry name
    /// </summary>
    /// <param name="entryName">The main entry name</param>
    /// <returns>The index file name (hidden with .sozip.idx extension)</returns>
    public static string GetIndexFileName(string entryName)
    {
        var directory = Path.GetDirectoryName(entryName);
        var fileName = Path.GetFileName(entryName);

        // The index file is hidden (prefixed with .)
        var indexFileName = $".{fileName}{INDEX_EXTENSION}";

        if (string.IsNullOrEmpty(directory))
        {
            return indexFileName;
        }

        // Result always uses forward slashes, whatever Path.Combine produced.
        return Path.Combine(directory, indexFileName).Replace('\\', '/');
    }

    /// <summary>
    /// Checks if a file name is a SOZip index file
    /// </summary>
    /// <param name="fileName">The file name to check</param>
    /// <returns>True if the file is a SOZip index file</returns>
    public static bool IsIndexFile(string fileName)
    {
        if (string.IsNullOrEmpty(fileName))
        {
            return false;
        }

        var name = Path.GetFileName(fileName);
        return name.StartsWith(".", StringComparison.Ordinal)
            && name.EndsWith(INDEX_EXTENSION, StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Gets the main file name from a SOZip index file name
    /// </summary>
    /// <param name="indexFileName">The index file name</param>
    /// <returns>
    /// The main file name, or null if the name is not an index file name or leaves no
    /// main file name once the leading dot and the extension are removed.
    /// </returns>
    public static string? GetMainFileName(string indexFileName)
    {
        if (!IsIndexFile(indexFileName))
        {
            return null;
        }

        var directory = Path.GetDirectoryName(indexFileName);
        var name = Path.GetFileName(indexFileName);

        // Remove leading '.' and trailing '.sozip.idx'. A bare ".sozip.idx" matches
        // IsIndexFile (its leading dot is also the extension's dot) but has nothing left.
        var mainLength = name.Length - 1 - INDEX_EXTENSION.Length;
        if (mainLength <= 0)
        {
            return null;
        }
        var mainName = name.Substring(1, mainLength);

        if (string.IsNullOrEmpty(directory))
        {
            return mainName;
        }

        return Path.Combine(directory, mainName).Replace('\\', '/');
    }

    /// <summary>
    /// Fills <paramref name="buffer"/> completely, looping over short reads.
    /// </summary>
    /// <returns>False if the stream ended before the buffer was full.</returns>
    private static bool TryReadFully(Stream stream, Span<byte> buffer)
    {
        var total = 0;
        while (total < buffer.Length)
        {
            var read = stream.Read(buffer.Slice(total));
            if (read <= 0)
            {
                return false;
            }
            total += read;
        }
        return true;
    }
}

View File

@@ -2,6 +2,7 @@ using System;
using System.Collections.Generic;
using System.Linq;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.Common.Zip.SOZip;
namespace SharpCompress.Common.Zip;
@@ -11,7 +12,7 @@ public class ZipEntry : Entry
internal ZipEntry(ZipFilePart? filePart)
{
if (filePart == null)
if (filePart is null)
{
return;
}
@@ -88,4 +89,24 @@ public class ZipEntry : Entry
public override int? Attrib => (int?)_filePart?.Header.ExternalFileAttributes;
public string? Comment => _filePart?.Header.Comment;
/// <summary>
/// Gets a value indicating whether this entry's header carries a SOZip
/// (Seek-Optimized ZIP) extra field. Returns false when the entry has no file part.
/// </summary>
public bool IsSozip =>
    _filePart is { } part && part.Header.Extra.Any(extra => extra.Type == ExtraDataType.SOZip);
/// <summary>
/// Gets a value indicating whether this entry's key names a SOZip index file,
/// i.e. a dot-prefixed file name ending in <c>.sozip.idx</c>. Returns false when
/// the key is null.
/// </summary>
public bool IsSozipIndexFile => Key is { } key && SOZipIndex.IsIndexFile(key);
/// <summary>
/// Gets the first SOZip extra field found in this entry's header, or null when the
/// entry has no file part or no such field.
/// </summary>
internal SOZipExtraField? SOZipExtra
{
    get
    {
        if (_filePart is null)
        {
            return null;
        }

        return _filePart.Header.Extra.OfType<SOZipExtraField>().FirstOrDefault();
    }
}
}

View File

@@ -34,6 +34,7 @@ internal class ZipCentralDirectoryEntry
internal ulong Decompressed { get; set; }
internal ushort Zip64HeaderOffset { get; set; }
internal ulong HeaderOffset { get; }
/// <summary>
/// Exposes this entry's <c>fileName</c> value to other members of the assembly.
/// </summary>
internal string FileName
{
    get { return fileName; }
}
internal uint Write(Stream outputStream)
{

View File

@@ -8,6 +8,7 @@ using System.Threading.Tasks;
using SharpCompress.Common;
using SharpCompress.Common.Zip;
using SharpCompress.Common.Zip.Headers;
using SharpCompress.Common.Zip.SOZip;
using SharpCompress.Compressors;
using SharpCompress.Compressors.BZip2;
using SharpCompress.Compressors.Deflate;
@@ -27,12 +28,19 @@ public class ZipWriter : AbstractWriter
private long streamPosition;
private PpmdProperties? ppmdProps;
private readonly bool isZip64;
private readonly bool enableSOZip;
private readonly int sozipChunkSize;
private readonly long sozipMinFileSize;
public ZipWriter(Stream destination, ZipWriterOptions zipWriterOptions)
: base(ArchiveType.Zip, zipWriterOptions)
{
zipComment = zipWriterOptions.ArchiveComment ?? string.Empty;
isZip64 = zipWriterOptions.UseZip64;
enableSOZip = zipWriterOptions.EnableSOZip;
sozipChunkSize = zipWriterOptions.SOZipChunkSize;
sozipMinFileSize = zipWriterOptions.SOZipMinFileSize;
if (destination.CanSeek)
{
streamPosition = destination.Position;
@@ -117,12 +125,21 @@ public class ZipWriter : AbstractWriter
var headersize = (uint)WriteHeader(entryPath, options, entry, useZip64);
streamPosition += headersize;
// Determine if SOZip should be used for this entry
var useSozip =
(options.EnableSOZip ?? enableSOZip)
&& compression == ZipCompressionMethod.Deflate
&& OutputStream.CanSeek;
return new ZipWritingStream(
this,
OutputStream.NotNull(),
entry,
compression,
options.CompressionLevel ?? compressionLevel
options.CompressionLevel ?? compressionLevel,
useSozip,
useSozip ? sozipChunkSize : 0
);
}
@@ -304,6 +321,64 @@ public class ZipWriter : AbstractWriter
OutputStream.Write(intBuf);
}
/// <summary>
/// Writes the SOZip index belonging to <paramref name="dataEntry"/> as its own stored
/// (uncompressed) archive entry and registers it in the entry list.
/// </summary>
/// <param name="dataEntry">The data entry whose file name determines the index file name.</param>
/// <param name="sozipStream">The stream that recorded chunk size, sizes and offsets while compressing.</param>
private void WriteSozipIndexFile(
    ZipCentralDirectoryEntry dataEntry,
    SOZipDeflateStream sozipStream
)
{
    var idxName = SOZipIndex.GetIndexFileName(dataEntry.FileName);

    // Serialize everything the compressing stream recorded into the index payload.
    var payload = new SOZipIndex(
        chunkSize: sozipStream.ChunkSize,
        uncompressedSize: sozipStream.UncompressedBytesWritten,
        compressedSize: sozipStream.CompressedBytesWritten,
        compressedOffsets: sozipStream.CompressedOffsets
    ).ToByteArray();

    // CRC-32 over the payload, needed for the entry's headers.
    var checksum = new CRC32();
    checksum.SlurpBlock(payload, 0, payload.Length);
    var payloadCrc = (uint)checksum.Crc32Result;

    // The index entry starts at the current write position and is stored uncompressed.
    var idxEntry = new ZipCentralDirectoryEntry(
        ZipCompressionMethod.None,
        idxName,
        (ulong)streamPosition,
        WriterOptions.ArchiveEncoding
    )
    {
        ModificationTime = DateTime.Now,
    };

    // Local file header first, then the raw payload.
    var idxOptions = new ZipWriterEntryOptions { CompressionType = CompressionType.None };
    var localHeaderSize = (uint)WriteHeader(idxName, idxOptions, idxEntry, isZip64);
    streamPosition += localHeaderSize;

    OutputStream.Write(payload, 0, payload.Length);

    // Sizes are identical because the entry is stored.
    idxEntry.Crc = payloadCrc;
    idxEntry.Compressed = (ulong)payload.Length;
    idxEntry.Decompressed = (ulong)payload.Length;

    if (OutputStream.CanSeek)
    {
        // Go back into the local header (14 bytes past its start) to fill in CRC and
        // sizes, then return to the end of the payload.
        OutputStream.Position = (long)(idxEntry.HeaderOffset + 14);
        WriteFooter(payloadCrc, (uint)payload.Length, (uint)payload.Length);
        OutputStream.Position = streamPosition + payload.Length;
    }

    streamPosition += payload.Length;
    entries.Add(idxEntry);
}
private void WriteEndRecord(ulong size)
{
var zip64EndOfCentralDirectoryNeeded =
@@ -385,7 +460,10 @@ public class ZipWriter : AbstractWriter
private readonly ZipWriter writer;
private readonly ZipCompressionMethod zipCompressionMethod;
private readonly int compressionLevel;
private readonly bool useSozip;
private readonly int sozipChunkSize;
private SharpCompressStream? counting;
private SOZipDeflateStream? sozipStream;
private ulong decompressed;
// Flag to prevent throwing exceptions on Dispose
@@ -397,7 +475,9 @@ public class ZipWriter : AbstractWriter
Stream originalStream,
ZipCentralDirectoryEntry entry,
ZipCompressionMethod zipCompressionMethod,
int compressionLevel
int compressionLevel,
bool useSozip = false,
int sozipChunkSize = 0
)
{
this.writer = writer;
@@ -406,6 +486,8 @@ public class ZipWriter : AbstractWriter
this.entry = entry;
this.zipCompressionMethod = zipCompressionMethod;
this.compressionLevel = compressionLevel;
this.useSozip = useSozip;
this.sozipChunkSize = sozipChunkSize;
writeStream = GetWriteStream(originalStream);
}
@@ -435,6 +517,15 @@ public class ZipWriter : AbstractWriter
}
case ZipCompressionMethod.Deflate:
{
if (useSozip && sozipChunkSize > 0)
{
sozipStream = new SOZipDeflateStream(
counting,
(CompressionLevel)compressionLevel,
sozipChunkSize
);
return sozipStream;
}
return new DeflateStream(
counting,
CompressionMode.Compress,
@@ -581,7 +672,18 @@ public class ZipWriter : AbstractWriter
writer.WriteFooter(entry.Crc, compressedvalue, decompressedvalue);
writer.streamPosition += (long)entry.Compressed + 16;
}
writer.entries.Add(entry);
// Write SOZip index file if SOZip was used and file meets minimum size
if (
useSozip
&& sozipStream is not null
&& entry.Decompressed >= (ulong)writer.sozipMinFileSize
)
{
writer.WriteSozipIndexFile(entry, sozipStream);
}
}
}

View File

@@ -49,4 +49,11 @@ public class ZipWriterEntryOptions
/// This option is not supported with non-seekable streams.
/// </summary>
public bool? EnableZip64 { get; set; }
/// <summary>
/// Enables or disables SOZip (Seek-Optimized ZIP) for this entry only.
/// When null, the writer falls back to the archive-level setting.
/// The writer only applies SOZip when the entry is Deflate-compressed and the
/// output stream is seekable; otherwise this value has no effect.
/// </summary>
public bool? EnableSOZip { get; set; }
}

View File

@@ -1,5 +1,6 @@
using System;
using SharpCompress.Common;
using SharpCompress.Common.Zip.SOZip;
using SharpCompress.Compressors.Deflate;
using D = SharpCompress.Compressors.Deflate;
@@ -24,6 +25,9 @@ public class ZipWriterOptions : WriterOptions
{
UseZip64 = writerOptions.UseZip64;
ArchiveComment = writerOptions.ArchiveComment;
EnableSOZip = writerOptions.EnableSOZip;
SOZipChunkSize = writerOptions.SOZipChunkSize;
SOZipMinFileSize = writerOptions.SOZipMinFileSize;
}
}
@@ -80,4 +84,27 @@ public class ZipWriterOptions : WriterOptions
/// are less than 4GiB in length.
/// </summary>
public bool UseZip64 { get; set; }
/// <summary>
/// Archive-wide default for SOZip (Seek-Optimized ZIP) on Deflate-compressed entries;
/// an individual entry can override it through its own entry options.
/// When enabled, entries that meet the minimum size requirement get an accompanying
/// index file that allows random access within the compressed data.
/// Requires a seekable output stream.
/// </summary>
public bool EnableSOZip { get; set; }
/// <summary>
/// The chunk size for SOZip index creation in bytes. Default is 32KB (32768 bytes).
/// Must be a multiple of 1024 bytes: the index header stores the chunk size in
/// 1 KiB units in a 16-bit field, so other values cannot be represented.
/// This setter does not validate the value.
/// Smaller chunks allow for finer-grained random access but result
/// in larger index files and slightly less efficient compression.
/// </summary>
public int SOZipChunkSize { get; set; } = (int)SOZipIndex.DEFAULT_CHUNK_SIZE;
/// <summary>
/// Minimum uncompressed size in bytes an entry must reach before a SOZip index
/// file is written for it. Default is 1MB (1048576 bytes).
/// The threshold is compared against the entry's final uncompressed size after
/// it has been written, so it only decides whether the index file is emitted.
/// </summary>
public long SOZipMinFileSize { get; set; } = 1048576;
}

View File

@@ -3,6 +3,7 @@ using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using SharpCompress.Common.Zip.SOZip;
using SharpCompress.Readers;
using Xunit;
@@ -45,7 +46,7 @@ public class TestBase : IDisposable
public void Dispose() => Directory.Delete(SCRATCH_BASE_PATH, true);
public void VerifyFiles()
public void VerifyFiles(bool skipSoIndexes = false)
{
if (UseExtensionInsteadOfNameToVerify)
{
@@ -53,7 +54,7 @@ public class TestBase : IDisposable
}
else
{
VerifyFilesByName();
VerifyFilesByName(skipSoIndexes);
}
}
@@ -72,10 +73,23 @@ public class TestBase : IDisposable
}
}
protected void VerifyFilesByName()
private void VerifyFilesByName(bool skipSoIndexes)
{
var extracted = Directory
.EnumerateFiles(SCRATCH_FILES_PATH, "*.*", SearchOption.AllDirectories)
.Where(x =>
{
if (
skipSoIndexes
&& Path.GetFileName(x)
.EndsWith(SOZipIndex.INDEX_EXTENSION, StringComparison.OrdinalIgnoreCase)
)
{
return false;
}
return true;
})
.ToLookup(path => path.Substring(SCRATCH_FILES_PATH.Length));
var original = Directory
.EnumerateFiles(ORIGINAL_FILES_PATH, "*.*", SearchOption.AllDirectories)

View File

@@ -0,0 +1,257 @@
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using SharpCompress.Archives.Zip;
using SharpCompress.Common;
using SharpCompress.Common.Zip.SOZip;
using SharpCompress.Readers.Zip;
using SharpCompress.Test.Mocks;
using SharpCompress.Writers;
using SharpCompress.Writers.Zip;
using Xunit;
namespace SharpCompress.Test.Zip;
public class SoZipReaderTests : TestBase
{
[Fact]
public async Task SOZip_Reader_RegularZip_NoSozipEntries()
{
    // A plain Deflate archive read forward-only must not report any SOZip metadata.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.deflate.zip");
    using Stream forwardOnly = new ForwardOnlyStream(File.OpenRead(archivePath));
    using var zipReader = ZipReader.Open(forwardOnly);

    while (await zipReader.MoveToNextEntryAsync())
    {
        var current = zipReader.Entry;

        Assert.False(current.IsSozip, $"Entry {current.Key} should not be SOZip");
        Assert.False(
            current.IsSozipIndexFile,
            $"Entry {current.Key} should not be a SOZip index file"
        );
    }
}
[Fact]
public void SOZip_Archive_RegularZip_NoSozipEntries()
{
    // A plain Deflate archive opened through the archive API must not report SOZip metadata.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.deflate.zip");
    using Stream input = File.OpenRead(archivePath);
    using var zip = ZipArchive.Open(input);

    Assert.All(
        zip.Entries,
        item =>
        {
            Assert.False(item.IsSozip, $"Entry {item.Key} should not be SOZip");
            Assert.False(
                item.IsSozipIndexFile,
                $"Entry {item.Key} should not be a SOZip index file"
            );
        }
    );
}
[Fact]
public void SOZip_Archive_ReadSOZipFile()
{
    // Open the SOZip fixture through the archive API.
    var archivePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.sozip.zip");
    using Stream input = File.OpenRead(archivePath);
    using var zip = ZipArchive.Open(input);

    // Expected contents: data.txt, .data.txt.sozip.idx and small.txt.
    var all = zip.Entries.ToList();
    Assert.Equal(3, all.Count);

    // Exactly one entry is an index file, and it is stored uncompressed.
    var indexEntry = Assert.Single(all, e => e.IsSozipIndexFile);
    Assert.Equal(".data.txt.sozip.idx", indexEntry.Key);
    Assert.Equal(CompressionType.None, indexEntry.CompressionType);

    // The index payload must parse and describe a non-empty file.
    using (var indexStream = indexEntry.OpenEntryStream())
    using (var buffered = new MemoryStream())
    {
        indexStream.CopyTo(buffered);
        var parsed = SOZipIndex.Read(buffered.ToArray());

        Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
        Assert.Equal(1024u, parsed.ChunkSize); // As set in CreateSOZipTestArchive
        Assert.True(parsed.UncompressedSize > 0);
        Assert.True(parsed.OffsetCount > 0);
    }

    // The data entry decompresses to 5000 'A' characters.
    var dataEntry = all.First(e => e.Key == "data.txt");
    using (var dataStream = dataEntry.OpenEntryStream())
    using (var dataReader = new StreamReader(dataStream))
    {
        var text = dataReader.ReadToEnd();
        Assert.Equal(5000, text.Length);
        Assert.True(text.All(c => c == 'A'));
    }

    // The small entry is an ordinary file with known content.
    var smallEntry = all.First(e => e.Key == "small.txt");
    Assert.False(smallEntry.IsSozipIndexFile);
    using (var smallStream = smallEntry.OpenEntryStream())
    using (var smallReader = new StreamReader(smallStream))
    {
        Assert.Equal("Small content", smallReader.ReadToEnd());
    }
}
[Fact]
public async Task SOZip_Reader_ReadSOZipFile()
{
    // Walk the checked-in SOZip fixture with ZipReader over a forward-only wrapper.
    var fixturePath = Path.Combine(TEST_ARCHIVES_PATH, "Zip.sozip.zip");
    using Stream forwardOnly = new ForwardOnlyStream(File.OpenRead(fixturePath));
    using var zipReader = ZipReader.Open(forwardOnly);

    // One flag per expected entry; all three must be set once the walk ends.
    bool sawData = false,
        sawIndex = false,
        sawSmall = false;

    while (await zipReader.MoveToNextEntryAsync())
    {
        var current = zipReader.Entry;
        switch (current.Key)
        {
            case "data.txt":
            {
                sawData = true;
                Assert.False(current.IsSozipIndexFile);

                // The payload must decompress to 5000 'A' characters.
                using var body = zipReader.OpenEntryStream();
                using var text = new StreamReader(body);
                var content = text.ReadToEnd();
                Assert.Equal(5000, content.Length);
                Assert.False(content.Any(ch => ch != 'A'));
                break;
            }
            case ".data.txt.sozip.idx":
            {
                sawIndex = true;
                Assert.True(current.IsSozipIndexFile);

                // The index payload must parse and carry the expected version.
                using var raw = zipReader.OpenEntryStream();
                using var buffer = new MemoryStream();
                await raw.CopyToAsync(buffer);
                var parsed = SOZipIndex.Read(buffer.ToArray());
                Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
                break;
            }
            case "small.txt":
            {
                sawSmall = true;
                Assert.False(current.IsSozipIndexFile);
                break;
            }
        }
    }

    Assert.True(sawData, "data.txt entry not found");
    Assert.True(sawIndex, ".data.txt.sozip.idx entry not found");
    Assert.True(sawSmall, "small.txt entry not found");
}
[Fact]
public void SOZip_Archive_DetectsIndexFileByName()
{
    // Payloads: an ordinary text file, plus a serialized SOZip index that is
    // written under an index-style name.
    var textBytes = Encoding.UTF8.GetBytes("Hello World");
    var indexBytes = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 100,
        compressedSize: 50,
        compressedOffsets: new ulong[] { 0 }
    ).ToByteArray();

    // Build the zip in memory, leaving the buffer open so it can be re-read.
    using var zipBuffer = new MemoryStream();
    var writerOptions = new ZipWriterOptions(CompressionType.Deflate) { LeaveStreamOpen = true };
    using (var zipWriter = WriterFactory.Open(zipBuffer, ArchiveType.Zip, writerOptions))
    {
        zipWriter.Write("test.txt", new MemoryStream(textBytes));
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(indexBytes));
    }
    zipBuffer.Seek(0, SeekOrigin.Begin);

    // Inspect the result through ZipArchive.
    using var zip = ZipArchive.Open(zipBuffer);
    var all = zip.Entries.ToList();
    Assert.Equal(2, all.Count);

    var plain = all.First(candidate => candidate.Key == "test.txt");
    Assert.False(plain.IsSozipIndexFile);
    Assert.False(plain.IsSozip); // No SOZip extra field

    var idx = all.First(candidate => candidate.Key == ".test.txt.sozip.idx");
    Assert.True(idx.IsSozipIndexFile);
}
[Fact]
public async Task SOZip_Reader_DetectsIndexFileByName()
{
    // Build an in-memory zip holding a regular file and a file whose name
    // follows the SOZip index pattern.
    using var zipBuffer = new MemoryStream();
    var writerOptions = new ZipWriterOptions(CompressionType.Deflate) { LeaveStreamOpen = true };
    using (var zipWriter = WriterFactory.Open(zipBuffer, ArchiveType.Zip, writerOptions))
    {
        var textBytes = Encoding.UTF8.GetBytes("Hello World");
        zipWriter.Write("test.txt", new MemoryStream(textBytes));

        var fakeIndex = new SOZipIndex(
            chunkSize: 32768,
            uncompressedSize: 100,
            compressedSize: 50,
            compressedOffsets: new ulong[] { 0 }
        );
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(fakeIndex.ToByteArray()));
    }
    zipBuffer.Seek(0, SeekOrigin.Begin);

    // Walk the archive with ZipReader over a forward-only wrapper.
    using Stream forwardOnly = new ForwardOnlyStream(zipBuffer);
    using var zipReader = ZipReader.Open(forwardOnly);

    bool sawRegular = false,
        sawIndex = false;

    while (await zipReader.MoveToNextEntryAsync())
    {
        var current = zipReader.Entry;
        switch (current.Key)
        {
            case "test.txt":
                sawRegular = true;
                Assert.False(current.IsSozipIndexFile);
                Assert.False(current.IsSozip);
                break;
            case ".test.txt.sozip.idx":
                sawIndex = true;
                Assert.True(current.IsSozipIndexFile);
                break;
        }
    }

    Assert.True(sawRegular, "Regular entry not found");
    Assert.True(sawIndex, "Index entry not found");
}
}

View File

@@ -0,0 +1,358 @@
using System;
using System.IO;
using System.Linq;
using System.Text;
using SharpCompress.Archives.Zip;
using SharpCompress.Common;
using SharpCompress.Common.Zip.SOZip;
using SharpCompress.Readers;
using SharpCompress.Writers;
using SharpCompress.Writers.Zip;
using Xunit;
namespace SharpCompress.Test.Zip;
public class SoZipWriterTests : TestBase
{
[Fact]
public void SOZipIndex_RoundTrip()
{
    // Serialize an index to bytes, parse the bytes back, and compare every field.
    var expectedOffsets = new ulong[] { 0, 1024, 2048, 3072 };
    var source = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 100000,
        compressedSize: 50000,
        compressedOffsets: expectedOffsets
    );

    var roundTripped = SOZipIndex.Read(source.ToByteArray());

    Assert.Equal(SOZipIndex.SOZIP_VERSION, roundTripped.Version);
    Assert.Equal(32768u, roundTripped.ChunkSize);
    Assert.Equal(100000ul, roundTripped.UncompressedSize);
    Assert.Equal(50000ul, roundTripped.CompressedSize);
    Assert.Equal(4u, roundTripped.OffsetCount);
    Assert.Equal(expectedOffsets, roundTripped.CompressedOffsets);
}
[Fact]
public void SOZipIndex_Read_InvalidMagic_ThrowsException()
{
    // Four zero bytes: SOZipIndex.Read is expected to reject this input with
    // an InvalidDataException that mentions the magic number.
    var zeroes = new byte[4];

    var thrown = Assert.Throws<InvalidDataException>(() => SOZipIndex.Read(zeroes));

    Assert.Contains("magic number mismatch", thrown.Message);
}
[Fact]
public void SOZipIndex_GetChunkIndex()
{
    // Five chunks of 32768 uncompressed bytes each (163840 bytes in total).
    const int chunk = 32768;
    var index = new SOZipIndex(
        chunkSize: chunk,
        uncompressedSize: 5 * chunk,
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    // Positions on either side of the first chunk boundary.
    Assert.Equal(0, index.GetChunkIndex(0));
    Assert.Equal(0, index.GetChunkIndex(chunk - 1));
    Assert.Equal(1, index.GetChunkIndex(chunk));

    // Start of the third chunk and the very last byte of the file.
    Assert.Equal(2, index.GetChunkIndex(2 * chunk));
    Assert.Equal(4, index.GetChunkIndex(5 * chunk - 1));
}
[Fact]
public void SOZipIndex_GetCompressedOffset()
{
    // Each chunk number should map to the offset supplied at that position.
    var subject = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 163840,
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    Assert.Equal(0ul, subject.GetCompressedOffset(0));
    Assert.Equal(1000ul, subject.GetCompressedOffset(1));
    Assert.Equal(2000ul, subject.GetCompressedOffset(2));
    Assert.Equal(3000ul, subject.GetCompressedOffset(3));
    Assert.Equal(4000ul, subject.GetCompressedOffset(4));
}
[Fact]
public void SOZipIndex_GetUncompressedOffset()
{
    // With a chunk size of 32768, chunk k is expected to start at k * 32768.
    var subject = new SOZipIndex(
        chunkSize: 32768,
        uncompressedSize: 163840,
        compressedSize: 5000,
        compressedOffsets: new ulong[] { 0, 1000, 2000, 3000, 4000 }
    );

    Assert.Equal(0ul, subject.GetUncompressedOffset(0));
    Assert.Equal(32768ul, subject.GetUncompressedOffset(1));
    Assert.Equal(65536ul, subject.GetUncompressedOffset(2));
    Assert.Equal(98304ul, subject.GetUncompressedOffset(3));
    Assert.Equal(131072ul, subject.GetUncompressedOffset(4));
}
[Fact]
public void SOZipIndex_GetIndexFileName()
{
    // (main file name, expected index file name) pairs, with and without
    // directory components.
    var cases = new[]
    {
        (Main: "file.txt", Index: ".file.txt.sozip.idx"),
        (Main: "dir/file.txt", Index: "dir/.file.txt.sozip.idx"),
        (Main: "a/b/file.txt", Index: "a/b/.file.txt.sozip.idx"),
    };

    foreach (var (main, expectedIndex) in cases)
    {
        Assert.Equal(expectedIndex, SOZipIndex.GetIndexFileName(main));
    }
}
[Fact]
public void SOZipIndex_IsIndexFile()
{
    // Names that must be recognised as SOZip index files.
    string[] accepted = { ".file.txt.sozip.idx", "dir/.file.txt.sozip.idx", ".test.sozip.idx" };
    foreach (var name in accepted)
    {
        Assert.True(SOZipIndex.IsIndexFile(name));
    }

    // Names that must be rejected.
    string[] rejected =
    {
        "file.txt",
        "file.sozip.idx", // Missing leading dot
        ".file.txt", // Missing .sozip.idx
        "",
    };
    foreach (var name in rejected)
    {
        Assert.False(SOZipIndex.IsIndexFile(name));
    }

    // A null name must also be rejected.
    Assert.False(SOZipIndex.IsIndexFile(null!));
}
[Fact]
public void SOZipIndex_GetMainFileName()
{
    // Index-style names map back to the main file they describe.
    var cases = new[]
    {
        (Index: ".file.txt.sozip.idx", Main: "file.txt"),
        (Index: "dir/.file.txt.sozip.idx", Main: "dir/file.txt"),
        (Index: ".test.sozip.idx", Main: "test"),
    };
    foreach (var (indexName, expectedMain) in cases)
    {
        Assert.Equal(expectedMain, SOZipIndex.GetMainFileName(indexName));
    }

    // Names that are not index files produce null.
    foreach (var notAnIndex in new[] { "file.txt", "" })
    {
        Assert.Null(SOZipIndex.GetMainFileName(notAnIndex));
    }
}
[Fact]
public void ZipEntry_IsSozipIndexFile_Detection()
{
    // Write two entries into an in-memory zip: an ordinary file and one whose
    // name follows the SOZip index pattern.
    using var zipBuffer = new MemoryStream();
    var deflateOptions = new ZipWriterOptions(CompressionType.Deflate) { LeaveStreamOpen = true };
    using (var zipWriter = WriterFactory.Open(zipBuffer, ArchiveType.Zip, deflateOptions))
    {
        var helloBytes = Encoding.UTF8.GetBytes("Hello World");
        zipWriter.Write("test.txt", new MemoryStream(helloBytes));

        var indexBytes = new SOZipIndex(
            chunkSize: 32768,
            uncompressedSize: 100,
            compressedSize: 50,
            compressedOffsets: new ulong[] { 0 }
        ).ToByteArray();
        zipWriter.Write(".test.txt.sozip.idx", new MemoryStream(indexBytes));
    }

    // Rewind and re-open the buffer as an archive.
    zipBuffer.Seek(0, SeekOrigin.Begin);
    using var zip = ZipArchive.Open(zipBuffer);
    var listed = zip.Entries.ToList();
    Assert.Equal(2, listed.Count);

    // The ordinary entry carries neither SOZip flag.
    var ordinary = listed.First(item => item.Key == "test.txt");
    Assert.False(ordinary.IsSozipIndexFile);
    Assert.False(ordinary.IsSozip); // No SOZip extra field

    // The index-named entry is flagged as an index file.
    var indexLike = listed.First(item => item.Key == ".test.txt.sozip.idx");
    Assert.True(indexLike.IsSozipIndexFile);
}
[Fact]
public void ZipWriterOptions_SOZipDefaults()
{
    // Freshly constructed writer options: SOZip is off, with the default
    // chunk size and a 1MB minimum file size.
    var defaults = new ZipWriterOptions(CompressionType.Deflate);

    Assert.False(defaults.EnableSOZip);
    Assert.Equal((int)SOZipIndex.DEFAULT_CHUNK_SIZE, defaults.SOZipChunkSize);
    Assert.Equal(1048576L, defaults.SOZipMinFileSize); // 1MB
}
[Fact]
public void ZipWriterEntryOptions_SOZipDefaults()
{
    // Per-entry options start with EnableSOZip unset (null).
    var entryDefaults = new ZipWriterEntryOptions();

    Assert.Null(entryDefaults.EnableSOZip);
}
[Fact]
public void SOZip_RoundTrip_CompressAndDecompress()
{
    var zipPath = Path.Combine(SCRATCH2_FILES_PATH, "test.sozip.zip");

    // Step 1: pack every file under the Original directory with SOZip enabled.
    using (var output = File.Create(zipPath))
    {
        var writerOptions = new ZipWriterOptions(CompressionType.Deflate)
        {
            EnableSOZip = true,
            SOZipMinFileSize = 1024, // 1KB to ensure test files qualify
            LeaveStreamOpen = false,
        };
        using var zipWriter = new ZipWriter(output, writerOptions);

        var sourceFiles = Directory.GetFiles(
            ORIGINAL_FILES_PATH,
            "*",
            SearchOption.AllDirectories
        );
        foreach (var sourceFile in sourceFiles)
        {
            // Entry name: path relative to the Original directory, with
            // forward slashes.
            var tail = sourceFile.Substring(ORIGINAL_FILES_PATH.Length + 1);
            var entryName = tail.Replace('\\', '/');

            using var input = File.OpenRead(sourceFile);
            zipWriter.Write(entryName, input, new ZipWriterEntryOptions());
        }
    }

    Assert.True(File.Exists(zipPath));

    // Step 2: re-open the archive and validate every SOZip index entry.
    using (var written = File.OpenRead(zipPath))
    {
        using var zip = ZipArchive.Open(written);
        var everything = zip.Entries.ToList();
        Assert.NotEmpty(everything);

        // At least one index file must have been produced.
        var indexEntries = everything.Where(item => item.IsSozipIndexFile).ToList();
        Assert.NotEmpty(indexEntries);

        foreach (var idx in indexEntries)
        {
            // Index entries are stored, not compressed.
            Assert.Equal(CompressionType.None, idx.CompressionType);

            using var raw = idx.OpenEntryStream();
            using var buffer = new MemoryStream();
            raw.CopyTo(buffer);
            var payload = buffer.ToArray();

            // Sanity check on the size before parsing.
            Assert.True(payload.Length >= 4, $"Index file too small: {payload.Length} bytes");

            // The payload must parse and describe a non-empty file.
            var parsed = SOZipIndex.Read(payload);
            Assert.Equal(SOZipIndex.SOZIP_VERSION, parsed.Version);
            Assert.True(parsed.ChunkSize > 0);
            Assert.True(parsed.UncompressedSize > 0);
            Assert.True(parsed.OffsetCount > 0);

            // Every index must sit alongside the data file it describes.
            var describedFile = SOZipIndex.GetMainFileName(idx.Key!);
            Assert.NotNull(describedFile);
            Assert.Contains(everything, item => item.Key == describedFile);
        }
    }

    // Step 3: extract the archive into the scratch directory.
    using (var written = File.OpenRead(zipPath))
    {
        using var extractor = ReaderFactory.Open(written);
        var extraction = new ExtractionOptions { ExtractFullPath = true };
        extractor.WriteAllToDirectory(SCRATCH_FILES_PATH, extraction);
    }

    // Step 4: compare the extracted files with the originals.
    VerifyFiles(true);
}
[Fact]
public void CreateSOZipTestArchive()
{
    // Build the archive in the scratch directory rather than over the
    // checked-in fixture in TEST_ARCHIVES_PATH. Calling File.Create on the
    // fixture path rewrote a source-controlled file on every run and could
    // truncate it while tests that open "Zip.sozip.zip" for reading were
    // running.
    var archivePath = Path.Combine(SCRATCH2_FILES_PATH, "Zip.sozip.zip");
    using (var stream = File.Create(archivePath))
    {
        var options = new ZipWriterOptions(CompressionType.Deflate)
        {
            EnableSOZip = true,
            SOZipMinFileSize = 100, // Low threshold to ensure test content is optimized
            SOZipChunkSize = 1024, // Small chunks for testing
            LeaveStreamOpen = false,
        };
        using var writer = new ZipWriter(stream, options);

        // 5000 'A' characters: above SOZipMinFileSize and several chunks long.
        var largeContent = new string('A', 5000);
        using var largeStream = new MemoryStream(Encoding.UTF8.GetBytes(largeContent));
        writer.Write("data.txt", largeStream, new ZipWriterEntryOptions());

        // 13 bytes: below SOZipMinFileSize, so no index entry is expected for it.
        using var smallStream = new MemoryStream(Encoding.UTF8.GetBytes("Small content"));
        writer.Write("small.txt", smallStream, new ZipWriterEntryOptions());
    }

    // Validate the archive was created
    Assert.True(File.Exists(archivePath));

    // Validate it's a valid SOZip archive
    using (var stream = File.OpenRead(archivePath))
    {
        using var archive = ZipArchive.Open(stream);
        var entries = archive.Entries.ToList();

        // Should have data file, small file, and index file
        Assert.Equal(3, entries.Count);

        // Exactly one SOZip index file, named after data.txt
        var indexFiles = entries.Where(e => e.IsSozipIndexFile).ToList();
        Assert.Single(indexFiles);
        var indexEntry = indexFiles.First();
        Assert.Equal(".data.txt.sozip.idx", indexEntry.Key);

        // The data file must read back to exactly what was written
        var dataEntry = entries.First(e => e.Key == "data.txt");
        using var dataStream = dataEntry.OpenEntryStream();
        using var reader = new StreamReader(dataStream);
        var content = reader.ReadToEnd();
        Assert.Equal(5000, content.Length);
        Assert.True(content.All(c => c == 'A'));
    }
}
}

Binary file not shown.