15 Commits
0.4.4 ... 0.5.1

Author SHA1 Message Date
Matt Nadareski
e217f4109c Bump version 2024-04-26 21:42:53 -04:00
Matt Nadareski
882f2c5335 Update SabreTools.IO 2024-04-26 21:41:13 -04:00
Matt Nadareski
3f6c7dc0d6 Bump version 2024-04-24 10:11:31 -04:00
Matt Nadareski
bf413cbb85 Update SabreTools.Models 2024-04-24 10:08:00 -04:00
Matt Nadareski
7a403cf368 Make Linux publish script executable 2024-04-24 01:34:40 -04:00
Matt Nadareski
aec4611d14 Add publish scripts 2024-04-24 01:33:42 -04:00
Matt Nadareski
7052584cea Add incomplete compressions to table 2024-04-24 01:30:47 -04:00
Matt Nadareski
efe6c545b9 Add a couple of tags 2024-04-24 01:29:30 -04:00
Matt Nadareski
612a8b3c83 Use constants that were defined 2024-04-24 00:50:35 -04:00
Matt Nadareski
bc06cb5bdb Port extension code for zlib constant names from UnshieldSharp 2024-04-24 00:49:34 -04:00
Matt Nadareski
ae223a4589 Port Blast code from UnshieldSharp 2024-04-24 00:47:31 -04:00
Matt Nadareski
018fd01922 Port zlibConst from UnshieldSharp 2024-04-24 00:35:12 -04:00
Matt Nadareski
910b01b072 Update packages 2024-04-23 21:07:02 -04:00
Matt Nadareski
d239d9f09b Bump version 2024-04-18 12:08:56 -04:00
Matt Nadareski
0cf3e3e816 Update SabreTools.IO 2024-04-18 12:05:30 -04:00
9 changed files with 768 additions and 4 deletions

View File

@@ -8,8 +8,11 @@ Find the link to the Nuget package [here](https://www.nuget.org/packages/SabreTo
| Compression Name | Decompress | Compress |
| --- | --- | --- |
| Blast | Yes | No |
| LZ | Yes | No |
| LZX | No | No |
| MSZIP | Yes* | No |
| Quantum | No | No |
**Note:** If something is marked with a `*`, it means that it needs testing.

View File

@@ -0,0 +1,270 @@
/* blast.c
* Copyright (C) 2003, 2012, 2013 Mark Adler
* For conditions of distribution and use, see copyright notice in blast.h
* version 1.3, 24 Aug 2013
*
* blast.c decompresses data compressed by the PKWare Compression Library.
* This function provides functionality similar to the explode() function of
* the PKWare library, hence the name "blast".
*
* This decompressor is based on the excellent format description provided by
* Ben Rudiak-Gould in comp.compression on August 13, 2001. Interestingly, the
* example Ben provided in the post is incorrect. The distance 110001 should
* instead be 111000. When corrected, the example byte stream becomes:
*
* 00 04 82 24 25 8f 80 7f
*
* which decompresses to "AIAIAIAIAIAIA" (without the quotes).
*/
/*
* Change history:
*
* 1.0 12 Feb 2003 - First version
* 1.1 16 Feb 2003 - Fixed distance check for > 4 GB uncompressed data
* 1.2 24 Oct 2012 - Add note about using binary mode in stdio
* - Fix comparisons of differently signed integers
* 1.3 24 Aug 2013 - Return unused input from blast()
* - Fix test code to correctly report unused input
* - Enable the provision of initial input to blast()
*/
using System;
using System.Collections.Generic;
using static SabreTools.Compression.Blast.Constants;
namespace SabreTools.Compression.Blast
{
/// <summary>
/// Decompressor for the PKWare Data Compression Library (DCL) format,
/// ported from blast.c
/// </summary>
public static class BlastDecoder
{
    #region Huffman Encoding

    /// <summary>
    /// Literal code
    /// </summary>
    private static readonly Huffman litcode = new(MAXBITS + 1, 256);

    /// <summary>
    /// Length code
    /// </summary>
    private static readonly Huffman lencode = new(MAXBITS + 1, 16);

    /// <summary>
    /// Distance code
    /// </summary>
    private static readonly Huffman distcode = new(MAXBITS + 1, 64);

    /// <summary>
    /// Base for length codes
    /// </summary>
    private static readonly short[] baseLength =
    [
        3, 2, 4, 5, 6, 7, 8, 9, 10, 12, 16, 24, 40, 72, 136, 264
    ];

    /// <summary>
    /// Extra bits for length codes
    /// </summary>
    private static readonly byte[] extra =
    [
        0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8
    ];

    #endregion

    /// <summary>
    /// Static constructor. Builds the three fixed Huffman tables from their
    /// compact (repeat count, code length) descriptions.
    /// </summary>
    static BlastDecoder()
    {
        // Repeated code lengths of literal codes
        byte[] litlen =
        [
            11, 124, 8, 7, 28, 7, 188, 13, 76, 4, 10, 8, 12, 10, 12, 10, 8, 23, 8,
            9, 7, 6, 7, 8, 7, 6, 55, 8, 23, 24, 12, 11, 7, 9, 11, 12, 6, 7, 22, 5,
            7, 24, 6, 11, 9, 6, 7, 22, 7, 11, 38, 7, 9, 8, 25, 11, 8, 11, 9, 12,
            8, 12, 5, 38, 5, 38, 5, 11, 7, 5, 6, 21, 6, 10, 53, 8, 7, 24, 10, 27,
            44, 253, 253, 253, 252, 252, 252, 13, 12, 45, 12, 45, 12, 61, 12, 45,
            44, 173
        ];
        litcode.Initialize(litlen);

        // Repeated code lengths of length codes 0..15
        byte[] lenlen =
        [
            2, 35, 36, 53, 38, 23
        ];
        lencode.Initialize(lenlen);

        // Repeated code lengths of distance codes 0..63
        byte[] distlen =
        [
            2, 20, 53, 230, 247, 151, 248
        ];
        distcode.Initialize(distlen);
    }

    /// <summary>
    /// blast() decompresses the PKWare Data Compression Library (DCL) compressed
    /// format. It provides the same functionality as the explode() function in
    /// that library. (Note: PKWare overused the "implode" verb, and the format
    /// used by their library implode() function is completely different and
    /// incompatible with the implode compression method supported by PKZIP.)
    /// </summary>
    /// <param name="inhow">Compressed input bytes</param>
    /// <param name="outhow">List that receives the decompressed bytes</param>
    /// <returns>
    /// 0 on success, 1 if the output could not be written, 2 if the input ran
    /// out before decompression completed, -1 if the literal flag is not 0 or 1,
    /// -2 if the dictionary size is not in 4..6, -3 if a distance points too
    /// far back
    /// </returns>
    public static int Blast(byte[] inhow, List<byte> outhow)
    {
        // Input/output state
        var state = new State(inhow, outhow);

        // Attempt to decompress using the above state
        int err;
        try
        {
            err = Decomp(state);
        }
        catch (Exception ex) when (ex is IndexOutOfRangeException or ArgumentOutOfRangeException)
        {
            // This was originally a jump, which is bad form for C#.
            // Running out of input is signalled with IndexOutOfRangeException,
            // but reading past the end of the List-backed input surfaces as
            // ArgumentOutOfRangeException, so both map to "ran out of input".
            err = 2;
        }

        // Write any leftover output and update the error code if needed
        if (err != 1 && state.Next != 0 && !state.ProcessOutput() && err == 0)
            err = 1;

        return err;
    }

    /// <summary>
    /// Decode PKWare Compression Library stream.
    /// </summary>
    /// <param name="state">Input/output state to read from and write to</param>
    /// <returns>
    /// 0 when the end code was reached, 1 if flushing the output window failed,
    /// -1, -2 or -3 for an invalid literal flag, dictionary size or distance
    /// </returns>
    /// <remarks>
    /// First byte is 0 if literals are uncoded or 1 if they are coded. Second
    /// byte is 4, 5, or 6 for the number of extra bits in the distance code.
    /// This is the base-2 logarithm of the dictionary size minus six.
    ///
    /// Compressed data is a combination of literals and length/distance pairs
    /// terminated by an end code. Literals are either Huffman coded or
    /// uncoded bytes. A length/distance pair is a coded length followed by a
    /// coded distance to represent a string that occurs earlier in the
    /// uncompressed data that occurs again at the current location.
    ///
    /// A bit preceding a literal or length/distance pair indicates which comes
    /// next, 0 for literals, 1 for length/distance.
    ///
    /// If literals are uncoded, then the next eight bits are the literal, in the
    /// normal bit order in the stream, i.e. no bit-reversal is needed. Similarly,
    /// no bit reversal is needed for either the length extra bits or the distance
    /// extra bits.
    ///
    /// Literal bytes are simply written to the output. A length/distance pair is
    /// an instruction to copy previously uncompressed bytes to the output. The
    /// copy is from distance bytes back in the output stream, copying for length
    /// bytes.
    ///
    /// Distances pointing before the beginning of the output data are not
    /// permitted.
    ///
    /// Overlapped copies, where the length is greater than the distance, are
    /// allowed and common. For example, a distance of one and a length of 518
    /// simply copies the last byte 518 times. A distance of four and a length of
    /// twelve copies the last four bytes three times. A simple forward copy
    /// ignoring whether the length is greater than the distance or not implements
    /// this correctly.
    /// </remarks>
    private static int Decomp(State state)
    {
        int symbol; // decoded symbol, extra bits for distance
        int len; // length for copy
        uint dist; // distance for copy
        int copy; // copy counter
        int from, to; // copy pointers

        // Read header
        int lit = state.Bits(8); // true if literals are coded
        if (lit > 1)
            return -1;

        int dict = state.Bits(8); // log2(dictionary size) - 6
        if (dict < 4 || dict > 6)
            return -2;

        // Decode literals and length/distance pairs
        do
        {
            if (state.Bits(1) != 0)
            {
                // Get length
                symbol = lencode.Decode(state);
                len = baseLength[symbol] + state.Bits(extra[symbol]);
                if (len == 519)
                    break; // end code

                // Get distance (a length of 2 always uses 2 extra bits)
                symbol = len == 2 ? 2 : dict;
                dist = (uint)(distcode.Decode(state) << symbol);
                dist += (uint)state.Bits(symbol);
                dist++;
                if (state.First && dist > state.Next)
                    return -3; // distance too far back

                // Copy length bytes from distance bytes back
                do
                {
                    to = (int)(state.OutputPtr + state.Next);
                    from = (int)(to - dist);
                    copy = MAXWIN;

                    // Source lies before the start of the window: wrap it
                    // around to the end and stop when it reaches the window end
                    if (state.Next < dist)
                    {
                        from += copy;
                        copy = (int)dist;
                    }

                    // Limit this pass to the room left before the boundary
                    copy -= (int)state.Next;
                    if (copy > len)
                        copy = len;

                    len -= copy;
                    state.Next += (uint)copy;

                    // Byte-by-byte forward copy so overlapping ranges repeat
                    do
                    {
                        state.Output[to++] = state.Output[from++];
                    }
                    while (--copy != 0);

                    // Flush the window once it is full
                    if (state.Next == MAXWIN)
                    {
                        if (!state.ProcessOutput())
                            return 1;

                        state.Next = 0;
                        state.First = false;
                    }
                }
                while (len != 0);
            }
            else
            {
                // Get literal and write it
                symbol = lit != 0 ? litcode.Decode(state) : state.Bits(8);
                state.Output[state.Next++] = (byte)symbol;

                // Flush the window once it is full
                if (state.Next == MAXWIN)
                {
                    if (!state.ProcessOutput())
                        return 1;

                    state.Next = 0;
                    state.First = false;
                }
            }
        }
        while (true);

        return 0;
    }
}
}

View File

@@ -0,0 +1,15 @@
namespace SabreTools.Compression.Blast
{
/// <summary>
/// Size limits shared by the Blast decoder types
/// </summary>
public static class Constants
{
    /// <summary>
    /// Longest Huffman code length, in bits
    /// </summary>
    public const int MAXBITS = 13;

    /// <summary>
    /// Size of the output window, in bytes (4096)
    /// </summary>
    public const int MAXWIN = 1 << 12;
}
}

View File

@@ -0,0 +1,207 @@
using System;
using static SabreTools.Compression.Blast.Constants;
namespace SabreTools.Compression.Blast
{
/// <summary>
/// Decoding tables for a canonical Huffman code. Count[1..MAXBITS] holds how
/// many symbols use each code length, and Symbol[] lists the symbol values in
/// canonical order (by length, then by original symbol order). The total
/// number of Symbol entries in use equals the sum of the counts.
/// </summary>
public class Huffman
{
    /// <summary>
    /// Number of symbols of each length
    /// </summary>
    public short[] Count { get; set; }

    /// <summary>
    /// Pointer to number of symbols of each length
    /// </summary>
    public int CountPtr { get; set; }

    /// <summary>
    /// Canonically ordered symbols
    /// </summary>
    public short[] Symbol { get; set; }

    /// <summary>
    /// Allocate the count and symbol tables
    /// </summary>
    /// <param name="countLength">Length of the Count array</param>
    /// <param name="symbolLength">Length of the Symbol array</param>
    public Huffman(int countLength, int symbolLength)
    {
        Count = new short[countLength];
        Symbol = new short[symbolLength];
    }

    /// <summary>
    /// Build the decoding tables from a compact list of repeated code lengths.
    /// Each byte of <paramref name="rep"/> packs a repeat count (high four
    /// bits + 1) and a code length (low four bits). The expanded lengths
    /// describe a canonical Huffman code from which Count and Symbol are filled.
    /// </summary>
    /// <param name="rep">Repeated code length array</param>
    /// <returns>
    /// Zero for a complete code set, negative for an over-subscribed set (the
    /// tables must not be used), positive for an incomplete set (decoding may
    /// then fail for codes past the end of the incomplete lengths)
    /// </returns>
    public int Initialize(byte[] rep)
    {
        short[] codeLengths = new short[256];       // Code length of every symbol
        short[] nextSlot = new short[MAXBITS + 1];  // Next free Symbol index per length

        // Expand the packed (repeat, length) bytes into one length per symbol
        int total = 0;
        int remaining = rep.Length;
        int cursor = 0;
        do
        {
            int packed = rep[cursor++];
            int repeat = (packed >> 4) + 1;
            short width = (short)(packed & 15);
            do
            {
                codeLengths[total++] = width;
            }
            while (--repeat != 0);
        }
        while (--remaining != 0);

        // Tally how many symbols use each code length
        for (int width = 0; width <= MAXBITS; width++)
        {
            Count[width] = 0;
        }

        // Assumes lengths are within bounds
        for (int sym = 0; sym < total; sym++)
        {
            Count[codeLengths[sym]]++;
        }

        // Every length is zero: complete, but decoding will fail
        if (Count[0] == total)
            return 0;

        // Walk the lengths, doubling the free code space at each extra bit
        // and removing the codes that are taken at that length
        int available = 1; // One possible code of zero length
        for (int width = 1; width <= MAXBITS; width++)
        {
            available = (available << 1) - Count[width];
            if (available < 0)
                return available; // Over-subscribed
        }

        // Starting Symbol index for each code length
        nextSlot[1] = 0;
        for (int width = 1; width < MAXBITS; width++)
        {
            nextSlot[width + 1] = (short)(nextSlot[width] + Count[width]);
        }

        // Place symbols grouped by length, keeping symbol order inside a group
        for (int sym = 0; sym < total; sym++)
        {
            short width = codeLengths[sym];
            if (width == 0)
                continue;

            Symbol[nextSlot[width]] = (short)sym;
            nextSlot[width]++;
        }

        // Zero for a complete set, positive for an incomplete one
        return available;
    }

    /// <summary>
    /// Decode one symbol from the stream using this table. Returns the symbol,
    /// or -9 once MAXBITS bits have been read without matching a code (empty
    /// or incomplete code set).
    /// </summary>
    /// <param name="state">Current input/output state to process</param>
    /// <remarks>
    /// The codes as stored in the compressed data are bit-reversed relative to
    /// a simple integer ordering of codes of the same lengths. Bits are
    /// therefore pulled one at a time and assembled in reverse so that plain
    /// integer comparisons can be used.
    ///
    /// The first code for the shortest length is all ones and later codes of
    /// the same length count downwards, so each pulled bit is also inverted
    /// to get the more "natural" ordering that starts at zero and increments.
    /// </remarks>
    public int Decode(State state)
    {
        int bitBuffer = state.BitBuf;   // Bits from stream
        int available = state.BitCnt;   // Bits left in the buffer to process
        int codeLength = 1;             // Current number of bits in code
        int code = 0;                   // The codeLength bits being decoded
        int firstCode = 0;              // First code of the current length
        int symbolBase = 0;             // Symbol index of that first code
        int countIndex = CountPtr + 1;  // Index of the next per-length count

        while (true)
        {
            for (; available != 0; available--)
            {
                // Append the next stream bit, inverted
                code |= (bitBuffer & 1) ^ 1;
                bitBuffer >>= 1;

                int count = Count[countIndex++];

                // Code falls inside the range for this length: done
                if (code < firstCode + count)
                {
                    state.BitBuf = bitBuffer;
                    state.BitCnt = (state.BitCnt - codeLength) & 7;
                    return Symbol[symbolBase + (code - firstCode)];
                }

                // Otherwise move on to the next code length
                symbolBase += count;
                firstCode = (firstCode + count) << 1;
                code <<= 1;
                codeLength++;
            }

            // Ran out of codes
            available = (MAXBITS + 1) - codeLength;
            if (available == 0)
                return -9;

            // Refill the bit buffer with one more input byte
            if (state.Left == 0)
            {
                state.Left = state.ProcessInput();
                if (state.Left == 0)
                    throw new IndexOutOfRangeException();
            }

            bitBuffer = state.Input[state.InputPtr++];
            state.Left--;

            // Never consume more than the byte that was just loaded
            available = Math.Min(available, 8);
        }
    }
}
}

View File

@@ -0,0 +1,160 @@
using System;
using System.Collections.Generic;
using System.Linq;
using static SabreTools.Compression.Blast.Constants;
namespace SabreTools.Compression.Blast
{
/// <summary>
/// Input and output state
/// </summary>
public class State
{
    #region Input State

    /// <summary>
    /// Source array that ProcessInput() loads into Input
    /// </summary>
    public byte[] InHow { get; set; }

    /// <summary>
    /// Input bytes currently being consumed
    /// </summary>
    public List<byte> Input { get; set; }

    /// <summary>
    /// Index in Input of the next byte to read
    /// </summary>
    public int InputPtr { get; set; }

    /// <summary>
    /// Number of unread bytes remaining in Input
    /// </summary>
    public uint Left { get; set; }

    /// <summary>
    /// Bit buffer
    /// </summary>
    public int BitBuf { get; set; }

    /// <summary>
    /// Number of bits in bit buffer
    /// </summary>
    public int BitCnt { get; set; }

    #endregion

    #region Output State

    /// <summary>
    /// Destination list that ProcessOutput() appends to
    /// </summary>
    public List<byte> OutHow { get; set; }

    /// <summary>
    /// Index of next write location in Output
    /// </summary>
    public uint Next { get; set; }

    /// <summary>
    /// True to check distances (for first 4K)
    /// </summary>
    public bool First { get; set; }

    /// <summary>
    /// Output buffer and sliding window
    /// </summary>
    public byte[] Output { get; set; } = new byte[MAXWIN];

    /// <summary>
    /// Pointer to the next output location
    /// </summary>
    public int OutputPtr { get; set; }

    #endregion

    /// <summary>
    /// Constructor
    /// </summary>
    /// <param name="inhow">Input byte array</param>
    /// <param name="outhow">Output byte list</param>
    public State(byte[] inhow, List<byte> outhow)
    {
        InHow = inhow;
        Input = new List<byte>();
        InputPtr = 0;
        Left = 0;
        BitBuf = 0;
        BitCnt = 0;

        OutHow = outhow;
        Next = 0;
        First = true;
    }

    /// <summary>
    /// Return need bits from the input stream. This always leaves less than
    /// eight bits in the buffer. Bits() works properly for need == 0.
    /// </summary>
    /// <param name="need">Number of bits to read</param>
    /// <returns>The requested bits, with all higher bits cleared</returns>
    /// <exception cref="IndexOutOfRangeException">No more input is available</exception>
    /// <remarks>
    /// Bits are stored in bytes from the least significant bit to the most
    /// significant bit. Therefore bits are dropped from the bottom of the bit
    /// buffer, using shift right, and new bytes are appended to the top of the
    /// bit buffer, using shift left.
    /// </remarks>
    public int Bits(int need)
    {
        // Load at least need bits into val
        int val = BitBuf;
        while (BitCnt < need)
        {
            if (Left == 0)
            {
                Left = ProcessInput();
                if (Left == 0)
                    throw new IndexOutOfRangeException();
            }

            // Load eight bits
            val |= (int)(Input[InputPtr++]) << BitCnt;
            Left--;
            BitCnt += 8;
        }

        // Drop need bits and update buffer, always zero to seven bits left
        BitBuf = val >> need;
        BitCnt -= need;

        // Return need bits, zeroing the bits above that
        return val & ((1 << need) - 1);
    }

    /// <summary>
    /// Process input for the current state
    /// </summary>
    /// <returns>
    /// Number of unread bytes now available in Input, or 0 when the whole of
    /// InHow has already been consumed
    /// </returns>
    public uint ProcessInput()
    {
        // InHow is handed over in one piece and InputPtr is never rewound, so
        // once the read position has reached the end there is nothing left to
        // supply. Reporting 0 lets callers take their "out of input" path
        // instead of indexing past the end of Input.
        if (InHow == null || InputPtr >= InHow.Length)
            return 0;

        Input = new List<byte>(InHow);
        return (uint)(Input.Count - InputPtr);
    }

    /// <summary>
    /// Process output for the current state by appending the first Next bytes
    /// of Output to OutHow
    /// </summary>
    /// <returns>True if the output could be added, false otherwise</returns>
    public bool ProcessOutput()
    {
        try
        {
            OutHow.AddRange(Output.Take((int)Next));
            return true;
        }
        catch
        {
            // Any failure to append is reported through the return value
            return false;
        }
    }
}
}

View File

@@ -8,7 +8,7 @@
<LangVersion>latest</LangVersion>
<Nullable>enable</Nullable>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<Version>0.4.4</Version>
<Version>0.5.1</Version>
<!-- Package Properties -->
<Authors>Matt Nadareski</Authors>
@@ -18,7 +18,7 @@
<PackageReadmeFile>README.md</PackageReadmeFile>
<RepositoryUrl>https://github.com/SabreTools/SabreTools.Compression</RepositoryUrl>
<RepositoryType>git</RepositoryType>
<PackageTags>compression decompression lz mszip</PackageTags>
<PackageTags>compression decompression lz mszip zlib blast</PackageTags>
<PackageLicenseExpression>MIT</PackageLicenseExpression>
</PropertyGroup>
@@ -27,8 +27,8 @@
</ItemGroup>
<ItemGroup>
<PackageReference Include="SabreTools.IO" Version="1.3.5" />
<PackageReference Include="SabreTools.Models" Version="1.4.2" />
<PackageReference Include="SabreTools.IO" Version="1.4.5" />
<PackageReference Include="SabreTools.Models" Version="1.4.5" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,47 @@
namespace SabreTools.Compression.zlib
{
/// <summary>
/// Integer constants carrying the zlib names, plus a helper that turns a
/// result code back into its name
/// </summary>
public static class zlibConst
{
    #region Flush values

    public const int Z_NO_FLUSH = 0;
    public const int Z_PARTIAL_FLUSH = 1;
    public const int Z_SYNC_FLUSH = 2;
    public const int Z_FULL_FLUSH = 3;
    public const int Z_FINISH = 4;
    public const int Z_BLOCK = 5;
    public const int Z_TREES = 6;

    #endregion

    #region Result codes

    public const int Z_OK = 0;
    public const int Z_STREAM_END = 1;
    public const int Z_NEED_DICT = 2;
    public const int Z_ERRNO = -1;
    public const int Z_STREAM_ERROR = -2;
    public const int Z_DATA_ERROR = -3;
    public const int Z_MEM_ERROR = -4;
    public const int Z_BUF_ERROR = -5;
    public const int Z_VERSION_ERROR = -6;

    #endregion

    /// <summary>
    /// Translate a zlib result code into the name of its constant
    /// </summary>
    /// <param name="result">Integer to translate to the result name</param>
    /// <returns>
    /// Name of the matching result constant, or the integer formatted as a
    /// string when no result constant has that value
    /// </returns>
    public static string ToZlibConstName(this int result)
    {
        switch (result)
        {
            case Z_OK:
                return "Z_OK";
            case Z_STREAM_END:
                return "Z_STREAM_END";
            case Z_NEED_DICT:
                return "Z_NEED_DICT";
            case Z_ERRNO:
                return "Z_ERRNO";
            case Z_STREAM_ERROR:
                return "Z_STREAM_ERROR";
            case Z_DATA_ERROR:
                return "Z_DATA_ERROR";
            case Z_MEM_ERROR:
                return "Z_MEM_ERROR";
            case Z_BUF_ERROR:
                return "Z_BUF_ERROR";
            case Z_VERSION_ERROR:
                return "Z_VERSION_ERROR";
            default:
                return result.ToString();
        }
    }
}
}

36
publish-nix.sh Executable file
View File

@@ -0,0 +1,36 @@
#! /bin/bash

# This script assumes the following:
# - .NET 8.0 (or newer) SDK is installed and in PATH
#
# If any of these are not satisfied, the operation may fail
# in an unpredictable way and result in an incomplete output.

# Optional parameters
#   -b  Skip the restore and pack steps
NO_BUILD=false
while getopts "b" OPTION
do
    case $OPTION in
    b)
        NO_BUILD=true
        ;;
    *)
        echo "Invalid option provided"
        exit 1
        ;;
    esac
done

# Set the current directory as a variable
BUILD_FOLDER=$PWD

# Build unless -b was given
if [ "$NO_BUILD" = false ]
then
    # Restore Nuget packages for all builds
    echo "Restoring Nuget packages"
    dotnet restore

    # Create Nuget Package (quoted so a path containing spaces stays one argument)
    dotnet pack SabreTools.Compression/SabreTools.Compression.csproj --output "$BUILD_FOLDER"
fi

26
publish-win.ps1 Normal file
View File

@@ -0,0 +1,26 @@
# This script assumes the following:
# - .NET 8.0 (or newer) SDK is installed and in PATH
#
# If any of these are not satisfied, the operation may fail
# in an unpredictable way and result in an incomplete output.

# Optional parameters
#   -NO_BUILD / -NoBuild  Skip the restore and pack steps
param(
    [Parameter(Mandatory = $false)]
    [Alias("NoBuild")]
    [switch]$NO_BUILD
)

# Use the folder that contains this script as the package output folder
$BUILD_FOLDER = $PSScriptRoot

# Build unless the no-build switch was given
if (-not $NO_BUILD.IsPresent)
{
    # Restore Nuget packages for all builds
    Write-Host "Restoring Nuget packages"
    dotnet restore

    # Create Nuget Package
    dotnet pack SabreTools.Compression\SabreTools.Compression.csproj --output $BUILD_FOLDER
}