Files
BinaryObjectScanner/BinaryObjectScanner.Compression/Quantum/Decompressor.cs

499 lines
20 KiB
C#
Raw Normal View History

2023-01-02 00:03:03 -08:00
using System;
2023-01-01 23:45:13 -08:00
using System.Linq;
2023-03-07 16:59:14 -05:00
using BinaryObjectScanner.Models.Compression.Quantum;
using BinaryObjectScanner.Models.MicrosoftCabinet;
2023-01-01 23:45:13 -08:00
2023-03-07 16:59:14 -05:00
namespace BinaryObjectScanner.Compression.Quantum
2023-01-01 23:45:13 -08:00
{
2023-01-02 00:03:03 -08:00
/// <see href="https://github.com/wine-mirror/wine/blob/master/dlls/cabinet/cabinet.h"/>
/// <see href="https://github.com/wine-mirror/wine/blob/master/dlls/cabinet/fdi.c"/>
/// <see href="https://github.com/wine-mirror/wine/blob/master/include/fdi.h"/>
/// <see href="http://www.russotto.net/quantumcomp.html"/>
public static class Decompressor
2023-01-01 23:45:13 -08:00
{
/// <summary>
/// Decompress a byte array using a given State
/// </summary>
public static int Decompress(State state, int inlen, byte[] inbuf, int outlen, byte[] outbuf)
2023-01-01 23:45:13 -08:00
{
int inpos = 0, outpos = 0; // inbuf[0], outbuf[0]
2023-01-02 00:03:03 -08:00
int window = 0; // state.Window[0]
int runsrc, rundest;
2023-01-02 22:03:36 -08:00
uint windowPosition = state.WindowPosition;
uint windowSize = state.WindowSize;
2023-01-02 00:03:03 -08:00
2023-01-02 22:03:36 -08:00
int extra, togo = outlen, matchLength = 0, copyLength;
2023-01-02 00:03:03 -08:00
byte selector, sym;
2023-01-02 22:03:36 -08:00
uint matchOffset = 0;
2023-01-02 00:03:03 -08:00
// Make local copies of state variables
uint bitBuffer = state.BitBuffer;
int bitsLeft = state.BitsLeft;
2023-01-02 00:03:03 -08:00
ushort H = 0xFFFF, L = 0;
// Read initial value of C
ushort C = (ushort)Q_READ_BITS(16, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 00:03:03 -08:00
// Apply 2^x-1 mask
2023-01-02 22:03:36 -08:00
windowPosition &= windowSize - 1;
2023-01-02 00:03:03 -08:00
while (togo > 0)
{
selector = (byte)GET_SYMBOL(state.SelectorModel, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 00:03:03 -08:00
switch (selector)
{
// Selector 0 = literal model, 64 entries, 0x00-0x3F
2023-01-02 00:03:03 -08:00
case 0:
sym = (byte)GET_SYMBOL(state.Model0, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 22:03:36 -08:00
state.Window[window + windowPosition++] = sym;
2023-01-02 00:03:03 -08:00
togo--;
break;
// Selector 1 = literal model, 64 entries, 0x40-0x7F
2023-01-02 00:03:03 -08:00
case 1:
sym = (byte)GET_SYMBOL(state.Model1, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 22:03:36 -08:00
state.Window[window + windowPosition++] = sym;
2023-01-02 00:03:03 -08:00
togo--;
break;
// Selector 2 = literal model, 64 entries, 0x80-0xBF
2023-01-02 00:03:03 -08:00
case 2:
sym = (byte)GET_SYMBOL(state.Model2, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 22:03:36 -08:00
state.Window[window + windowPosition++] = sym;
2023-01-02 00:03:03 -08:00
togo--;
break;
// Selector 3 = literal model, 64 entries, 0xC0-0xFF
2023-01-02 00:03:03 -08:00
case 3:
sym = (byte)GET_SYMBOL(state.Model3, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 22:03:36 -08:00
state.Window[window + windowPosition++] = sym;
2023-01-02 00:03:03 -08:00
togo--;
break;
// Selector 4 = fixed length of 3
case 4:
sym = (byte)GET_SYMBOL(state.Model4, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
extra = (int)Q_READ_BITS(state.ExtraBits[sym], inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 23:35:09 -08:00
matchOffset = (uint)(state.PositionSlotBases[sym] + extra + 1);
2023-01-02 22:03:36 -08:00
matchLength = 3;
2023-01-02 00:03:03 -08:00
break;
// Selector 5 = fixed length of 4
case 5:
sym = (byte)GET_SYMBOL(state.Model5, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
extra = (int)Q_READ_BITS(state.ExtraBits[sym], inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 23:35:09 -08:00
matchOffset = (uint)(state.PositionSlotBases[sym] + extra + 1);
2023-01-02 22:03:36 -08:00
matchLength = 4;
2023-01-02 00:03:03 -08:00
break;
// Selector 6 = variable length
case 6:
sym = (byte)GET_SYMBOL(state.Model6Length, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
extra = (int)Q_READ_BITS(state.LengthExtraBits[sym], inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 23:35:09 -08:00
matchLength = state.LengthBases[sym] + extra + 5;
2023-01-02 22:03:36 -08:00
sym = (byte)GET_SYMBOL(state.Model6Position, ref H, ref L, ref C, inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
extra = (int)Q_READ_BITS(state.ExtraBits[sym], inbuf, ref inpos, ref bitsLeft, ref bitBuffer);
2023-01-02 23:35:09 -08:00
matchOffset = (uint)(state.PositionSlotBases[sym] + extra + 1);
2023-01-02 00:03:03 -08:00
break;
default:
return inpos;
2023-01-02 00:03:03 -08:00
}
// If this is a match
2023-01-02 00:03:03 -08:00
if (selector >= 4)
{
2023-01-02 22:03:36 -08:00
rundest = (int)(window + windowPosition);
togo -= matchLength;
2023-01-02 00:03:03 -08:00
// Copy any wrapped around source data
2023-01-02 22:03:36 -08:00
if (windowPosition >= matchOffset)
2023-01-02 00:03:03 -08:00
{
// No wrap
2023-01-02 22:03:36 -08:00
runsrc = (int)(rundest - matchOffset);
2023-01-02 00:03:03 -08:00
}
else
{
2023-01-02 22:03:36 -08:00
runsrc = (int)(rundest + (windowSize - matchOffset));
copyLength = (int)(matchOffset - windowPosition);
if (copyLength < matchLength)
2023-01-02 00:03:03 -08:00
{
2023-01-02 22:03:36 -08:00
matchLength -= copyLength;
windowPosition += (uint)copyLength;
while (copyLength-- > 0)
2023-01-02 00:03:03 -08:00
{
state.Window[rundest++] = state.Window[rundest++];
}
runsrc = window;
}
}
2023-01-02 22:03:36 -08:00
windowPosition += (uint)matchLength;
2023-01-02 00:03:03 -08:00
// Copy match data - no worries about destination wraps
2023-01-02 22:03:36 -08:00
while (matchLength-- > 0)
2023-01-02 00:03:03 -08:00
{
state.Window[rundest++] = state.Window[runsrc++];
// Handle wraparounds that aren't supposed to happen
if (rundest >= state.Window.Length)
rundest = 0;
if (runsrc >= state.Window.Length)
runsrc = 0;
2023-01-02 00:03:03 -08:00
}
}
// If we hit the end of the window, copy to the output and wrap
if (windowPosition >= state.Window.Length)
{
Array.Copy(state.Window, 0, outbuf, outpos, Math.Min(windowSize, outlen));
outpos += (int)Math.Min(windowSize, outlen);
outlen -= (int)Math.Min(windowSize, outlen);
windowPosition = 0;
}
2023-01-02 00:03:03 -08:00
}
if (togo > 0)
return inpos;
2023-01-02 00:03:03 -08:00
if (outlen > 0)
{
int sourceIndex = (int)((windowPosition == 0 ? windowSize : windowPosition) - outlen);
Array.Copy(state.Window, sourceIndex, outbuf, outpos, outlen);
}
2023-01-02 00:03:03 -08:00
// Cache the decompression state variables
state.BitBuffer = bitBuffer;
state.BitsLeft = bitsLeft;
2023-01-02 22:03:36 -08:00
state.WindowPosition = windowPosition;
return inpos;
2023-01-01 23:45:13 -08:00
}
/// <summary>
/// Initialize a Quantum decompressor state
/// </summary>
public static bool InitState(State state, CFFOLDER folder)
{
int window = ((ushort)folder.CompressionType >> 8) & 0x1f;
int level = ((ushort)folder.CompressionType >> 4) & 0xF;
return InitState(state, window, level);
}
/// <summary>
/// Initialize a Quantum decompressor state
/// </summary>
public static bool InitState(State state, int window, int level)
2023-01-01 23:45:13 -08:00
{
2023-01-02 23:54:40 -08:00
uint windowSize = (uint)(1 << window);
int maxSize = window * 2;
2023-01-01 23:45:13 -08:00
// QTM supports window sizes of 2^10 (1Kb) through 2^21 (2Mb)
// If a previously allocated window is big enough, keep it
if (window < 10 || window > 21)
return false;
// If we don't have the proper window size
if (state.ActualSize < windowSize)
state.Window = null;
// If we have no window
if (state.Window == null)
{
state.Window = new byte[windowSize];
state.ActualSize = windowSize;
}
// Set the window size and position
state.WindowSize = windowSize;
state.WindowPosition = 0;
// Initialize arithmetic coding models
2023-01-02 23:35:09 -08:00
state.SelectorModel = CreateModel(state.SelectorModelSymbols, 7, 0);
2023-01-01 23:45:13 -08:00
2023-01-02 23:35:09 -08:00
state.Model0 = CreateModel(state.Model0Symbols, 0x40, 0x00);
state.Model1 = CreateModel(state.Model1Symbols, 0x40, 0x40);
state.Model2 = CreateModel(state.Model2Symbols, 0x40, 0x80);
state.Model3 = CreateModel(state.Model3Symbols, 0x40, 0xC0);
2023-01-01 23:45:13 -08:00
// Model 4 depends on table size, ranges from 20 to 24
2023-01-02 23:54:40 -08:00
state.Model4 = CreateModel(state.Model4Symbols, (maxSize < 24) ? maxSize : 24, 0);
2023-01-01 23:45:13 -08:00
// Model 5 depends on table size, ranges from 20 to 36
2023-01-02 23:54:40 -08:00
state.Model5 = CreateModel(symbols: state.Model5Symbols, (maxSize < 36) ? maxSize : 36, 0);
2023-01-01 23:45:13 -08:00
// Model 6 Position depends on table size, ranges from 20 to 42
2023-01-02 23:54:40 -08:00
state.Model6Position = CreateModel(state.Model6PositionSymbols, (maxSize < 42) ? maxSize : 42, 0);
state.Model6Length = CreateModel(state.Model6LengthSymbols, 27, 0);
2023-01-01 23:45:13 -08:00
return true;
}
/// <summary>
/// Initialize a Quantum model that decodes symbols from s to (s + n - 1)
/// </summary>
private static Model CreateModel(ModelSymbol[] symbols, int entryCount, int initialSymbol)
2023-01-01 23:45:13 -08:00
{
// Set the basic values
Model model = new Model
{
TimeToReorder = 4,
Entries = entryCount,
Symbols = symbols,
};
2023-01-01 23:45:13 -08:00
// Clear out the look-up table
2023-01-02 22:03:36 -08:00
model.LookupTable = Enumerable.Repeat<ushort>(0xFFFF, model.LookupTable.Length).ToArray();
2023-01-01 23:45:13 -08:00
// Loop through and build the look-up table
for (ushort i = 0; i < entryCount; i++)
{
// Set up a look-up entry for symbol
model.LookupTable[i + initialSymbol] = i;
// Create the symbol in the table
model.Symbols[i] = new ModelSymbol
{
Symbol = (ushort)(i + initialSymbol),
CumulativeFrequency = (ushort)(entryCount - i),
};
2023-01-01 23:45:13 -08:00
}
// Set the last symbol frequency to 0
model.Symbols[entryCount] = new ModelSymbol { CumulativeFrequency = 0 };
return model;
2023-01-01 23:45:13 -08:00
}
/// <summary>
2023-01-02 22:03:36 -08:00
/// Update the Quantum model for a particular symbol
2023-01-01 23:45:13 -08:00
/// </summary>
private static void UpdateModel(Model model, int symbol)
2023-01-01 23:45:13 -08:00
{
// Update the cumulative frequency for all symbols less than the provided
for (int i = 0; i < symbol; i++)
{
model.Symbols[i].CumulativeFrequency += 8;
}
// If the first symbol still has a cumulative frequency under 3800
if (model.Symbols[0].CumulativeFrequency <= 3800)
return;
// If we have more than 1 shift left in the model
if (--model.TimeToReorder != 0)
2023-01-01 23:45:13 -08:00
{
// Loop through the entries from highest to lowest,
// performing the shift on the cumulative frequencies
for (int i = model.Entries - 1; i >= 0; i--)
{
// -1, not -2; the 0 entry saves this
model.Symbols[i].CumulativeFrequency >>= 1;
if (model.Symbols[i].CumulativeFrequency <= model.Symbols[i + 1].CumulativeFrequency)
model.Symbols[i].CumulativeFrequency = (ushort)(model.Symbols[i + 1].CumulativeFrequency + 1);
}
}
// If we have no shifts left in the model
else
{
// Reset the shifts left value to 50
model.TimeToReorder = 50;
2023-01-01 23:45:13 -08:00
// Loop through the entries setting the cumulative frequencies
for (int i = 0; i < model.Entries; i++)
{
// No -1, want to include the 0 entry
// This converts cumfreqs into frequencies, then shifts right
model.Symbols[i].CumulativeFrequency -= model.Symbols[i + 1].CumulativeFrequency;
model.Symbols[i].CumulativeFrequency++; // Avoid losing things entirely
model.Symbols[i].CumulativeFrequency >>= 1;
}
// Now sort by frequencies, decreasing order -- this must be an
// inplace selection sort, or a sort with the same (in)stability
// characteristics
for (int i = 0; i < model.Entries - 1; i++)
{
for (int j = i + 1; j < model.Entries; j++)
{
if (model.Symbols[i].CumulativeFrequency < model.Symbols[j].CumulativeFrequency)
{
var temp = model.Symbols[i];
model.Symbols[i] = model.Symbols[j];
model.Symbols[j] = temp;
}
}
}
// Then convert frequencies back to cumfreq
for (int i = model.Entries - 1; i >= 0; i--)
{
model.Symbols[i].CumulativeFrequency += model.Symbols[i + 1].CumulativeFrequency;
}
// Then update the other part of the table
2023-01-02 22:03:36 -08:00
for (ushort i = 0; i < model.Entries; i++)
2023-01-01 23:45:13 -08:00
{
2023-01-02 22:03:36 -08:00
model.LookupTable[model.Symbols[i].Symbol] = i;
2023-01-01 23:45:13 -08:00
}
}
}
2023-01-02 22:03:36 -08:00
// Bitstream reading macros (Quantum / normal byte order)
2023-01-01 23:45:13 -08:00
#region Macros
2023-01-02 22:03:36 -08:00
/*
* These bit access routines work by using the area beyond the MSB and the
* LSB as a free source of zeroes. This avoids having to mask any bits.
* So we have to know the bit width of the bitbuffer variable. This is
* defined as Uint_BITS.
*
* Uint_BITS should be at least 16 bits. Unlike LZX's Huffman decoding,
* Quantum's arithmetic decoding only needs 1 bit at a time, it doesn't
* need an assured number. Retrieving larger bitstrings can be done with
* multiple reads and fills of the bitbuffer. The code should work fine
* for machines where Uint >= 32 bits.
*
* Also note that Quantum reads bytes in normal order; LZX is in
* little-endian order.
2023-01-01 23:45:13 -08:00
*/
/// <summary>
/// Should be used first to set up the system
/// </summary>
private static void Q_INIT_BITSTREAM(out int bitsleft, out uint bitbuf)
{
2023-01-02 22:03:36 -08:00
bitsleft = 0;
bitbuf = 0;
2023-01-01 23:45:13 -08:00
}
/// <summary>
/// Adds more data to the bit buffer, if there is room for another 16 bits.
/// </summary>
private static void Q_FILL_BUFFER(byte[] inbuf, ref int inpos, ref int bitsleft, ref uint bitbuf)
{
if (bitsleft > 8)
2023-01-02 22:03:36 -08:00
return;
2023-01-03 00:14:42 -08:00
byte b0 = inpos + 0 < inbuf.Length ? inbuf[inpos + 0] : (byte)0;
byte b1 = inpos + 1 < inbuf.Length ? inbuf[inpos + 1] : (byte)0;
bitbuf |= (uint)(((b0 << 8) | b1) << (16 - bitsleft));
2023-01-02 22:03:36 -08:00
bitsleft += 16;
inpos += 2;
2023-01-01 23:45:13 -08:00
}
/// <summary>
/// Extracts (without removing) N bits from the bit buffer
/// </summary>
private static uint Q_PEEK_BITS(int n, uint bitbuf)
{
return bitbuf >> (32 - n);
}
/// <summary>
/// Removes N bits from the bit buffer
/// </summary>
private static void Q_REMOVE_BITS(int n, ref int bitsleft, ref uint bitbuf)
{
bitbuf <<= n;
bitsleft -= n;
}
/// <summary>
/// Takes N bits from the buffer and puts them in v. Unlike LZX, this can loop
/// several times to get the requisite number of bits.
/// </summary>
2023-01-02 22:03:36 -08:00
private static uint Q_READ_BITS(int n, byte[] inbuf, ref int inpos, ref int bitsleft, ref uint bitbuf)
2023-01-01 23:45:13 -08:00
{
2023-01-02 22:03:36 -08:00
uint v = 0; int bitrun;
2023-01-01 23:45:13 -08:00
for (int bitsneed = n; bitsneed != 0; bitsneed -= bitrun)
{
Q_FILL_BUFFER(inbuf, ref inpos, ref bitsleft, ref bitbuf);
bitrun = (bitsneed > bitsleft) ? bitsleft : bitsneed;
2023-01-02 22:03:36 -08:00
v = (v << bitrun) | Q_PEEK_BITS(bitrun, bitbuf);
2023-01-01 23:45:13 -08:00
Q_REMOVE_BITS(bitrun, ref bitsleft, ref bitbuf);
}
return v;
}
/// <summary>
2023-01-02 22:03:36 -08:00
/// Fetches the next symbol from the stated model and puts it in symbol.
2023-01-01 23:45:13 -08:00
/// It may need to read the bitstream to do this.
/// </summary>
2023-01-02 00:03:03 -08:00
private static ushort GET_SYMBOL(Model model, ref ushort H, ref ushort L, ref ushort C, byte[] inbuf, ref int inpos, ref int bitsleft, ref uint bitbuf)
2023-01-01 23:45:13 -08:00
{
2023-01-03 00:12:31 -08:00
ushort symf = GetFrequency(model.Symbols[0].CumulativeFrequency, H, L, C);
2023-01-01 23:45:13 -08:00
int i;
for (i = 1; i < model.Entries; i++)
{
if (model.Symbols[i].CumulativeFrequency <= symf)
break;
}
2023-01-02 22:03:36 -08:00
ushort symbol = model.Symbols[i - 1].Symbol;
2023-01-03 00:12:31 -08:00
GetCode(model.Symbols[i - 1].CumulativeFrequency,
model.Symbols[i].CumulativeFrequency,
model.Symbols[0].CumulativeFrequency,
ref H, ref L, ref C,
inbuf, ref inpos, ref bitsleft, ref bitbuf);
2023-01-02 22:03:36 -08:00
2023-01-03 00:12:31 -08:00
UpdateModel(model, i);
return symbol;
}
/// <summary>
/// Get the frequency for a given range and total frequency
/// </summary>
private static ushort GetFrequency(ushort totalFrequency, ushort H, ushort L, ushort C)
{
uint range = (uint)(((H - L) & 0xFFFF) + 1);
uint freq = (uint)(((C - L + 1) * totalFrequency - 1) / range);
return (ushort)(freq & 0xFFFF);
}
/// <summary>
/// The decoder renormalization loop
/// </summary>
private static void GetCode(int previousFrequency,
int cumulativeFrequency,
int totalFrequency,
ref ushort H,
ref ushort L,
ref ushort C,
byte[] inbuf,
ref int inpos,
ref int bitsleft,
ref uint bitbuf)
{
uint range = (uint)((H - L) + 1);
H = (ushort)(L + ((previousFrequency * range) / totalFrequency) - 1);
L = (ushort)(L + (cumulativeFrequency * range) / totalFrequency);
2023-01-01 23:45:13 -08:00
while (true)
{
if ((L & 0x8000) != (H & 0x8000))
{
2023-01-03 00:12:31 -08:00
if ((L & 0x4000) == 0 || (H & 0x4000) != 0)
2023-01-01 23:45:13 -08:00
break;
2023-01-03 00:12:31 -08:00
// Underflow case
C ^= 0x4000;
L &= 0x3FFF;
H |= 0x4000;
2023-01-01 23:45:13 -08:00
}
2023-01-02 22:03:36 -08:00
L <<= 1;
H = (ushort)((H << 1) | 1);
2023-01-03 00:12:31 -08:00
C = (ushort)((C << 1) | Q_READ_BITS(1, inbuf, ref inpos, ref bitsleft, ref bitbuf));
2023-01-01 23:45:13 -08:00
}
}
#endregion
}
}