using System;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
using SharpCompress.IO;

/*
 * Copyright 2001,2004-2005 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This package is based on the work done by Keiron Liddle, Aftex Software
 * <keiron@aftexsw.com> to whom the Ant project is very grateful for his
 * great code.
 */

#nullable disable

namespace SharpCompress.Compressors.BZip2;

/**
 * An output stream that compresses data into the BZip2 format (with the file
 * header chars) and writes it to another stream.
 *
 * @author <a href="mailto:keiron@aftexsw.com">Keiron Liddle</a>
 *
 * TODO: Update to BZip2 1.0.1
 * <b>NB:</b> note this class has been modified to add a leading BZ to the
 * start of the BZIP2 stream to make it compatible with other PGP programs.
 */

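// Illustrative usage sketch only (this type is internal and is normally driven by
// SharpCompress's own BZip2 stream wrappers; the destination file name and 'buffer'
// below are assumptions, not part of this class): write plain bytes and dispose the
// stream to flush the final block and the stream footer.
//
//     using (var output = File.Create("data.bz2"))          // hypothetical destination
//     using (var bz2 = new CBZip2OutputStream(output, 9))   // 9 => 900k block size
//     {
//         bz2.Write(buffer, 0, buffer.Length);
//     }                                                      // Dispose() calls Finish()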
internal sealed class CBZip2OutputStream : Stream, IStreamStack
{
#if DEBUG_STREAMS
    long IStreamStack.InstanceId { get; set; }
#endif
    int IStreamStack.DefaultBufferSize { get; set; }

    Stream IStreamStack.BaseStream() => bsStream;

    int IStreamStack.BufferSize
    {
        get => 0;
        set { }
    }
    int IStreamStack.BufferPosition
    {
        get => 0;
        set { }
    }

    void IStreamStack.SetPosition(long position) { }

    private const int SETMASK = (1 << 21);
    private const int CLEARMASK = (~SETMASK);
    private const int GREATER_ICOST = 15;
    private const int LESSER_ICOST = 0;
    private const int SMALL_THRESH = 20;
    private const int DEPTH_THRESH = 10;

    /*
    If you are ever unlucky/improbable enough
    to get a stack overflow whilst sorting,
    increase the following constant and try
    again. In practice I have never seen the
    stack go above 27 elems, so the following
    limit seems very generous.
    */
    private const int QSORT_STACK_SIZE = 1000;
    private bool finished;

    private static void Panic()
    {
        //System.out.Println("panic");
        //throw new CError();
    }

    private void MakeMaps()
    {
        int i;
        nInUse = 0;
        for (i = 0; i < 256; i++)
        {
            if (inUse[i])
            {
                seqToUnseq[nInUse] = (char)i;
                unseqToSeq[i] = (char)nInUse;
                nInUse++;
            }
        }
    }

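    /*
      Huffman code-length construction (rough sketch of the routine below): each weight
      packs the symbol frequency in its upper 24 bits and the current tree depth in its
      low 8 bits, so the heap orders primarily by frequency. If any resulting code
      length exceeds maxLen, the frequencies are scaled down and the tree is rebuilt.
    */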
    private static void HbMakeCodeLengths(char[] len, int[] freq, int alphaSize, int maxLen)
    {
        /*
        Nodes and heap entries run from 1. Entry 0
        for both the heap and nodes is a sentinel.
        */
        int nNodes, nHeap, n1, n2, i, j, k;
        bool tooLong;

        Span<int> heap = stackalloc int[BZip2Constants.MAX_ALPHA_SIZE + 2]; // 1040 bytes
        Span<int> weight = stackalloc int[BZip2Constants.MAX_ALPHA_SIZE * 2]; // 2064 bytes
        Span<int> parent = stackalloc int[BZip2Constants.MAX_ALPHA_SIZE * 2]; // 2064 bytes

        for (i = 0; i < alphaSize; i++)
        {
            weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
        }

        while (true)
        {
            nNodes = alphaSize;
            nHeap = 0;

            heap[0] = 0;
            weight[0] = 0;
            parent[0] = -2;

            for (i = 1; i <= alphaSize; i++)
            {
                parent[i] = -1;
                nHeap++;
                heap[nHeap] = i;
                {
                    int zz, tmp;
                    zz = nHeap;
                    tmp = heap[zz];
                    while (weight[tmp] < weight[heap[zz >> 1]])
                    {
                        heap[zz] = heap[zz >> 1];
                        zz >>= 1;
                    }
                    heap[zz] = tmp;
                }
            }
            if (!(nHeap < (BZip2Constants.MAX_ALPHA_SIZE + 2)))
            {
                Panic();
            }

            while (nHeap > 1)
            {
                n1 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                {
                    int zz = 0, yy = 0, tmp = 0;
                    zz = 1;
                    tmp = heap[zz];
                    while (true)
                    {
                        yy = zz << 1;
                        if (yy > nHeap)
                        {
                            break;
                        }
                        if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]])
                        {
                            yy++;
                        }
                        if (weight[tmp] < weight[heap[yy]])
                        {
                            break;
                        }
                        heap[zz] = heap[yy];
                        zz = yy;
                    }
                    heap[zz] = tmp;
                }
                n2 = heap[1];
                heap[1] = heap[nHeap];
                nHeap--;
                {
                    int zz = 0, yy = 0, tmp = 0;
                    zz = 1;
                    tmp = heap[zz];
                    while (true)
                    {
                        yy = zz << 1;
                        if (yy > nHeap)
                        {
                            break;
                        }
                        if (yy < nHeap && weight[heap[yy + 1]] < weight[heap[yy]])
                        {
                            yy++;
                        }
                        if (weight[tmp] < weight[heap[yy]])
                        {
                            break;
                        }
                        heap[zz] = heap[yy];
                        zz = yy;
                    }
                    heap[zz] = tmp;
                }
                nNodes++;
                parent[n1] = parent[n2] = nNodes;

                weight[nNodes] = (int)(
                    (uint)((weight[n1] & 0xffffff00) + (weight[n2] & 0xffffff00))
                    | (uint)(
                        1
                        + (
                            ((weight[n1] & 0x000000ff) > (weight[n2] & 0x000000ff))
                                ? (weight[n1] & 0x000000ff)
                                : (weight[n2] & 0x000000ff)
                        )
                    )
                );

                parent[nNodes] = -1;
                nHeap++;
                heap[nHeap] = nNodes;
                {
                    int zz = 0, tmp = 0;
                    zz = nHeap;
                    tmp = heap[zz];
                    while (weight[tmp] < weight[heap[zz >> 1]])
                    {
                        heap[zz] = heap[zz >> 1];
                        zz >>= 1;
                    }
                    heap[zz] = tmp;
                }
            }
            if (!(nNodes < (BZip2Constants.MAX_ALPHA_SIZE * 2)))
            {
                Panic();
            }

            tooLong = false;
            for (i = 1; i <= alphaSize; i++)
            {
                j = 0;
                k = i;
                while (parent[k] >= 0)
                {
                    k = parent[k];
                    j++;
                }
                len[i - 1] = (char)j;
                if (j > maxLen)
                {
                    tooLong = true;
                }
            }

            if (!tooLong)
            {
                break;
            }

            for (i = 1; i < alphaSize; i++)
            {
                j = weight[i] >> 8;
                j = 1 + (j / 2);
                weight[i] = j << 8;
            }
        }
    }

    /*
    index of the last char in the block, so
    the block size == last + 1.
    */
    private int last;

    /*
    index in zptr[] of original string after sorting.
    */
    private int origPtr;

    /*
    always: in the range 0 .. 9.
    The current block size is 100000 * this number.
    */
    private readonly int blockSize100k;

    private bool blockRandomised;

    private int bytesOut;
    private int bsBuff;
    private int bsLive;
    private readonly CRC mCrc = new();

    private readonly bool[] inUse = new bool[256];
    private int nInUse;

    private readonly char[] seqToUnseq = new char[256];
    private readonly char[] unseqToSeq = new char[256];

    private readonly char[] selector = new char[BZip2Constants.MAX_SELECTORS];
    private readonly char[] selectorMtf = new char[BZip2Constants.MAX_SELECTORS];

    private char[] block;
    private int[] quadrant;
    private int[] zptr;
    private short[] szptr;
    private int[] ftab;

    private int nMTF;

    private readonly int[] mtfFreq = new int[BZip2Constants.MAX_ALPHA_SIZE];

    /*
     * Used when sorting. If too many long comparisons
     * happen, we stop sorting, randomise the block
     * slightly, and try again.
     */
    private readonly int workFactor;
    private int workDone;
    private int workLimit;
    private bool firstAttempt;
    private int nBlocksRandomised;

    private int currentChar = -1;
    private int runLength;

    public CBZip2OutputStream(Stream inStream)
        : this(inStream, 9) { }

    public CBZip2OutputStream(Stream inStream, int inBlockSize)
    {
        block = null;
        quadrant = null;
        zptr = null;
        ftab = null;

        inStream.WriteByte((byte)'B');
        inStream.WriteByte((byte)'Z');

        BsSetStream(inStream);

#if DEBUG_STREAMS
        this.DebugConstruct(typeof(CBZip2OutputStream));
#endif

        workFactor = 50;
        if (inBlockSize > 9)
        {
            inBlockSize = 9;
        }
        if (inBlockSize < 1)
        {
            inBlockSize = 1;
        }
        blockSize100k = inBlockSize;
        AllocateCompressStructures();
        Initialize();
        InitBlock();
    }

    /**
     *
     * modified by Oliver Merkel, 010128
     *
     */

    public override void WriteByte(byte bv)
    {
        var b = (256 + bv) % 256;
        if (currentChar != -1)
        {
            if (currentChar == b)
            {
                runLength++;
                if (runLength > 254)
                {
                    WriteRun();
                    currentChar = -1;
                    runLength = 0;
                }
            }
            else
            {
                WriteRun();
                runLength = 1;
                currentChar = b;
            }
        }
        else
        {
            currentChar = b;
            runLength++;
        }
    }

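    /*
      First-stage run-length encoding: runs of 1-3 identical bytes are stored literally,
      while a longer run is stored as four literals followed by a length byte
      (runLength - 4); WriteByte flushes a run once it reaches 255 bytes.
    */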
    private void WriteRun()
    {
        if (last < allowableBlockSize)
        {
            inUse[currentChar] = true;
            for (var i = 0; i < runLength; i++)
            {
                mCrc.UpdateCRC((char)currentChar);
            }
            switch (runLength)
            {
                case 1:
                    last++;
                    block[last + 1] = (char)currentChar;
                    break;
                case 2:
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    break;
                case 3:
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    break;
                default:
                    inUse[runLength - 4] = true;
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)currentChar;
                    last++;
                    block[last + 1] = (char)(runLength - 4);
                    break;
            }
        }
        else
        {
            EndBlock();
            InitBlock();
            WriteRun();
        }
    }

    private bool disposed;

    protected override void Dispose(bool disposing)
    {
        if (disposing)
        {
            if (disposed)
            {
                return;
            }

            Finish();

            disposed = true;
#if DEBUG_STREAMS
            this.DebugDispose(typeof(CBZip2OutputStream));
#endif
            Dispose();
            bsStream?.Dispose();
            bsStream = null;
        }
    }

    public void Finish()
    {
        if (finished)
        {
            return;
        }

        if (runLength > 0)
        {
            WriteRun();
        }
        currentChar = -1;
        EndBlock();
        EndCompression();
        finished = true;
        Flush();
    }

    public override void Flush() => bsStream.Flush();

    private int blockCRC, combinedCRC;

    private void Initialize()
    {
        bytesOut = 0;
        nBlocksRandomised = 0;

        /* Write `magic' bytes h indicating file-format == huffmanised,
        followed by a digit indicating blockSize100k.
        */
        BsPutUChar('h');
        BsPutUChar('0' + blockSize100k);

        combinedCRC = 0;
    }

    private int allowableBlockSize;

    private void InitBlock()
    {
        // blockNo++;
        mCrc.InitialiseCRC();
        last = -1;

        // ch = 0;

        for (var i = 0; i < 256; i++)
        {
            inUse[i] = false;
        }

        /* 20 is just a paranoia constant */
        allowableBlockSize = (BZip2Constants.baseBlockSize * blockSize100k) - 20;
    }

    private void EndBlock()
    {
        blockCRC = mCrc.GetFinalCRC();
        combinedCRC = (combinedCRC << 1) | (int)(((uint)combinedCRC) >> 31);
        combinedCRC ^= blockCRC;

        /* sort the block and establish posn of original string */
        DoReversibleTransformation();

        /*
        A 6-byte block header, the value chosen arbitrarily
        as 0x314159265359 :-). A 32 bit value does not really
        give a strong enough guarantee that the value will not
        appear by chance in the compressed datastream. Worst-case
        probability of this event, for a 900k block, is about
        2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
        For a compressed file of size 100Gb -- about 100000 blocks --
        only a 48-bit marker will do. NB: normal compression/
        decompression do *not* rely on these statistical properties.
        They are only important when trying to recover blocks from
        damaged files.
        */
        BsPutUChar(0x31);
        BsPutUChar(0x41);
        BsPutUChar(0x59);
        BsPutUChar(0x26);
        BsPutUChar(0x53);
        BsPutUChar(0x59);

        /* Now the block's CRC, so it is in a known place. */
        BsPutint(blockCRC);

        /* Now a single bit indicating randomisation. */
        if (blockRandomised)
        {
            BsW(1, 1);
            nBlocksRandomised++;
        }
        else
        {
            BsW(1, 0);
        }

        /* Finally, block's contents proper. */
        MoveToFrontCodeAndSend();
    }

    private void EndCompression()
    {
        /*
        Now another magic 48-bit number, 0x177245385090, to
        indicate the end of the last block. (Sqrt(pi), if
        you want to know. I did want to use e, but it contains
        too much repetition -- 27 18 28 18 28 46 -- for me
        to feel statistically comfortable. Call me paranoid.)
        */
        BsPutUChar(0x17);
        BsPutUChar(0x72);
        BsPutUChar(0x45);
        BsPutUChar(0x38);
        BsPutUChar(0x50);
        BsPutUChar(0x90);

        BsPutint(combinedCRC);

        BsFinishedWithStream();
    }

    private void HbAssignCodes(int[] code, char[] length, int minLen, int maxLen, int alphaSize)
    {
        int n, vec, i;

        vec = 0;
        for (n = minLen; n <= maxLen; n++)
        {
            for (i = 0; i < alphaSize; i++)
            {
                if (length[i] == n)
                {
                    code[i] = vec;
                    vec++;
                }
            }
            vec <<= 1;
        }
    }

    private void BsSetStream(Stream f)
    {
        bsStream = f;
        bsLive = 0;
        bsBuff = 0;
        bytesOut = 0;
    }

    private void BsFinishedWithStream()
    {
        while (bsLive > 0)
        {
            var ch = (bsBuff >> 24);
            bsStream.WriteByte((byte)ch); // write 8-bit
            bsBuff <<= 8;
            bsLive -= 8;
            bytesOut++;
        }
    }

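    /*
      Bit-level output: bits are accumulated MSB-first in the upper end of bsBuff and
      bsLive counts how many of them are valid. Whole bytes are drained from the top of
      the buffer to bsStream before the new bits are OR-ed in.
    */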
    private void BsW(int n, int v)
    {
        while (bsLive >= 8)
        {
            var ch = (bsBuff >> 24);
            bsStream.WriteByte((byte)ch); // write 8-bit
            bsBuff <<= 8;
            bsLive -= 8;
            bytesOut++;
        }
        bsBuff |= (v << (32 - bsLive - n));
        bsLive += n;
    }

    private void BsPutUChar(int c) => BsW(8, c);

    private void BsPutint(int u)
    {
        BsW(8, (u >> 24) & 0xff);
        BsW(8, (u >> 16) & 0xff);
        BsW(8, (u >> 8) & 0xff);
        BsW(8, u & 0xff);
    }

    private void BsPutIntVS(int numBits, int c) => BsW(numBits, c);

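    /*
      Huffman stage, roughly as in the reference bzip2 encoder: the MTF output is split
      into groups of G_SIZE symbols, 2-6 coding tables are seeded from frequency ranges,
      and N_ITERS refinement passes pick the cheapest table per group (recorded in
      selector[]) before the code lengths, selectors and coded data are written out.
    */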
    private void SendMTFValues()
    {
        var len = CBZip2InputStream.InitCharArray(
            BZip2Constants.N_GROUPS,
            BZip2Constants.MAX_ALPHA_SIZE
        );

        int v, t, i, j, gs, ge, totc, bt, bc, iter;
        int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
        int nGroups; //, nBytes;

        alphaSize = nInUse + 2;
        for (t = 0; t < BZip2Constants.N_GROUPS; t++)
        {
            for (v = 0; v < alphaSize; v++)
            {
                len[t][v] = (char)GREATER_ICOST;
            }
        }

        /* Decide how many coding tables to use */
        if (nMTF <= 0)
        {
            Panic();
        }

        if (nMTF < 200)
        {
            nGroups = 2;
        }
        else if (nMTF < 600)
        {
            nGroups = 3;
        }
        else if (nMTF < 1200)
        {
            nGroups = 4;
        }
        else if (nMTF < 2400)
        {
            nGroups = 5;
        }
        else
        {
            nGroups = 6;
        }

        /* Generate an initial set of coding tables */
        {
            int nPart, remF, tFreq, aFreq;

            nPart = nGroups;
            remF = nMTF;
            gs = 0;
            while (nPart > 0)
            {
                tFreq = remF / nPart;
                ge = gs - 1;
                aFreq = 0;
                while (aFreq < tFreq && ge < alphaSize - 1)
                {
                    ge++;
                    aFreq += mtfFreq[ge];
                }

                if (ge > gs && nPart != nGroups && nPart != 1 && ((nGroups - nPart) % 2 == 1))
                {
                    aFreq -= mtfFreq[ge];
                    ge--;
                }

                for (v = 0; v < alphaSize; v++)
                {
                    if (v >= gs && v <= ge)
                    {
                        len[nPart - 1][v] = (char)LESSER_ICOST;
                    }
                    else
                    {
                        len[nPart - 1][v] = (char)GREATER_ICOST;
                    }
                }

                nPart--;
                gs = ge + 1;
                remF -= aFreq;
            }
        }

        var rfreq = CBZip2InputStream.InitIntArray(
            BZip2Constants.N_GROUPS,
            BZip2Constants.MAX_ALPHA_SIZE
        );
        var fave = new int[BZip2Constants.N_GROUPS];
        var cost = new short[BZip2Constants.N_GROUPS];
        /*
        Iterate up to N_ITERS times to improve the tables.
        */
        for (iter = 0; iter < BZip2Constants.N_ITERS; iter++)
        {
            for (t = 0; t < nGroups; t++)
            {
                fave[t] = 0;
            }

            for (t = 0; t < nGroups; t++)
            {
                for (v = 0; v < alphaSize; v++)
                {
                    rfreq[t][v] = 0;
                }
            }

            nSelectors = 0;
            totc = 0;
            gs = 0;
            while (true)
            {
                /* Set group start & end marks. */
                if (gs >= nMTF)
                {
                    break;
                }
                ge = gs + BZip2Constants.G_SIZE - 1;
                if (ge >= nMTF)
                {
                    ge = nMTF - 1;
                }

                /*
                Calculate the cost of this group as coded
                by each of the coding tables.
                */
                for (t = 0; t < nGroups; t++)
                {
                    cost[t] = 0;
                }

                if (nGroups == 6)
                {
                    short cost0, cost1, cost2, cost3, cost4, cost5;
                    cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
                    for (i = gs; i <= ge; i++)
                    {
                        var icv = szptr[i];
                        cost0 += (short)len[0][icv];
                        cost1 += (short)len[1][icv];
                        cost2 += (short)len[2][icv];
                        cost3 += (short)len[3][icv];
                        cost4 += (short)len[4][icv];
                        cost5 += (short)len[5][icv];
                    }
                    cost[0] = cost0;
                    cost[1] = cost1;
                    cost[2] = cost2;
                    cost[3] = cost3;
                    cost[4] = cost4;
                    cost[5] = cost5;
                }
                else
                {
                    for (i = gs; i <= ge; i++)
                    {
                        var icv = szptr[i];
                        for (t = 0; t < nGroups; t++)
                        {
                            cost[t] += (short)len[t][icv];
                        }
                    }
                }

                /*
                Find the coding table which is best for this group,
                and record its identity in the selector table.
                */
                bc = 999999999;
                bt = -1;
                for (t = 0; t < nGroups; t++)
                {
                    if (cost[t] < bc)
                    {
                        bc = cost[t];
                        bt = t;
                    }
                }
                totc += bc;
                fave[bt]++;
                selector[nSelectors] = (char)bt;
                nSelectors++;

                /*
                Increment the symbol frequencies for the selected table.
                */
                for (i = gs; i <= ge; i++)
                {
                    rfreq[bt][szptr[i]]++;
                }

                gs = ge + 1;
            }

            /*
            Recompute the tables based on the accumulated frequencies.
            */
            for (t = 0; t < nGroups; t++)
            {
                HbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
            }
        }

        rfreq = null;
        fave = null;
        cost = null;

        if (!(nGroups < 8))
        {
            Panic();
        }
        if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZip2Constants.G_SIZE))))
        {
            Panic();
        }

        /* Compute MTF values for the selectors. */
        {
            var pos = new char[BZip2Constants.N_GROUPS];
            char ll_i, tmp2, tmp;
            for (i = 0; i < nGroups; i++)
            {
                pos[i] = (char)i;
            }
            for (i = 0; i < nSelectors; i++)
            {
                ll_i = selector[i];
                j = 0;
                tmp = pos[j];
                while (ll_i != tmp)
                {
                    j++;
                    tmp2 = tmp;
                    tmp = pos[j];
                    pos[j] = tmp2;
                }
                pos[0] = tmp;
                selectorMtf[i] = (char)j;
            }
        }

        var code = CBZip2InputStream.InitIntArray(
            BZip2Constants.N_GROUPS,
            BZip2Constants.MAX_ALPHA_SIZE
        );

        /* Assign actual codes for the tables. */
        for (t = 0; t < nGroups; t++)
        {
            minLen = 32;
            maxLen = 0;
            for (i = 0; i < alphaSize; i++)
            {
                if (len[t][i] > maxLen)
                {
                    maxLen = len[t][i];
                }
                if (len[t][i] < minLen)
                {
                    minLen = len[t][i];
                }
            }
            if (maxLen > 20)
            {
                Panic();
            }
            if (minLen < 1)
            {
                Panic();
            }
            HbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
        }

        /* Transmit the mapping table. */
        {
            var inUse16 = new bool[16];
            for (i = 0; i < 16; i++)
            {
                inUse16[i] = false;
                for (j = 0; j < 16; j++)
                {
                    if (inUse[(i * 16) + j])
                    {
                        inUse16[i] = true;
                    }
                }
            }

            //nBytes = bytesOut;
            for (i = 0; i < 16; i++)
            {
                if (inUse16[i])
                {
                    BsW(1, 1);
                }
                else
                {
                    BsW(1, 0);
                }
            }

            for (i = 0; i < 16; i++)
            {
                if (inUse16[i])
                {
                    for (j = 0; j < 16; j++)
                    {
                        if (inUse[(i * 16) + j])
                        {
                            BsW(1, 1);
                        }
                        else
                        {
                            BsW(1, 0);
                        }
                    }
                }
            }
        }

        /* Now the selectors. */
        //nBytes = bytesOut;
        BsW(3, nGroups);
        BsW(15, nSelectors);
        for (i = 0; i < nSelectors; i++)
        {
            for (j = 0; j < selectorMtf[i]; j++)
            {
                BsW(1, 1);
            }
            BsW(1, 0);
        }

        /* Now the coding tables. */
        //nBytes = bytesOut;

        for (t = 0; t < nGroups; t++)
        {
            int curr = len[t][0];
            BsW(5, curr);
            for (i = 0; i < alphaSize; i++)
            {
                while (curr < len[t][i])
                {
                    BsW(2, 2);
                    curr++; /* 10 */
                }
                while (curr > len[t][i])
                {
                    BsW(2, 3);
                    curr--; /* 11 */
                }
                BsW(1, 0);
            }
        }

        /* And finally, the block data proper */
        //nBytes = bytesOut;
        selCtr = 0;
        gs = 0;
        while (true)
        {
            if (gs >= nMTF)
            {
                break;
            }
            ge = gs + BZip2Constants.G_SIZE - 1;
            if (ge >= nMTF)
            {
                ge = nMTF - 1;
            }
            for (i = gs; i <= ge; i++)
            {
                BsW(len[selector[selCtr]][szptr[i]], code[selector[selCtr]][szptr[i]]);
            }

            gs = ge + 1;
            selCtr++;
        }
        if (!(selCtr == nSelectors))
        {
            Panic();
        }
    }

    private void MoveToFrontCodeAndSend()
    {
        BsPutIntVS(24, origPtr);
        GenerateMTFValues();
        SendMTFValues();
    }

    private Stream bsStream;

    private void SimpleSort(int lo, int hi, int d)
    {
        int i, j, h, bigN, hp;
        int v;

        bigN = hi - lo + 1;
        if (bigN < 2)
        {
            return;
        }

        hp = 0;
        while (incs[hp] < bigN)
        {
            hp++;
        }
        hp--;

        for (; hp >= 0; hp--)
        {
            h = incs[hp];

            i = lo + h;
            while (true)
            {
                /* copy 1 */
                if (i > hi)
                {
                    break;
                }
                v = zptr[i];
                j = i;
                while (FullGtU(zptr[j - h] + d, v + d))
                {
                    zptr[j] = zptr[j - h];
                    j -= h;
                    if (j <= (lo + h - 1))
                    {
                        break;
                    }
                }
                zptr[j] = v;
                i++;

                /* copy 2 */
                if (i > hi)
                {
                    break;
                }
                v = zptr[i];
                j = i;
                while (FullGtU(zptr[j - h] + d, v + d))
                {
                    zptr[j] = zptr[j - h];
                    j -= h;
                    if (j <= (lo + h - 1))
                    {
                        break;
                    }
                }
                zptr[j] = v;
                i++;

                /* copy 3 */
                if (i > hi)
                {
                    break;
                }
                v = zptr[i];
                j = i;
                while (FullGtU(zptr[j - h] + d, v + d))
                {
                    zptr[j] = zptr[j - h];
                    j -= h;
                    if (j <= (lo + h - 1))
                    {
                        break;
                    }
                }
                zptr[j] = v;
                i++;

                if (workDone > workLimit && firstAttempt)
                {
                    return;
                }
            }
        }
    }

    private void Vswap(int p1, int p2, int n)
    {
        var temp = 0;
        while (n > 0)
        {
            temp = zptr[p1];
            zptr[p1] = zptr[p2];
            zptr[p2] = temp;
            p1++;
            p2++;
            n--;
        }
    }

    private char Med3(char a, char b, char c)
    {
        char t;
        if (a > b)
        {
            t = a;
            a = b;
            b = t;
        }
        if (b > c)
        {
            t = b;
            b = c;
            c = t;
        }
        if (a > b)
        {
            b = a;
        }
        return b;
    }

    internal class StackElem
    {
        internal int ll;
        internal int hh;
        internal int dd;
    }

    private void QSort3(int loSt, int hiSt, int dSt)
    {
        int unLo, unHi, ltLo, gtHi, med, n, m;
        int sp, lo, hi, d;
        var stack = new StackElem[QSORT_STACK_SIZE];
        for (var count = 0; count < QSORT_STACK_SIZE; count++)
        {
            stack[count] = new StackElem();
        }

        sp = 0;

        stack[sp].ll = loSt;
        stack[sp].hh = hiSt;
        stack[sp].dd = dSt;
        sp++;

        while (sp > 0)
        {
            if (sp >= QSORT_STACK_SIZE)
            {
                Panic();
            }

            sp--;
            lo = stack[sp].ll;
            hi = stack[sp].hh;
            d = stack[sp].dd;

            if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH)
            {
                SimpleSort(lo, hi, d);
                if (workDone > workLimit && firstAttempt)
                {
                    return;
                }
                continue;
            }

            med = Med3(
                block[zptr[lo] + d + 1],
                block[zptr[hi] + d + 1],
                block[zptr[(lo + hi) >> 1] + d + 1]
            );

            unLo = ltLo = lo;
            unHi = gtHi = hi;

            while (true)
            {
                while (true)
                {
                    if (unLo > unHi)
                    {
                        break;
                    }
                    n = block[zptr[unLo] + d + 1] - med;
                    if (n == 0)
                    {
                        var temp = 0;
                        temp = zptr[unLo];
                        zptr[unLo] = zptr[ltLo];
                        zptr[ltLo] = temp;
                        ltLo++;
                        unLo++;
                        continue;
                    }
                    if (n > 0)
                    {
                        break;
                    }
                    unLo++;
                }
                while (true)
                {
                    if (unLo > unHi)
                    {
                        break;
                    }
                    n = block[zptr[unHi] + d + 1] - med;
                    if (n == 0)
                    {
                        var temp = 0;
                        temp = zptr[unHi];
                        zptr[unHi] = zptr[gtHi];
                        zptr[gtHi] = temp;
                        gtHi--;
                        unHi--;
                        continue;
                    }
                    if (n < 0)
                    {
                        break;
                    }
                    unHi--;
                }
                if (unLo > unHi)
                {
                    break;
                }
                var tempx = zptr[unLo];
                zptr[unLo] = zptr[unHi];
                zptr[unHi] = tempx;
                unLo++;
                unHi--;
            }

            if (gtHi < ltLo)
            {
                stack[sp].ll = lo;
                stack[sp].hh = hi;
                stack[sp].dd = d + 1;
                sp++;
                continue;
            }

            n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo);
            Vswap(lo, unLo - n, n);
            m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi);
            Vswap(unLo, hi - m + 1, m);

            n = lo + unLo - ltLo - 1;
            m = hi - (gtHi - unHi) + 1;

            stack[sp].ll = lo;
            stack[sp].hh = n;
            stack[sp].dd = d;
            sp++;

            stack[sp].ll = n + 1;
            stack[sp].hh = m - 1;
            stack[sp].dd = d + 1;
            sp++;

            stack[sp].ll = m;
            stack[sp].hh = hi;
            stack[sp].dd = d;
            sp++;
        }
    }

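    /*
      Block sorting for the Burrows-Wheeler transform: small blocks go straight to
      SimpleSort; otherwise ftab counts every 2-byte prefix so rotations can be bucketed
      by their first two characters, and each bucket is finished with QSort3, processing
      the big buckets from least to most populated.
    */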
    private void MainSort()
    {
        int i, j, ss, sb;
        Span<int> runningOrder = stackalloc int[256];
        Span<int> copy = stackalloc int[256];
        var bigDone = new bool[256];
        int c1, c2;
        int numQSorted;

        /*
        In the various block-sized structures, live data runs
        from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
        set up the overshoot area for block.
        */

        // if (verbosity >= 4) fprintf ( stderr, "        sort initialise ...\n" );
        for (i = 0; i < BZip2Constants.NUM_OVERSHOOT_BYTES; i++)
        {
            block[last + i + 2] = block[(i % (last + 1)) + 1];
        }
        for (i = 0; i <= last + BZip2Constants.NUM_OVERSHOOT_BYTES; i++)
        {
            quadrant[i] = 0;
        }

        block[0] = block[last + 1];

        if (last < 4000)
        {
            /*
            Use SimpleSort(), since the full sorting mechanism
            has quite a large constant overhead.
            */
            for (i = 0; i <= last; i++)
            {
                zptr[i] = i;
            }
            firstAttempt = false;
            workDone = workLimit = 0;
            SimpleSort(0, last, 0);
        }
        else
        {
            numQSorted = 0;
            for (i = 0; i <= 255; i++)
            {
                bigDone[i] = false;
            }

            for (i = 0; i <= 65536; i++)
            {
                ftab[i] = 0;
            }

            c1 = block[0];
            for (i = 0; i <= last; i++)
            {
                c2 = block[i + 1];
                ftab[(c1 << 8) + c2]++;
                c1 = c2;
            }

            for (i = 1; i <= 65536; i++)
            {
                ftab[i] += ftab[i - 1];
            }

            c1 = block[1];
            for (i = 0; i < last; i++)
            {
                c2 = block[i + 2];
                j = (c1 << 8) + c2;
                c1 = c2;
                ftab[j]--;
                zptr[ftab[j]] = i;
            }

            j = ((block[last + 1]) << 8) + (block[1]);
            ftab[j]--;
            zptr[ftab[j]] = last;

            /*
            Now ftab contains the first loc of every small bucket.
            Calculate the running order, from smallest to largest
            big bucket.
            */

            for (i = 0; i <= 255; i++)
            {
                runningOrder[i] = i;
            }

            {
                int vv;
                var h = 1;
                do
                {
                    h = (3 * h) + 1;
                } while (h <= 256);
                do
                {
                    h /= 3;
                    for (i = h; i <= 255; i++)
                    {
                        vv = runningOrder[i];
                        j = i;
                        while (
                            (ftab[((runningOrder[j - h]) + 1) << 8] - ftab[(runningOrder[j - h]) << 8])
                            > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])
                        )
                        {
                            runningOrder[j] = runningOrder[j - h];
                            j -= h;
                            if (j <= (h - 1))
                            {
                                break;
                            }
                        }
                        runningOrder[j] = vv;
                    }
                } while (h != 1);
            }

            /*
            The main sorting loop.
            */
            for (i = 0; i <= 255; i++)
            {
                /*
                Process big buckets, starting with the least full.
                */
                ss = runningOrder[i];

                /*
                Complete the big bucket [ss] by quicksorting
                any unsorted small buckets [ss, j]. Hopefully
                previous pointer-scanning phases have already
                completed many of the small buckets [ss, j], so
                we don't have to sort them at all.
                */
                for (j = 0; j <= 255; j++)
                {
                    sb = (ss << 8) + j;
                    if (!((ftab[sb] & SETMASK) == SETMASK))
                    {
                        var lo = ftab[sb] & CLEARMASK;
                        var hi = (ftab[sb + 1] & CLEARMASK) - 1;
                        if (hi > lo)
                        {
                            QSort3(lo, hi, 2);
                            numQSorted += (hi - lo + 1);
                            if (workDone > workLimit && firstAttempt)
                            {
                                return;
                            }
                        }
                        ftab[sb] |= SETMASK;
                    }
                }

                /*
                The ss big bucket is now done. Record this fact,
                and update the quadrant descriptors. Remember to
                update quadrants in the overshoot area too, if
                necessary. The "if (i < 255)" test merely skips
                this updating for the last bucket processed, since
                updating for the last bucket is pointless.
                */
                bigDone[ss] = true;

                if (i < 255)
                {
                    var bbStart = ftab[ss << 8] & CLEARMASK;
                    var bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
                    var shifts = 0;

                    while ((bbSize >> shifts) > 65534)
                    {
                        shifts++;
                    }

                    for (j = 0; j < bbSize; j++)
                    {
                        var a2update = zptr[bbStart + j];
                        var qVal = (j >> shifts);
                        quadrant[a2update] = qVal;
                        if (a2update < BZip2Constants.NUM_OVERSHOOT_BYTES)
                        {
                            quadrant[a2update + last + 1] = qVal;
                        }
                    }

                    if (!(((bbSize - 1) >> shifts) <= 65535))
                    {
                        Panic();
                    }
                }

                /*
                Now scan this big bucket so as to synthesise the
                sorted order for small buckets [t, ss] for all t != ss.
                */
                for (j = 0; j <= 255; j++)
                {
                    copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
                }

                for (j = ftab[ss << 8] & CLEARMASK; j < (ftab[(ss + 1) << 8] & CLEARMASK); j++)
                {
                    c1 = block[zptr[j]];
                    if (!bigDone[c1])
                    {
                        zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
                        copy[c1]++;
                    }
                }

                for (j = 0; j <= 255; j++)
                {
                    ftab[(j << 8) + ss] |= SETMASK;
                }
            }
        }
    }

    private void RandomiseBlock()
    {
        int i;
        var rNToGo = 0;
        var rTPos = 0;
        for (i = 0; i < 256; i++)
        {
            inUse[i] = false;
        }

        for (i = 0; i <= last; i++)
        {
            if (rNToGo == 0)
            {
                rNToGo = (char)BZip2Constants.rNums[rTPos];
                rTPos++;
                if (rTPos == 512)
                {
                    rTPos = 0;
                }
            }
            rNToGo--;
            block[i + 1] ^= (char)((rNToGo == 1) ? 1 : 0);

            // handle 16 bit signed numbers
            block[i + 1] &= (char)0xFF;

            inUse[block[i + 1]] = true;
        }
    }

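    /*
      Runs the BWT sort with a work budget (workFactor * last). If the first attempt
      blows the budget on pathologically repetitive data, the block is lightly
      randomised and sorted again, and the randomisation flag is recorded in the block
      header by EndBlock.
    */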
    private void DoReversibleTransformation()
    {
        int i;

        workLimit = workFactor * last;
        workDone = 0;
        blockRandomised = false;
        firstAttempt = true;

        MainSort();

        if (workDone > workLimit && firstAttempt)
        {
            RandomiseBlock();
            workLimit = workDone = 0;
            blockRandomised = true;
            firstAttempt = false;
            MainSort();
        }

        origPtr = -1;
        for (i = 0; i <= last; i++)
        {
            if (zptr[i] == 0)
            {
                origPtr = i;
                break;
            }
        }

        if (origPtr == -1)
        {
            Panic();
        }
    }

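    /*
      Rotation comparison used by the sorts: the first few characters are compared
      directly, then the loop alternates between character comparisons and the
      quadrant[] ranks as a tie-breaker so long equal stretches can be resolved without
      scanning byte by byte; workDone tracks how expensive the comparisons become.
    */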
    private bool FullGtU(int i1, int i2)
    {
        int k;
        char c1, c2;
        int s1, s2;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        c1 = block[i1 + 1];
        c2 = block[i2 + 1];
        if (c1 != c2)
        {
            return (c1 > c2);
        }
        i1++;
        i2++;

        k = last + 1;

        do
        {
            c1 = block[i1 + 1];
            c2 = block[i2 + 1];
            if (c1 != c2)
            {
                return (c1 > c2);
            }
            s1 = quadrant[i1];
            s2 = quadrant[i2];
            if (s1 != s2)
            {
                return (s1 > s2);
            }
            i1++;
            i2++;

            c1 = block[i1 + 1];
            c2 = block[i2 + 1];
            if (c1 != c2)
            {
                return (c1 > c2);
            }
            s1 = quadrant[i1];
            s2 = quadrant[i2];
            if (s1 != s2)
            {
                return (s1 > s2);
            }
            i1++;
            i2++;

            c1 = block[i1 + 1];
            c2 = block[i2 + 1];
            if (c1 != c2)
            {
                return (c1 > c2);
            }
            s1 = quadrant[i1];
            s2 = quadrant[i2];
            if (s1 != s2)
            {
                return (s1 > s2);
            }
            i1++;
            i2++;

            c1 = block[i1 + 1];
            c2 = block[i2 + 1];
            if (c1 != c2)
            {
                return (c1 > c2);
            }
            s1 = quadrant[i1];
            s2 = quadrant[i2];
            if (s1 != s2)
            {
                return (s1 > s2);
            }
            i1++;
            i2++;

            if (i1 > last)
            {
                i1 -= last;
                i1--;
            }
            if (i2 > last)
            {
                i2 -= last;
                i2--;
            }

            k -= 4;
            workDone++;
        } while (k >= 0);

        return false;
    }

    /*
    Knuth's increments seem to work better
    than Incerpi-Sedgewick here. Possibly
    because the number of elems to sort is
    usually small, typically <= 20.
    */

    private readonly int[] incs =
    {
        1, 4, 13, 40, 121, 364, 1093, 3280, 9841, 29524, 88573, 265720, 797161, 2391484,
    };

    private void AllocateCompressStructures()
    {
        var n = BZip2Constants.baseBlockSize * blockSize100k;
        block = new char[(n + 1 + BZip2Constants.NUM_OVERSHOOT_BYTES)];
        quadrant = new int[(n + BZip2Constants.NUM_OVERSHOOT_BYTES)];
        zptr = new int[n];
        ftab = new int[65537];

        if (block is null || quadrant is null || zptr is null || ftab is null)
        {
            //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
            //compressOutOfMemory ( totalDraw, n );
        }

        /*
        The back end needs a place to store the MTF values
        whilst it calculates the coding tables. We could
        put them in the zptr array. However, these values
        will fit in a short, so we overlay szptr at the
        start of zptr, in the hope of reducing the number
        of cache misses induced by the multiple traversals
        of the MTF values when calculating coding tables.
        Seems to improve compression speed by about 1%.
        */
        // szptr = zptr;

        szptr = new short[2 * n];
    }

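    /*
      Move-to-front coding of the sorted block: each output value is the position of the
      current symbol in a recency list (yy), and runs of zeroes are emitted with the
      two-symbol RUNA/RUNB scheme instead of literal zero values; EOB terminates the
      block.
    */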
    private void GenerateMTFValues()
    {
        var yy = new char[256];
        int i, j;
        char tmp;
        char tmp2;
        int zPend;
        int wr;
        int EOB;

        MakeMaps();
        EOB = nInUse + 1;

        for (i = 0; i <= EOB; i++)
        {
            mtfFreq[i] = 0;
        }

        wr = 0;
        zPend = 0;
        for (i = 0; i < nInUse; i++)
        {
            yy[i] = (char)i;
        }

        for (i = 0; i <= last; i++)
        {
            char ll_i;

            ll_i = unseqToSeq[block[zptr[i]]];

            j = 0;
            tmp = yy[j];
            while (ll_i != tmp)
            {
                j++;
                tmp2 = tmp;
                tmp = yy[j];
                yy[j] = tmp2;
            }
            yy[0] = tmp;

            if (j == 0)
            {
                zPend++;
            }
            else
            {
                if (zPend > 0)
                {
                    zPend--;
                    while (true)
                    {
                        switch (zPend % 2)
                        {
                            case 0:
                                szptr[wr] = BZip2Constants.RUNA;
                                wr++;
                                mtfFreq[BZip2Constants.RUNA]++;
                                break;
                            case 1:
                                szptr[wr] = BZip2Constants.RUNB;
                                wr++;
                                mtfFreq[BZip2Constants.RUNB]++;
                                break;
                        }
                        if (zPend < 2)
                        {
                            break;
                        }
                        zPend = (zPend - 2) / 2;
                    }
                    zPend = 0;
                }
                szptr[wr] = (short)(j + 1);
                wr++;
                mtfFreq[j + 1]++;
            }
        }

        if (zPend > 0)
        {
            zPend--;
            while (true)
            {
                switch (zPend % 2)
                {
                    case 0:
                        szptr[wr] = BZip2Constants.RUNA;
                        wr++;
                        mtfFreq[BZip2Constants.RUNA]++;
                        break;
                    case 1:
                        szptr[wr] = BZip2Constants.RUNB;
                        wr++;
                        mtfFreq[BZip2Constants.RUNB]++;
                        break;
                }
                if (zPend < 2)
                {
                    break;
                }
                zPend = (zPend - 2) / 2;
            }
        }

        szptr[wr] = (short)EOB;
        wr++;
        mtfFreq[EOB]++;

        nMTF = wr;
    }

    public override int Read(byte[] buffer, int offset, int count) => 0;

    public override int ReadByte() => -1;

    public override long Seek(long offset, SeekOrigin origin) => 0;

    public override void SetLength(long value) { }

    public override void Write(byte[] buffer, int offset, int count)
    {
        for (var k = 0; k < count; ++k)
        {
            WriteByte(buffer[k + offset]);
        }
    }

    public override Task WriteAsync(
        byte[] buffer,
        int offset,
        int count,
        CancellationToken cancellationToken = default
    )
    {
        for (var k = 0; k < count; ++k)
        {
            cancellationToken.ThrowIfCancellationRequested();
            WriteByte(buffer[k + offset]);
        }
        return Task.CompletedTask;
    }

    public override bool CanRead => false;

    public override bool CanSeek => false;

    public override bool CanWrite => true;

    public override long Length => 0;

    public override long Position
    {
        get => 0;
        set { }
    }
}