mirror of
https://github.com/SabreTools/BinaryObjectScanner.git
synced 2026-02-13 05:35:24 +00:00
1188 lines
39 KiB
C#
1188 lines
39 KiB
C#
using static BinaryObjectScanner.Compression.bzip2.Constants;
|
|
|
|
namespace BinaryObjectScanner.Compression.bzip2
|
|
{
|
|
/// <summary>
|
|
/// Block sorting machinery
|
|
/// </summary>
|
|
/// <see href="https://github.com/ladislav-zezula/StormLib/blob/master/src/bzip2/blocksort.c"/>
|
|
internal static unsafe class blocksort
|
|
{
|
|
/// <summary>
/// Small-range sorter used by the fallback algorithm.
/// Insertion-sorts fmap[lo..hi] in place so that eclass[fmap[i]] is ascending:
/// one coarse pass with stride 4 (only when the range holds more than four
/// entries) followed by a final pass with stride 1.
/// </summary>
/// <param name="fmap">Index array whose [lo..hi] slice is reordered</param>
/// <param name="eclass">Sort keys, looked up as eclass[fmap[i]]</param>
/// <param name="lo">First index of the slice (inclusive)</param>
/// <param name="hi">Last index of the slice (inclusive)</param>
public static void fallbackSimpleSort(uint* fmap, uint* eclass, int lo, int hi)
{
    if (lo == hi)
        return;

    // Start with the coarse stride only when it can actually move something
    int stride = (hi - lo > 3) ? 4 : 1;
    while (true)
    {
        for (int pos = hi - stride; pos >= lo; pos--)
        {
            int moving = (int)fmap[pos];
            uint movingKey = eclass[moving];

            // Shift smaller-keyed entries down by one stride until the slot is found
            int slot = pos + stride;
            while (slot <= hi && movingKey > eclass[fmap[slot]])
            {
                fmap[slot - stride] = fmap[slot];
                slot += stride;
            }

            fmap[slot - stride] = (uint)moving;
        }

        if (stride == 1)
            break;

        stride = 1;
    }
}
|
|
|
|
/// <summary>
/// Three-way quicksort of fmap[loSt..hiSt], keyed on eclass[fmap[i]].
/// Recursion is replaced by an explicit stack of pending ranges; ranges
/// shorter than FALLBACK_QSORT_SMALL_THRESH go to <see cref="fallbackSimpleSort"/>.
/// </summary>
/// <param name="fmap">Index array that is reordered in place</param>
/// <param name="eclass">Sort keys, looked up as eclass[fmap[i]]</param>
/// <param name="loSt">First index of the range (inclusive)</param>
/// <param name="hiSt">Last index of the range (inclusive)</param>
public static void fallbackQSort3(uint* fmap, uint* eclass, int loSt, int hiSt)
{
    int[] pendingLo = new int[FALLBACK_QSORT_STACK_SIZE];
    int[] pendingHi = new int[FALLBACK_QSORT_STACK_SIZE];
    int depth = 0;
    int lo = 0, hi = 0;
    uint seed = 0;

    fpush(loSt, hiSt, pendingLo, pendingHi, ref depth);

    while (depth > 0)
    {
        //AssertH(depth < FALLBACK_QSORT_STACK_SIZE - 1, 1004);

        fpop(ref lo, ref hi, pendingLo, pendingHi, ref depth);

        if (hi - lo < FALLBACK_QSORT_SMALL_THRESH)
        {
            fallbackSimpleSort(fmap, eclass, lo, hi);
            continue;
        }

        /* Random partitioning. Median of 3 sometimes fails to
           avoid bad cases. Median of 9 seems to help but
           looks rather expensive. This too seems to work but
           is cheaper. Guidance for the magic constants
           7621 and 32768 is taken from Sedgewick's algorithms
           book, chapter 35.
        */
        seed = ((seed * 7621) + 1) % 32768;

        uint pivot;
        switch (seed % 3)
        {
            case 0:
                pivot = eclass[fmap[lo]];
                break;
            case 1:
                pivot = eclass[fmap[(lo + hi) >> 1]];
                break;
            default:
                pivot = eclass[fmap[hi]];
                break;
        }

        // [lo, eqLeft) and (eqRight, hi] collect keys equal to the pivot;
        // scanLeft / scanRight are the two converging cursors.
        int eqLeft = lo, scanLeft = lo;
        int eqRight = hi, scanRight = hi;
        int diff;

        while (true)
        {
            // Advance from the left over keys that are not greater than the pivot
            while (scanLeft <= scanRight)
            {
                diff = (int)eclass[fmap[scanLeft]] - (int)pivot;
                if (diff > 0)
                    break;

                if (diff == 0)
                {
                    fswap(ref fmap[scanLeft], ref fmap[eqLeft]);
                    eqLeft++;
                }

                scanLeft++;
            }

            // Advance from the right over keys that are not smaller than the pivot
            while (scanLeft <= scanRight)
            {
                diff = (int)eclass[fmap[scanRight]] - (int)pivot;
                if (diff < 0)
                    break;

                if (diff == 0)
                {
                    fswap(ref fmap[scanRight], ref fmap[eqRight]);
                    eqRight--;
                }

                scanRight--;
            }

            if (scanLeft > scanRight)
                break;

            fswap(ref fmap[scanLeft], ref fmap[scanRight]);
            scanLeft++;
            scanRight--;
        }

        //AssertD(scanRight == scanLeft - 1, "fallbackQSort3(2)");

        // Every key equalled the pivot: this range needs no further work
        if (eqRight < eqLeft)
            continue;

        // Move the two "equal" runs from the edges into the middle
        int leftMove = fmin(eqLeft - lo, scanLeft - eqLeft);
        fvswap(fmap, lo, scanLeft - leftMove, leftMove);

        int rightMove = fmin(hi - eqRight, eqRight - scanRight);
        fvswap(fmap, scanLeft, hi - rightMove + 1, rightMove);

        int lessEnd = lo + scanLeft - eqLeft - 1;
        int greaterStart = hi - (eqRight - scanRight) + 1;

        // Push the larger side first so the smaller one is popped (processed) next
        if (lessEnd - lo > hi - greaterStart)
        {
            fpush(lo, lessEnd, pendingLo, pendingHi, ref depth);
            fpush(greaterStart, hi, pendingLo, pendingHi, ref depth);
        }
        else
        {
            fpush(greaterStart, hi, pendingLo, pendingHi, ref depth);
            fpush(lo, lessEnd, pendingLo, pendingHi, ref depth);
        }
    }
}
|
|
|
|
/// <summary>
/// Fallback sorter for repetitive blocks: a one-byte radix sort followed by
/// repeated bucket refinement with a doubling comparison span.
/// </summary>
/// <remarks>
/// Pre (as stated by the original bzip2 comment):
///   nblock is greater than 0;
///   eclass exists for [0 .. nblock-1] and ((byte*)eclass)[0 .. nblock-1] holds the block;
///   fmap exists for [0 .. nblock-1].
/// Post:
///   ((byte*)eclass)[0 .. nblock-1] holds the block again, all other areas of eclass are destroyed;
///   fmap[0 .. nblock-1] holds the sorted order;
///   bhtab[0 .. 2+(nblock/32)] is destroyed.
/// </remarks>
/// <param name="fmap">Receives the sorted order</param>
/// <param name="eclass">Block bytes on entry (viewed as bytes); reused as class storage while sorting</param>
/// <param name="bhtab">Bit table marking bucket headers</param>
/// <param name="nblock">Number of bytes in the block</param>
/// <param name="verb">Verbosity; only referenced by commented-out tracing</param>
public static void fallbackSort(uint* fmap, uint* eclass, uint* bhtab, int nblock, int verb)
{
    byte* blockBytes = (byte*)eclass;
    int[] bucketStart = new int[257];
    int[] byteCount = new int[256];
    int idx, slot;

    /*--
       Initial 1-char radix sort to generate
       initial fmap and initial BH bits.
    --*/
    // if (verb >= 4)
    //     VPrintf0(" bucket sorting ...\n");
    for (idx = 0; idx < 257; idx++)
        bucketStart[idx] = 0;

    for (idx = 0; idx < nblock; idx++)
        bucketStart[blockBytes[idx]]++;

    // Keep the raw histogram: it is what rebuilds the block at the end
    for (idx = 0; idx < 256; idx++)
        byteCount[idx] = bucketStart[idx];

    for (idx = 1; idx < 257; idx++)
        bucketStart[idx] += bucketStart[idx - 1];

    for (idx = 0; idx < nblock; idx++)
    {
        int symbol = blockBytes[idx];
        slot = bucketStart[symbol] - 1;
        bucketStart[symbol] = slot;
        fmap[slot] = (uint)idx;
    }

    int bhWords = 2 + (nblock / 32);
    for (idx = 0; idx < bhWords; idx++)
        bhtab[idx] = 0;

    for (idx = 0; idx < 256; idx++)
        SET_BH(bucketStart[idx], bhtab);

    /*--
       Inductively refine the buckets. Kind-of an
       "exponential radix sort" (!), inspired by the
       Manber-Myers suffix array construction algorithm.
    --*/

    /*-- set sentinel bits for block-end detection --*/
    for (idx = 0; idx < 32; idx++)
    {
        SET_BH(nblock + 2 * idx, bhtab);
        CLEAR_BH(nblock + 2 * idx + 1, bhtab);
    }

    /*-- the log(N) loop --*/
    int span = 1;
    int unresolved;
    do
    {
        // if (verb >= 4)
        //     VPrintf1(" depth %6d has ", span);

        // Label every position with the header index of the bucket that
        // currently holds the suffix starting span bytes later.
        int bucketHead = 0;
        for (idx = 0; idx < nblock; idx++)
        {
            if (ISSET_BH(idx, bhtab))
                bucketHead = idx;

            slot = (int)(fmap[idx] - span);
            if (slot < 0)
                slot += nblock;

            eclass[slot] = (uint)bucketHead;
        }

        unresolved = 0;
        int right = -1;
        while (true)
        {
            /*-- find the next non-singleton bucket --*/
            int cursor = right + 1;
            while (ISSET_BH(cursor, bhtab) && UNALIGNED_BH(cursor) != 0)
                cursor++;

            if (ISSET_BH(cursor, bhtab))
            {
                // Skip whole words of header bits, then finish bit by bit
                while (WORD_BH(cursor, bhtab) == 0xffffffff)
                    cursor += 32;

                while (ISSET_BH(cursor, bhtab))
                    cursor++;
            }

            int left = cursor - 1;
            if (left >= nblock)
                break;

            while (!ISSET_BH(cursor, bhtab) && UNALIGNED_BH(cursor) != 0)
                cursor++;

            if (!ISSET_BH(cursor, bhtab))
            {
                // Skip whole words of clear bits, then finish bit by bit
                while (WORD_BH(cursor, bhtab) == 0x00000000)
                    cursor += 32;

                while (!ISSET_BH(cursor, bhtab))
                    cursor++;
            }

            right = cursor - 1;
            if (right >= nblock)
                break;

            /*-- now [left, right] bracket current bucket --*/
            if (right <= left)
                continue;

            unresolved += (right - left + 1);
            fallbackQSort3(fmap, eclass, left, right);

            /*-- scan bucket and generate header bits --*/
            int lastClass = -1;
            for (idx = left; idx <= right; idx++)
            {
                int thisClass = (int)eclass[fmap[idx]];
                if (lastClass != thisClass)
                {
                    SET_BH(idx, bhtab);
                    lastClass = thisClass;
                }
            }
        }

        // if (verb >= 4)
        //     VPrintf1("%6d unresolved strings\n", unresolved);

        span *= 2;
    }
    while (span <= nblock && unresolved != 0);

    /*--
       Reconstruct the original block in
       blockBytes [0 .. nblock-1], since the
       previous phase destroyed it.
    --*/
    // if (verb >= 4)
    //     VPrintf0(" reconstructing block ...\n");

    int value = 0;
    for (idx = 0; idx < nblock; idx++)
    {
        while (byteCount[value] == 0)
            value++;

        byteCount[value]--;
        blockBytes[fmap[idx]] = (byte)value;
    }

    //AssertH(value < 256, 1005);
}
|
|
|
|
/// <summary>
/// Comparison primitive of the main sorting algorithm: reports whether the
/// rotation of the block starting at <paramref name="i1"/> sorts after the
/// rotation starting at <paramref name="i2"/>.
/// </summary>
/// <remarks>
/// The first 12 positions are compared on block bytes only. After that,
/// positions are compared in groups of 8 on block bytes and then on
/// quadrant values; both indices wrap around nblock after every group and
/// *budget is decremented once per group. Indices are not wrapped inside the
/// 12-byte prefix or inside a group, so block and quadrant are read past
/// nblock - 1; callers must provide readable data there.
/// </remarks>
/// <param name="i1">Start position of the first rotation</param>
/// <param name="i2">Start position of the second rotation</param>
/// <param name="block">Block bytes</param>
/// <param name="quadrant">Cached ordering values, compared when block bytes are equal</param>
/// <param name="nblock">Number of bytes in the block</param>
/// <param name="budget">Work counter, decremented once per 8-position group</param>
/// <returns>
/// true if the first rotation is greater; false if it is smaller or if no
/// difference was found within nblock + 8 further positions
/// </returns>
public static bool mainGtU(uint i1, uint i2, byte* block, ushort* quadrant, uint nblock, int* budget)
{
    // Must be signed: the loop below terminates when k drops below zero.
    // An unsigned counter makes "k >= 0" always true, so two identical
    // rotations would never stop being compared.
    int k;
    byte c1, c2;
    ushort s1, s2;

    //AssertD(i1 != i2, "mainGtU");
    /* 1 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 2 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 3 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 4 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 5 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 6 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 7 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 8 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 9 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 10 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 11 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;
    /* 12 */
    c1 = block[i1]; c2 = block[i2];
    if (c1 != c2) return (c1 > c2);
    i1++; i2++;

    k = (int)nblock + 8;

    do
    {
        /* 1 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 2 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 3 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 4 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 5 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 6 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 7 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;
        /* 8 */
        c1 = block[i1]; c2 = block[i2];
        if (c1 != c2) return (c1 > c2);
        s1 = quadrant[i1]; s2 = quadrant[i2];
        if (s1 != s2) return (s1 > s2);
        i1++; i2++;

        // Wrap once per group; the reads above may run past nblock - 1
        if (i1 >= nblock) i1 -= nblock;
        if (i2 >= nblock) i2 -= nblock;

        k -= 8;
        (*budget)--;
    }
    while (k >= 0);

    // No difference found over a full cycle: treat as "not greater"
    return false;
}
|
|
|
|
/// <summary>
/// Shell sort of ptr[lo..hi] using the increments in incs, comparing
/// rotations with <see cref="mainGtU"/> at offset <paramref name="d"/>.
/// </summary>
/// <remarks>
/// *budget is inspected after every third insertion within an increment
/// pass; once it is negative the method returns immediately and the range
/// is left partially sorted.
/// </remarks>
/// <param name="ptr">Rotation start positions, reordered in place</param>
/// <param name="block">Block bytes</param>
/// <param name="quadrant">Cached ordering values passed through to mainGtU</param>
/// <param name="nblock">Number of bytes in the block</param>
/// <param name="lo">First index of the range (inclusive)</param>
/// <param name="hi">Last index of the range (inclusive)</param>
/// <param name="d">Offset added to both positions before they are compared</param>
/// <param name="budget">Work counter shared with mainGtU</param>
public static void mainSimpleSort(uint* ptr, byte* block, ushort* quadrant, int nblock, int lo, int hi, int d, int* budget)
{
    int count = hi - lo + 1;
    if (count < 2)
        return;

    // Pick the largest increment that is smaller than the range
    int incIndex = 0;
    while (incs[incIndex] < count)
        incIndex++;
    incIndex--;

    for (; incIndex >= 0; incIndex--)
    {
        int gap = incs[incIndex];
        int sinceCheck = 0;

        for (int cursor = lo + gap; cursor <= hi; cursor++)
        {
            uint held = ptr[cursor];
            int slot = cursor;

            while (mainGtU((uint)(ptr[slot - gap] + d), (uint)(held + d), block, quadrant, (uint)nblock, budget))
            {
                ptr[slot] = ptr[slot - gap];
                slot = slot - gap;
                if (slot <= (lo + gap - 1))
                    break;
            }

            ptr[slot] = held;

            // Same cadence as the three-way unrolled original: test the
            // budget only after every third insertion of this pass.
            sinceCheck++;
            if (sinceCheck == 3)
            {
                sinceCheck = 0;
                if (*budget < 0)
                    return;
            }
        }
    }
}
|
|
|
|
/*--
|
|
The following is an implementation of
|
|
an elegant 3-way quicksort for strings,
|
|
described in a paper "Fast Algorithms for
|
|
Sorting and Searching Strings", by Robert
|
|
Sedgewick and Jon L. Bentley.
|
|
--*/
|
|
/// <summary>
/// Returns the median of three bytes.
/// </summary>
/// <param name="a">First value</param>
/// <param name="b">Second value</param>
/// <param name="c">Third value</param>
/// <returns>The middle value of a, b and c</returns>
public static byte mmed3(byte a, byte b, byte c)
{
    // Order the first two so that low <= high
    byte low = a;
    byte high = b;
    if (low > high)
    {
        low = b;
        high = a;
    }

    // c is the largest: the bigger of the first two is the median
    if (high <= c)
        return high;

    // c is below high: the median is whichever of low and c is larger
    return (low > c) ? low : c;
}
|
|
|
|
/// <summary>
/// Three-way string quicksort of ptr[loSt..hiSt], partitioning on the block
/// byte found at offset d of each rotation. Recursion is replaced by an
/// explicit stack of (lo, hi, d) work items.
/// </summary>
/// <remarks>
/// Ranges shorter than MAIN_QSORT_SMALL_THRESH, or whose depth exceeds
/// MAIN_QSORT_DEPTH_THRESH, are passed to <see cref="mainSimpleSort"/>.
/// The method returns early as soon as *budget is negative after such a call.
/// </remarks>
/// <param name="ptr">Rotation start positions, reordered in place</param>
/// <param name="block">Block bytes</param>
/// <param name="quadrant">Cached ordering values passed through to mainSimpleSort</param>
/// <param name="nblock">Number of bytes in the block</param>
/// <param name="loSt">First index of the range (inclusive)</param>
/// <param name="hiSt">Last index of the range (inclusive)</param>
/// <param name="dSt">Initial comparison offset</param>
/// <param name="budget">Work counter shared with the comparison routine</param>
public static void mainQSort3(uint* ptr, byte* block, ushort* quadrant, int nblock, int loSt, int hiSt, int dSt, int* budget)
{
    int[] workLo = new int[MAIN_QSORT_STACK_SIZE];
    int[] workHi = new int[MAIN_QSORT_STACK_SIZE];
    int[] workD = new int[MAIN_QSORT_STACK_SIZE];

    // The three sub-ranges produced by one partitioning step
    int[] partLo = new int[3];
    int[] partHi = new int[3];
    int[] partD = new int[3];

    int top = 0;
    int lo = 0, hi = 0, d = 0;

    mpush(loSt, hiSt, dSt, workLo, workHi, workD, ref top);

    while (top > 0)
    {
        //AssertH(top < MAIN_QSORT_STACK_SIZE - 2, 1001);

        mpop(ref lo, ref hi, ref d, workLo, workHi, workD, ref top);

        if (hi - lo < MAIN_QSORT_SMALL_THRESH || d > MAIN_QSORT_DEPTH_THRESH)
        {
            mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget);
            if (*budget < 0)
                return;

            continue;
        }

        int pivot = mmed3(block[ptr[lo] + d], block[ptr[hi] + d], block[ptr[(lo + hi) >> 1] + d]);

        // [lo, eqLeft) and (eqRight, hi] collect entries equal to the pivot;
        // scanLeft / scanRight are the two converging cursors.
        int eqLeft = lo, scanLeft = lo;
        int eqRight = hi, scanRight = hi;
        int diff;

        while (true)
        {
            while (scanLeft <= scanRight)
            {
                diff = (block[ptr[scanLeft] + d]) - pivot;
                if (diff > 0)
                    break;

                if (diff == 0)
                {
                    mswap(ref ptr[scanLeft], ref ptr[eqLeft]);
                    eqLeft++;
                }

                scanLeft++;
            }

            while (scanLeft <= scanRight)
            {
                diff = (block[ptr[scanRight] + d]) - pivot;
                if (diff < 0)
                    break;

                if (diff == 0)
                {
                    mswap(ref ptr[scanRight], ref ptr[eqRight]);
                    eqRight--;
                }

                scanRight--;
            }

            if (scanLeft > scanRight)
                break;

            mswap(ref ptr[scanLeft], ref ptr[scanRight]);
            scanLeft++;
            scanRight--;
        }

        //AssertD(scanRight == scanLeft - 1, "mainQSort3(2)");

        // Every entry matched the pivot byte: retry the same range one byte deeper
        if (eqRight < eqLeft)
        {
            mpush(lo, hi, d + 1, workLo, workHi, workD, ref top);
            continue;
        }

        // Move the two "equal" runs from the edges into the middle
        int leftMove = mmin(eqLeft - lo, scanLeft - eqLeft);
        mvswap(ptr, lo, scanLeft - leftMove, leftMove);

        int rightMove = mmin(hi - eqRight, eqRight - scanRight);
        mvswap(ptr, scanLeft, hi - rightMove + 1, rightMove);

        int lessEnd = lo + scanLeft - eqLeft - 1;
        int greaterStart = hi - (eqRight - scanRight) + 1;

        partLo[0] = lo;
        partHi[0] = lessEnd;
        partD[0] = d;

        partLo[1] = greaterStart;
        partHi[1] = hi;
        partD[1] = d;

        partLo[2] = lessEnd + 1;
        partHi[2] = greaterStart - 1;
        partD[2] = d + 1;

        // Order the three parts by decreasing size with the compare-swap
        // sequence (0,1), (1,2), (0,1).
        for (int pass = 0; pass < 3; pass++)
        {
            int first = pass & 1;
            int second = first + 1;
            if (mnextsize(first, partLo, partHi) < mnextsize(second, partLo, partHi))
                mnextswap(first, second, partLo, partHi, partD);
        }

        //AssertD(mnextsize(0) >= mnextsize(1), "mainQSort3(8)");
        //AssertD(mnextsize(1) >= mnextsize(2), "mainQSort3(9)");

        // Largest part goes on the stack first, so the smallest is handled next
        for (int part = 0; part < 3; part++)
            mpush(partLo[part], partHi[part], partD[part], workLo, workHi, workD, ref top);
    }
}
|
|
|
|
/// <summary>
/// Main block sorter: a two-byte radix sort into 65536 small buckets,
/// followed by per-bucket quicksorting in order of increasing big-bucket
/// size, with already-sorted buckets used to derive the order of others.
/// </summary>
/// <remarks>
/// Pre (as stated by the original bzip2 comment):
///   nblock is greater than N_OVERSHOOT;
///   block exists for [0 .. nblock-1 + N_OVERSHOOT] and [0 .. nblock-1] holds the block;
///   ptr exists for [0 .. nblock-1].
/// Post:
///   block[0 .. nblock-1] still holds the block, all other areas are destroyed;
///   ftab[0 .. 65536] is destroyed;
///   ptr[0 .. nblock-1] holds the sorted order;
///   when *budget is negative, sorting was abandoned.
/// </remarks>
/// <param name="ptr">Receives the sorted rotation start positions</param>
/// <param name="block">Block bytes, with room for the overshoot copy</param>
/// <param name="quadrant">Cached ordering values, maintained by this method</param>
/// <param name="ftab">Two-byte frequency / bucket-start table with 65537 entries</param>
/// <param name="nblock">Number of bytes in the block</param>
/// <param name="verb">Verbosity; only referenced by commented-out tracing</param>
/// <param name="budget">Work counter; sorting stops once it is negative</param>
public static void mainSort(uint* ptr, byte* block, ushort* quadrant, uint* ftab, int nblock, int verb, int* budget)
{
    int[] runningOrder = new int[256];
    bool[] bigDone = new bool[256];
    int[] copyStart = new int[256];
    int[] copyEnd = new int[256];
    int pos, slot;

    // if (verb >= 4) VPrintf0(" main sort initialise ...\n");

    /*-- set up the 2-byte frequency table --*/
    for (pos = 65536; pos >= 0; pos--)
        ftab[pos] = 0;

    // Walk the block backwards, keeping the current byte pair in "pair"
    int pair = block[0] << 8;
    for (pos = nblock - 1; pos >= 0; pos--)
    {
        quadrant[pos] = 0;
        pair = (pair >> 8) | ((block[pos]) << 8);
        ftab[pair]++;
    }

    /*-- (emphasises close relationship of block & quadrant) --*/
    for (pos = 0; pos < BZ_N_OVERSHOOT; pos++)
    {
        block[nblock + pos] = block[pos];
        quadrant[nblock + pos] = 0;
    }

    // if (verb >= 4) VPrintf0(" bucket sorting ...\n");

    /*-- Complete the initial radix sort --*/
    for (pos = 1; pos <= 65536; pos++)
        ftab[pos] += ftab[pos - 1];

    ushort key = (ushort)(block[0] << 8);
    for (pos = nblock - 1; pos >= 0; pos--)
    {
        key = (ushort)((key >> 8) | (block[pos] << 8));
        slot = (int)(ftab[key] - 1);
        ftab[key] = (uint)slot;
        ptr[slot] = (uint)pos;
    }

    /*--
       Now ftab contains the first loc of every small bucket.
       Calculate the running order, from smallest to largest
       big bucket.
    --*/
    for (pos = 0; pos <= 255; pos++)
    {
        bigDone[pos] = false;
        runningOrder[pos] = pos;
    }

    // Shell sort of runningOrder by big-bucket size (increments 3h+1)
    int gap = 1;
    do
    {
        gap = 3 * gap + 1;
    }
    while (gap <= 256);

    do
    {
        gap = gap / 3;
        for (pos = gap; pos <= 255; pos++)
        {
            int held = runningOrder[pos];
            slot = pos;
            while (BIGFREQ(runningOrder[slot - gap], ftab) > BIGFREQ(held, ftab))
            {
                runningOrder[slot] = runningOrder[slot - gap];
                slot = slot - gap;
                if (slot <= (gap - 1))
                    break;
            }

            runningOrder[slot] = held;
        }
    }
    while (gap != 1);

    /*--
       The main sorting loop.
    --*/

    int numQSorted = 0;

    for (int rank = 0; rank <= 255; rank++)
    {
        /*--
           Process big buckets, starting with the least full.
           Basically this is a 3-step process in which we call
           mainQSort3 to sort the small buckets [big, second], but
           also make a big effort to avoid the calls if we can.
        --*/
        int big = runningOrder[rank];

        /*--
           Step 1:
           Complete the big bucket [big] by quicksorting
           any unsorted small buckets [big, second], for second != big.
           Hopefully previous pointer-scanning phases have already
           completed many of the small buckets [big, second], so
           we don't have to sort them at all.
        --*/
        for (int second = 0; second <= 255; second++)
        {
            if (second == big)
                continue;

            int small = (big << 8) + second;
            if ((ftab[small] & SETMASK) == 0)
            {
                int first = (int)(ftab[small] & CLEARMASK);
                int last = (int)((ftab[small + 1] & CLEARMASK) - 1);
                if (last > first)
                {
                    // if (verb >= 4)
                    //     VPrintf4(" qsort [0x%x, 0x%x] "
                    //              "done %d this %d\n",
                    //              big, second, numQSorted, last - first + 1);

                    mainQSort3(ptr, block, quadrant, nblock, first, last, BZ_N_RADIX, budget);
                    numQSorted += (last - first + 1);
                    if (*budget < 0)
                        return;
                }
            }

            ftab[small] |= SETMASK;
        }

        //AssertH(!bigDone[big], 1006);

        /*--
           Step 2:
           Now scan this big bucket [big] so as to synthesise the
           sorted order for small buckets [t, big] for all t,
           including, magically, the bucket [big,big] too.
           This will avoid doing Real Work in subsequent Step 1's.
        --*/
        for (int lead = 0; lead <= 255; lead++)
        {
            copyStart[lead] = (int)(ftab[(lead << 8) + big] & CLEARMASK);
            copyEnd[lead] = (int)((ftab[(lead << 8) + big + 1] & CLEARMASK) - 1);
        }

        // copyStart[big] can grow while this loop runs, so the bound is re-read each time
        for (int scan = (int)(ftab[big << 8] & CLEARMASK); scan < copyStart[big]; scan++)
        {
            int pred = (int)(ptr[scan] - 1);
            if (pred < 0)
                pred += nblock;

            byte predByte = block[pred];
            if (!bigDone[predByte])
                ptr[copyStart[predByte]++] = (uint)pred;
        }

        // copyEnd[big] can shrink while this loop runs, so the bound is re-read each time
        for (int scan = (int)((ftab[(big + 1) << 8] & CLEARMASK) - 1); scan > copyEnd[big]; scan--)
        {
            int pred = (int)(ptr[scan] - 1);
            if (pred < 0)
                pred += nblock;

            byte predByte = block[pred];
            if (!bigDone[predByte])
                ptr[copyEnd[predByte]--] = (uint)pred;
        }

        // AssertH((copyStart[big] - 1 == copyEnd[big])
        //         ||
        //         /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
        //            Necessity for this case is demonstrated by compressing
        //            a sequence of approximately 48.5 million of character
        //            251; 1.0.0/1.0.1 will then die here. */
        //         (copyStart[big] == 0 && copyEnd[big] == nblock - 1),
        //         1007)

        for (int lead = 0; lead <= 255; lead++)
            ftab[(lead << 8) + big] |= SETMASK;

        /*--
           Step 3:
           The [big] big bucket is now done. Record this fact,
           and update the quadrant descriptors. Remember to
           update quadrants in the overshoot area too, if
           necessary. The "if (rank < 255)" test merely skips
           this updating for the last bucket processed, since
           updating for the last bucket is pointless.

           The quadrant array provides a way to incrementally
           cache sort orderings, as they appear, so as to
           make subsequent comparisons in fullGtU() complete
           faster. For repetitive blocks this makes a big
           difference (but not big enough to be able to avoid
           the fallback sorting mechanism, exponential radix sort).

           The precise meaning is: at all times:

              for 0 <= i < nblock and 0 <= j <= nblock

              if block[i] != block[j],

                 then the relative values of quadrant[i] and
                      quadrant[j] are meaningless.

                 else {
                    if quadrant[i] < quadrant[j]
                       then the string starting at i lexicographically
                       precedes the string starting at j

                    else if quadrant[i] > quadrant[j]
                       then the string starting at j lexicographically
                       precedes the string starting at i

                    else
                       the relative ordering of the strings starting
                       at i and j has not yet been determined.
                 }
        --*/
        bigDone[big] = true;

        if (rank < 255)
        {
            int bbStart = (int)(ftab[big << 8] & CLEARMASK);
            int bbSize = (int)((ftab[(big + 1) << 8] & CLEARMASK) - bbStart);

            // Scale ranks down until the largest one fits the 16-bit quadrant entries
            int shifts = 0;
            while ((bbSize >> shifts) > 65534)
                shifts++;

            for (int offset = bbSize - 1; offset >= 0; offset--)
            {
                int a2update = (int)ptr[bbStart + offset];
                ushort qVal = (ushort)(offset >> shifts);
                quadrant[a2update] = qVal;
                if (a2update < BZ_N_OVERSHOOT)
                    quadrant[a2update + nblock] = qVal;
            }

            // AssertH(((bbSize - 1) >> shifts) <= 65535, 1002);
        }
    }

    // if (verb >= 4)
    //     VPrintf3(" %d pointers, %d sorted, %d scanned\n",
    //              nblock, numQSorted, nblock - numQSorted);
}
|
|
|
|
/// <summary>
/// Entry point of the block sorter: sorts the rotations of the block held in
/// <paramref name="s"/> and records in s.origPtr the index at which ptr holds 0.
/// </summary>
/// <remarks>
/// Blocks shorter than 10000 bytes go straight to <see cref="fallbackSort"/>.
/// Larger blocks try <see cref="mainSort"/> with a work budget of
/// nblock * ((workFactor - 1) / 3), workFactor being clamped to [1, 100], and
/// switch to fallbackSort when that budget ends up negative.
///
/// Pre (as stated by the original bzip2 comment):
///   nblock is greater than 0;
///   arr2 exists for [0 .. nblock-1 + N_OVERSHOOT] and ((byte*)arr2)[0 .. nblock-1] holds the block;
///   arr1 exists for [0 .. nblock-1].
/// Post:
///   ((byte*)arr2)[0 .. nblock-1] holds the block, all other areas of block are destroyed;
///   ftab[0 .. 65536] is destroyed;
///   arr1[0 .. nblock-1] holds the sorted order.
/// </remarks>
/// <param name="s">Compressor state supplying the buffers, block size and settings</param>
public static void BZ2_blockSort(EState s)
{
    uint* ptr = s.ptr;
    byte* block = s.block;
    uint* ftab = s.ftab;
    int nblock = s.nblock;
    int verb = s.verbosity;
    int wfact = s.workFactor;

    bool useFallback = nblock < 10000;
    if (!useFallback)
    {
        /* Calculate the location for quadrant, remembering to get
           the alignment right. Assumes that &(block[0]) is at least
           2-byte aligned -- this should be ok since block is really
           the first section of arr2.
        */
        int quadOffset = nblock + BZ_N_OVERSHOOT;
        if ((quadOffset & 1) != 0)
            quadOffset++;

        ushort* quadrant = (ushort*)(&(block[quadOffset]));

        /* (wfact-1) / 3 puts the default-factor-30
           transition point at very roughly the same place as
           with v0.1 and v0.9.0.
           Not that it particularly matters any more, since the
           resulting compressed stream is now the same regardless
           of whether or not we use the main sort or fallback sort.
        */
        if (wfact < 1)
            wfact = 1;
        if (wfact > 100)
            wfact = 100;

        int budget = nblock * ((wfact - 1) / 3);

        mainSort(ptr, block, quadrant, ftab, nblock, verb, &budget);

        // if (verb >= 3)
        //     VPrintf3(" %d work, %d block, ratio %5.2f\n", ...);

        // A negative budget means mainSort gave up:
        // " too repetitive; using fallback sorting algorithm"
        useFallback = budget < 0;
    }

    if (useFallback)
        fallbackSort(s.arr1, s.arr2, ftab, nblock, verb);

    // Locate the entry holding start position 0
    s.origPtr = -1;
    for (int i = 0; i < s.nblock; i++)
    {
        if (ptr[i] == 0)
        {
            s.origPtr = i;
            break;
        }
    }

    //AssertH(s.origPtr != -1, 1003);
}
|
|
|
|
#region Macros
|
|
|
|
/// <summary>
/// Exchanges the values of two int variables.
/// </summary>
private static void fswap(ref int zz1, ref int zz2)
{
    int first = zz1;
    int second = zz2;
    zz1 = second;
    zz2 = first;
}
|
|
|
|
/// <summary>
/// Exchanges the values of two uint variables.
/// </summary>
private static void fswap(ref uint zz1, ref uint zz2)
{
    uint first = zz1;
    uint second = zz2;
    zz1 = second;
    zz2 = first;
}
|
|
|
|
/// <summary>
/// Exchanges zzn consecutive entries of fmap starting at zzp1 with the
/// zzn entries starting at zzp2, one pair at a time in ascending order.
/// Does nothing when zzn is zero or negative.
/// </summary>
private static void fvswap(uint* fmap, int zzp1, int zzp2, int zzn)
{
    for (int done = 0; done < zzn; done++)
        fswap(ref fmap[zzp1 + done], ref fmap[zzp2 + done]);
}
|
|
|
|
/// <summary>
/// Returns the smaller of two ints (b when they are equal).
/// </summary>
private static int fmin(int a, int b)
{
    if (a < b)
        return a;

    return b;
}
|
|
|
|
/// <summary>
/// Pushes the range (lz, hz) onto the paired stacks and advances sp.
/// </summary>
private static void fpush(int lz, int hz, int[] stackLo, int[] stackHi, ref int sp)
{
    int slot = sp;
    stackLo[slot] = lz;
    stackHi[slot] = hz;

    // Advance only after both writes succeeded
    sp = slot + 1;
}
|
|
|
|
/// <summary>
/// Pops the most recently pushed range from the paired stacks into
/// lz and hz, decrementing sp first.
/// </summary>
private static void fpop(ref int lz, ref int hz, int[] stackLo, int[] stackHi, ref int sp)
{
    int slot = sp - 1;
    sp = slot;
    lz = stackLo[slot];
    hz = stackHi[slot];
}
|
|
|
|
/// <summary>
/// Sets bit zz of the bucket-header bit table (bit zz mod 32 of word zz / 32).
/// </summary>
/// <param name="zz">Bit index</param>
/// <param name="bhtab">Bit table, 32 bits per entry</param>
private static void SET_BH(int zz, uint* bhtab)
{
    // Build the mask in unsigned arithmetic: "(uint)(1 << 31)" converts a
    // negative int and throws OverflowException when compiled as checked.
    bhtab[zz >> 5] |= 1u << (zz & 31);
}
|
|
|
|
/// <summary>
/// Clears bit zz of the bucket-header bit table (bit zz mod 32 of word zz / 32).
/// </summary>
/// <param name="zz">Bit index</param>
/// <param name="bhtab">Bit table, 32 bits per entry</param>
private static void CLEAR_BH(int zz, uint* bhtab)
{
    // Build the mask in unsigned arithmetic: "(uint)~(1 << n)" converts a
    // negative int for every n except 31 and throws OverflowException when
    // compiled as checked.
    bhtab[zz >> 5] &= ~(1u << (zz & 31));
}
|
|
|
|
/// <summary>
/// Reports whether bit zz of the bucket-header bit table is set.
/// </summary>
private static bool ISSET_BH(int zz, uint* bhtab)
{
    uint word = bhtab[zz >> 5];
    return ((word >> (zz & 31)) & 1u) != 0;
}
|
|
|
|
/// <summary>
/// Returns the whole 32-bit word of the bit table that contains bit zz.
/// </summary>
private static uint WORD_BH(int zz, uint* bhtab)
{
    int wordIndex = zz >> 5;
    return bhtab[wordIndex];
}
|
|
|
|
/// <summary>
/// Returns the position of bit zz inside its 32-bit word (the low five
/// bits of zz); zero means zz sits on a word boundary.
/// </summary>
private static int UNALIGNED_BH(int zz)
{
    const int bitInWordMask = 31;
    return zz & bitInWordMask;
}
|
|
|
|
/// <summary>
/// Exchanges the values of two uint variables.
/// </summary>
private static void mswap(ref uint zz1, ref uint zz2)
{
    uint first = zz1;
    uint second = zz2;
    zz1 = second;
    zz2 = first;
}
|
|
|
|
/// <summary>
/// Exchanges zzn consecutive entries of ptr starting at zzp1 with the
/// zzn entries starting at zzp2, one pair at a time in ascending order.
/// Does nothing when zzn is zero or negative.
/// </summary>
private static void mvswap(uint* ptr, int zzp1, int zzp2, int zzn)
{
    for (int done = 0; done < zzn; done++)
        mswap(ref ptr[zzp1 + done], ref ptr[zzp2 + done]);
}
|
|
|
|
/// <summary>
/// Returns the smaller of two ints (b when they are equal).
/// </summary>
private static int mmin(int a, int b)
{
    if (a < b)
        return a;

    return b;
}
|
|
|
|
/// <summary>
/// Pushes the work item (lz, hz, dz) onto the three parallel stacks and advances sp.
/// </summary>
private static void mpush(int lz, int hz, int dz, int[] stackLo, int[] stackHi, int[] stackD, ref int sp)
{
    int slot = sp;
    stackLo[slot] = lz;
    stackHi[slot] = hz;
    stackD[slot] = dz;

    // Advance only after all three writes succeeded
    sp = slot + 1;
}
|
|
|
|
/// <summary>
/// Pops the most recently pushed work item from the three parallel stacks
/// into lz, hz and dz, decrementing sp first.
/// </summary>
private static void mpop(ref int lz, ref int hz, ref int dz, int[] stackLo, int[] stackHi, int[] stackD, ref int sp)
{
    int slot = sp - 1;
    sp = slot;
    lz = stackLo[slot];
    hz = stackHi[slot];
    dz = stackD[slot];
}
|
|
|
|
/// <summary>
/// Returns nextHi[az] - nextLo[az], the span of pending sub-range az
/// (one less than its element count).
/// </summary>
private static int mnextsize(int az, int[] nextLo, int[] nextHi)
{
    int upper = nextHi[az];
    int lower = nextLo[az];
    return upper - lower;
}
|
|
|
|
/// <summary>
/// Exchanges pending sub-ranges az and bz across the three parallel arrays.
/// </summary>
private static void mnextswap(int az, int bz, int[] nextLo, int[] nextHi, int[] nextD)
{
    int keptLo = nextLo[az];
    nextLo[az] = nextLo[bz];
    nextLo[bz] = keptLo;

    int keptHi = nextHi[az];
    nextHi[az] = nextHi[bz];
    nextHi[bz] = keptHi;

    int keptD = nextD[az];
    nextD[az] = nextD[bz];
    nextD[bz] = keptD;
}
|
|
|
|
/// <summary>
/// Returns ftab[(b + 1) * 256] - ftab[b * 256], i.e. the number of entries
/// between the start of big bucket b and the start of the next one.
/// </summary>
private static uint BIGFREQ(int b, uint* ftab)
{
    uint nextStart = ftab[(b + 1) << 8];
    uint thisStart = ftab[b << 8];
    return nextStart - thisStart;
}
|
|
|
|
#endregion
|
|
}
|
|
} |