171e7ee59SPeter Avalos
271e7ee59SPeter Avalos /*-------------------------------------------------------------*/
371e7ee59SPeter Avalos /*--- Block sorting machinery ---*/
471e7ee59SPeter Avalos /*--- blocksort.c ---*/
571e7ee59SPeter Avalos /*-------------------------------------------------------------*/
671e7ee59SPeter Avalos
771e7ee59SPeter Avalos /* ------------------------------------------------------------------
871e7ee59SPeter Avalos This file is part of bzip2/libbzip2, a program and library for
971e7ee59SPeter Avalos lossless, block-sorting data compression.
1071e7ee59SPeter Avalos
11*86954436SDaniel Fojt bzip2/libbzip2 version 1.0.8 of 13 July 2019
12*86954436SDaniel Fojt Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
1371e7ee59SPeter Avalos
1471e7ee59SPeter Avalos Please read the WARNING, DISCLAIMER and PATENTS sections in the
1571e7ee59SPeter Avalos README file.
1671e7ee59SPeter Avalos
1771e7ee59SPeter Avalos This program is released under the terms of the license contained
1871e7ee59SPeter Avalos in the file LICENSE.
1971e7ee59SPeter Avalos ------------------------------------------------------------------ */
2071e7ee59SPeter Avalos
2171e7ee59SPeter Avalos
2271e7ee59SPeter Avalos #include "bzlib_private.h"
2371e7ee59SPeter Avalos
2471e7ee59SPeter Avalos /*---------------------------------------------*/
2571e7ee59SPeter Avalos /*--- Fallback O(N log(N)^2) sorting ---*/
2671e7ee59SPeter Avalos /*--- algorithm, for repetitive blocks ---*/
2771e7ee59SPeter Avalos /*---------------------------------------------*/
2871e7ee59SPeter Avalos
2971e7ee59SPeter Avalos /*---------------------------------------------*/
3071e7ee59SPeter Avalos static
3171e7ee59SPeter Avalos __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)3271e7ee59SPeter Avalos void fallbackSimpleSort ( UInt32* fmap,
3371e7ee59SPeter Avalos UInt32* eclass,
3471e7ee59SPeter Avalos Int32 lo,
3571e7ee59SPeter Avalos Int32 hi )
3671e7ee59SPeter Avalos {
3771e7ee59SPeter Avalos Int32 i, j, tmp;
3871e7ee59SPeter Avalos UInt32 ec_tmp;
3971e7ee59SPeter Avalos
4071e7ee59SPeter Avalos if (lo == hi) return;
4171e7ee59SPeter Avalos
4271e7ee59SPeter Avalos if (hi - lo > 3) {
4371e7ee59SPeter Avalos for ( i = hi-4; i >= lo; i-- ) {
4471e7ee59SPeter Avalos tmp = fmap[i];
4571e7ee59SPeter Avalos ec_tmp = eclass[tmp];
4671e7ee59SPeter Avalos for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
4771e7ee59SPeter Avalos fmap[j-4] = fmap[j];
4871e7ee59SPeter Avalos fmap[j-4] = tmp;
4971e7ee59SPeter Avalos }
5071e7ee59SPeter Avalos }
5171e7ee59SPeter Avalos
5271e7ee59SPeter Avalos for ( i = hi-1; i >= lo; i-- ) {
5371e7ee59SPeter Avalos tmp = fmap[i];
5471e7ee59SPeter Avalos ec_tmp = eclass[tmp];
5571e7ee59SPeter Avalos for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
5671e7ee59SPeter Avalos fmap[j-1] = fmap[j];
5771e7ee59SPeter Avalos fmap[j-1] = tmp;
5871e7ee59SPeter Avalos }
5971e7ee59SPeter Avalos }
6071e7ee59SPeter Avalos
6171e7ee59SPeter Avalos
6271e7ee59SPeter Avalos /*---------------------------------------------*/
6371e7ee59SPeter Avalos #define fswap(zz1, zz2) \
6471e7ee59SPeter Avalos { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
6571e7ee59SPeter Avalos
6671e7ee59SPeter Avalos #define fvswap(zzp1, zzp2, zzn) \
6771e7ee59SPeter Avalos { \
6871e7ee59SPeter Avalos Int32 yyp1 = (zzp1); \
6971e7ee59SPeter Avalos Int32 yyp2 = (zzp2); \
7071e7ee59SPeter Avalos Int32 yyn = (zzn); \
7171e7ee59SPeter Avalos while (yyn > 0) { \
7271e7ee59SPeter Avalos fswap(fmap[yyp1], fmap[yyp2]); \
7371e7ee59SPeter Avalos yyp1++; yyp2++; yyn--; \
7471e7ee59SPeter Avalos } \
7571e7ee59SPeter Avalos }
7671e7ee59SPeter Avalos
7771e7ee59SPeter Avalos
7871e7ee59SPeter Avalos #define fmin(a,b) ((a) < (b)) ? (a) : (b)
7971e7ee59SPeter Avalos
8071e7ee59SPeter Avalos #define fpush(lz,hz) { stackLo[sp] = lz; \
8171e7ee59SPeter Avalos stackHi[sp] = hz; \
8271e7ee59SPeter Avalos sp++; }
8371e7ee59SPeter Avalos
8471e7ee59SPeter Avalos #define fpop(lz,hz) { sp--; \
8571e7ee59SPeter Avalos lz = stackLo[sp]; \
8671e7ee59SPeter Avalos hz = stackHi[sp]; }
8771e7ee59SPeter Avalos
8871e7ee59SPeter Avalos #define FALLBACK_QSORT_SMALL_THRESH 10
8971e7ee59SPeter Avalos #define FALLBACK_QSORT_STACK_SIZE 100
9071e7ee59SPeter Avalos
9171e7ee59SPeter Avalos
9271e7ee59SPeter Avalos static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)9371e7ee59SPeter Avalos void fallbackQSort3 ( UInt32* fmap,
9471e7ee59SPeter Avalos UInt32* eclass,
9571e7ee59SPeter Avalos Int32 loSt,
9671e7ee59SPeter Avalos Int32 hiSt )
9771e7ee59SPeter Avalos {
9871e7ee59SPeter Avalos Int32 unLo, unHi, ltLo, gtHi, n, m;
9971e7ee59SPeter Avalos Int32 sp, lo, hi;
10071e7ee59SPeter Avalos UInt32 med, r, r3;
10171e7ee59SPeter Avalos Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
10271e7ee59SPeter Avalos Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
10371e7ee59SPeter Avalos
10471e7ee59SPeter Avalos r = 0;
10571e7ee59SPeter Avalos
10671e7ee59SPeter Avalos sp = 0;
10771e7ee59SPeter Avalos fpush ( loSt, hiSt );
10871e7ee59SPeter Avalos
10971e7ee59SPeter Avalos while (sp > 0) {
11071e7ee59SPeter Avalos
11171e7ee59SPeter Avalos AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
11271e7ee59SPeter Avalos
11371e7ee59SPeter Avalos fpop ( lo, hi );
11471e7ee59SPeter Avalos if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
11571e7ee59SPeter Avalos fallbackSimpleSort ( fmap, eclass, lo, hi );
11671e7ee59SPeter Avalos continue;
11771e7ee59SPeter Avalos }
11871e7ee59SPeter Avalos
11971e7ee59SPeter Avalos /* Random partitioning. Median of 3 sometimes fails to
12071e7ee59SPeter Avalos avoid bad cases. Median of 9 seems to help but
12171e7ee59SPeter Avalos looks rather expensive. This too seems to work but
12271e7ee59SPeter Avalos is cheaper. Guidance for the magic constants
12371e7ee59SPeter Avalos 7621 and 32768 is taken from Sedgewick's algorithms
12471e7ee59SPeter Avalos book, chapter 35.
12571e7ee59SPeter Avalos */
12671e7ee59SPeter Avalos r = ((r * 7621) + 1) % 32768;
12771e7ee59SPeter Avalos r3 = r % 3;
12871e7ee59SPeter Avalos if (r3 == 0) med = eclass[fmap[lo]]; else
12971e7ee59SPeter Avalos if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
13071e7ee59SPeter Avalos med = eclass[fmap[hi]];
13171e7ee59SPeter Avalos
13271e7ee59SPeter Avalos unLo = ltLo = lo;
13371e7ee59SPeter Avalos unHi = gtHi = hi;
13471e7ee59SPeter Avalos
13571e7ee59SPeter Avalos while (1) {
13671e7ee59SPeter Avalos while (1) {
13771e7ee59SPeter Avalos if (unLo > unHi) break;
13871e7ee59SPeter Avalos n = (Int32)eclass[fmap[unLo]] - (Int32)med;
13971e7ee59SPeter Avalos if (n == 0) {
14071e7ee59SPeter Avalos fswap(fmap[unLo], fmap[ltLo]);
14171e7ee59SPeter Avalos ltLo++; unLo++;
14271e7ee59SPeter Avalos continue;
14371e7ee59SPeter Avalos };
14471e7ee59SPeter Avalos if (n > 0) break;
14571e7ee59SPeter Avalos unLo++;
14671e7ee59SPeter Avalos }
14771e7ee59SPeter Avalos while (1) {
14871e7ee59SPeter Avalos if (unLo > unHi) break;
14971e7ee59SPeter Avalos n = (Int32)eclass[fmap[unHi]] - (Int32)med;
15071e7ee59SPeter Avalos if (n == 0) {
15171e7ee59SPeter Avalos fswap(fmap[unHi], fmap[gtHi]);
15271e7ee59SPeter Avalos gtHi--; unHi--;
15371e7ee59SPeter Avalos continue;
15471e7ee59SPeter Avalos };
15571e7ee59SPeter Avalos if (n < 0) break;
15671e7ee59SPeter Avalos unHi--;
15771e7ee59SPeter Avalos }
15871e7ee59SPeter Avalos if (unLo > unHi) break;
15971e7ee59SPeter Avalos fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
16071e7ee59SPeter Avalos }
16171e7ee59SPeter Avalos
16271e7ee59SPeter Avalos AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
16371e7ee59SPeter Avalos
16471e7ee59SPeter Avalos if (gtHi < ltLo) continue;
16571e7ee59SPeter Avalos
16671e7ee59SPeter Avalos n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
16771e7ee59SPeter Avalos m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
16871e7ee59SPeter Avalos
16971e7ee59SPeter Avalos n = lo + unLo - ltLo - 1;
17071e7ee59SPeter Avalos m = hi - (gtHi - unHi) + 1;
17171e7ee59SPeter Avalos
17271e7ee59SPeter Avalos if (n - lo > hi - m) {
17371e7ee59SPeter Avalos fpush ( lo, n );
17471e7ee59SPeter Avalos fpush ( m, hi );
17571e7ee59SPeter Avalos } else {
17671e7ee59SPeter Avalos fpush ( m, hi );
17771e7ee59SPeter Avalos fpush ( lo, n );
17871e7ee59SPeter Avalos }
17971e7ee59SPeter Avalos }
18071e7ee59SPeter Avalos }
18171e7ee59SPeter Avalos
18271e7ee59SPeter Avalos #undef fmin
18371e7ee59SPeter Avalos #undef fpush
18471e7ee59SPeter Avalos #undef fpop
18571e7ee59SPeter Avalos #undef fswap
18671e7ee59SPeter Avalos #undef fvswap
18771e7ee59SPeter Avalos #undef FALLBACK_QSORT_SMALL_THRESH
18871e7ee59SPeter Avalos #undef FALLBACK_QSORT_STACK_SIZE
18971e7ee59SPeter Avalos
19071e7ee59SPeter Avalos
19171e7ee59SPeter Avalos /*---------------------------------------------*/
19271e7ee59SPeter Avalos /* Pre:
19371e7ee59SPeter Avalos nblock > 0
19471e7ee59SPeter Avalos eclass exists for [0 .. nblock-1]
19571e7ee59SPeter Avalos ((UChar*)eclass) [0 .. nblock-1] holds block
19671e7ee59SPeter Avalos ptr exists for [0 .. nblock-1]
19771e7ee59SPeter Avalos
19871e7ee59SPeter Avalos Post:
19971e7ee59SPeter Avalos ((UChar*)eclass) [0 .. nblock-1] holds block
20071e7ee59SPeter Avalos All other areas of eclass destroyed
20171e7ee59SPeter Avalos fmap [0 .. nblock-1] holds sorted order
20271e7ee59SPeter Avalos bhtab [ 0 .. 2+(nblock/32) ] destroyed
20371e7ee59SPeter Avalos */
20471e7ee59SPeter Avalos
205*86954436SDaniel Fojt #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
206*86954436SDaniel Fojt #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
207*86954436SDaniel Fojt #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
20871e7ee59SPeter Avalos #define WORD_BH(zz) bhtab[(zz) >> 5]
20971e7ee59SPeter Avalos #define UNALIGNED_BH(zz) ((zz) & 0x01f)
21071e7ee59SPeter Avalos
21171e7ee59SPeter Avalos static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)21271e7ee59SPeter Avalos void fallbackSort ( UInt32* fmap,
21371e7ee59SPeter Avalos UInt32* eclass,
21471e7ee59SPeter Avalos UInt32* bhtab,
21571e7ee59SPeter Avalos Int32 nblock,
21671e7ee59SPeter Avalos Int32 verb )
21771e7ee59SPeter Avalos {
21871e7ee59SPeter Avalos Int32 ftab[257];
21971e7ee59SPeter Avalos Int32 ftabCopy[256];
22071e7ee59SPeter Avalos Int32 H, i, j, k, l, r, cc, cc1;
22171e7ee59SPeter Avalos Int32 nNotDone;
22271e7ee59SPeter Avalos Int32 nBhtab;
22371e7ee59SPeter Avalos UChar* eclass8 = (UChar*)eclass;
22471e7ee59SPeter Avalos
22571e7ee59SPeter Avalos /*--
22671e7ee59SPeter Avalos Initial 1-char radix sort to generate
22771e7ee59SPeter Avalos initial fmap and initial BH bits.
22871e7ee59SPeter Avalos --*/
22971e7ee59SPeter Avalos if (verb >= 4)
23071e7ee59SPeter Avalos VPrintf0 ( " bucket sorting ...\n" );
23171e7ee59SPeter Avalos for (i = 0; i < 257; i++) ftab[i] = 0;
23271e7ee59SPeter Avalos for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
23371e7ee59SPeter Avalos for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
23471e7ee59SPeter Avalos for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
23571e7ee59SPeter Avalos
23671e7ee59SPeter Avalos for (i = 0; i < nblock; i++) {
23771e7ee59SPeter Avalos j = eclass8[i];
23871e7ee59SPeter Avalos k = ftab[j] - 1;
23971e7ee59SPeter Avalos ftab[j] = k;
24071e7ee59SPeter Avalos fmap[k] = i;
24171e7ee59SPeter Avalos }
24271e7ee59SPeter Avalos
24371e7ee59SPeter Avalos nBhtab = 2 + (nblock / 32);
24471e7ee59SPeter Avalos for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
24571e7ee59SPeter Avalos for (i = 0; i < 256; i++) SET_BH(ftab[i]);
24671e7ee59SPeter Avalos
24771e7ee59SPeter Avalos /*--
24871e7ee59SPeter Avalos Inductively refine the buckets. Kind-of an
24971e7ee59SPeter Avalos "exponential radix sort" (!), inspired by the
25071e7ee59SPeter Avalos Manber-Myers suffix array construction algorithm.
25171e7ee59SPeter Avalos --*/
25271e7ee59SPeter Avalos
25371e7ee59SPeter Avalos /*-- set sentinel bits for block-end detection --*/
25471e7ee59SPeter Avalos for (i = 0; i < 32; i++) {
25571e7ee59SPeter Avalos SET_BH(nblock + 2*i);
25671e7ee59SPeter Avalos CLEAR_BH(nblock + 2*i + 1);
25771e7ee59SPeter Avalos }
25871e7ee59SPeter Avalos
25971e7ee59SPeter Avalos /*-- the log(N) loop --*/
26071e7ee59SPeter Avalos H = 1;
26171e7ee59SPeter Avalos while (1) {
26271e7ee59SPeter Avalos
26371e7ee59SPeter Avalos if (verb >= 4)
26471e7ee59SPeter Avalos VPrintf1 ( " depth %6d has ", H );
26571e7ee59SPeter Avalos
26671e7ee59SPeter Avalos j = 0;
26771e7ee59SPeter Avalos for (i = 0; i < nblock; i++) {
26871e7ee59SPeter Avalos if (ISSET_BH(i)) j = i;
26971e7ee59SPeter Avalos k = fmap[i] - H; if (k < 0) k += nblock;
27071e7ee59SPeter Avalos eclass[k] = j;
27171e7ee59SPeter Avalos }
27271e7ee59SPeter Avalos
27371e7ee59SPeter Avalos nNotDone = 0;
27471e7ee59SPeter Avalos r = -1;
27571e7ee59SPeter Avalos while (1) {
27671e7ee59SPeter Avalos
27771e7ee59SPeter Avalos /*-- find the next non-singleton bucket --*/
27871e7ee59SPeter Avalos k = r + 1;
27971e7ee59SPeter Avalos while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28071e7ee59SPeter Avalos if (ISSET_BH(k)) {
28171e7ee59SPeter Avalos while (WORD_BH(k) == 0xffffffff) k += 32;
28271e7ee59SPeter Avalos while (ISSET_BH(k)) k++;
28371e7ee59SPeter Avalos }
28471e7ee59SPeter Avalos l = k - 1;
28571e7ee59SPeter Avalos if (l >= nblock) break;
28671e7ee59SPeter Avalos while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28771e7ee59SPeter Avalos if (!ISSET_BH(k)) {
28871e7ee59SPeter Avalos while (WORD_BH(k) == 0x00000000) k += 32;
28971e7ee59SPeter Avalos while (!ISSET_BH(k)) k++;
29071e7ee59SPeter Avalos }
29171e7ee59SPeter Avalos r = k - 1;
29271e7ee59SPeter Avalos if (r >= nblock) break;
29371e7ee59SPeter Avalos
29471e7ee59SPeter Avalos /*-- now [l, r] bracket current bucket --*/
29571e7ee59SPeter Avalos if (r > l) {
29671e7ee59SPeter Avalos nNotDone += (r - l + 1);
29771e7ee59SPeter Avalos fallbackQSort3 ( fmap, eclass, l, r );
29871e7ee59SPeter Avalos
29971e7ee59SPeter Avalos /*-- scan bucket and generate header bits-- */
30071e7ee59SPeter Avalos cc = -1;
30171e7ee59SPeter Avalos for (i = l; i <= r; i++) {
30271e7ee59SPeter Avalos cc1 = eclass[fmap[i]];
30371e7ee59SPeter Avalos if (cc != cc1) { SET_BH(i); cc = cc1; };
30471e7ee59SPeter Avalos }
30571e7ee59SPeter Avalos }
30671e7ee59SPeter Avalos }
30771e7ee59SPeter Avalos
30871e7ee59SPeter Avalos if (verb >= 4)
30971e7ee59SPeter Avalos VPrintf1 ( "%6d unresolved strings\n", nNotDone );
31071e7ee59SPeter Avalos
31171e7ee59SPeter Avalos H *= 2;
31271e7ee59SPeter Avalos if (H > nblock || nNotDone == 0) break;
31371e7ee59SPeter Avalos }
31471e7ee59SPeter Avalos
31571e7ee59SPeter Avalos /*--
31671e7ee59SPeter Avalos Reconstruct the original block in
31771e7ee59SPeter Avalos eclass8 [0 .. nblock-1], since the
31871e7ee59SPeter Avalos previous phase destroyed it.
31971e7ee59SPeter Avalos --*/
32071e7ee59SPeter Avalos if (verb >= 4)
32171e7ee59SPeter Avalos VPrintf0 ( " reconstructing block ...\n" );
32271e7ee59SPeter Avalos j = 0;
32371e7ee59SPeter Avalos for (i = 0; i < nblock; i++) {
32471e7ee59SPeter Avalos while (ftabCopy[j] == 0) j++;
32571e7ee59SPeter Avalos ftabCopy[j]--;
32671e7ee59SPeter Avalos eclass8[fmap[i]] = (UChar)j;
32771e7ee59SPeter Avalos }
32871e7ee59SPeter Avalos AssertH ( j < 256, 1005 );
32971e7ee59SPeter Avalos }
33071e7ee59SPeter Avalos
33171e7ee59SPeter Avalos #undef SET_BH
33271e7ee59SPeter Avalos #undef CLEAR_BH
33371e7ee59SPeter Avalos #undef ISSET_BH
33471e7ee59SPeter Avalos #undef WORD_BH
33571e7ee59SPeter Avalos #undef UNALIGNED_BH
33671e7ee59SPeter Avalos
33771e7ee59SPeter Avalos
33871e7ee59SPeter Avalos /*---------------------------------------------*/
33971e7ee59SPeter Avalos /*--- The main, O(N^2 log(N)) sorting ---*/
34071e7ee59SPeter Avalos /*--- algorithm. Faster for "normal" ---*/
34171e7ee59SPeter Avalos /*--- non-repetitive blocks. ---*/
34271e7ee59SPeter Avalos /*---------------------------------------------*/
34371e7ee59SPeter Avalos
34471e7ee59SPeter Avalos /*---------------------------------------------*/
34571e7ee59SPeter Avalos static
34671e7ee59SPeter Avalos __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)34771e7ee59SPeter Avalos Bool mainGtU ( UInt32 i1,
34871e7ee59SPeter Avalos UInt32 i2,
34971e7ee59SPeter Avalos UChar* block,
35071e7ee59SPeter Avalos UInt16* quadrant,
35171e7ee59SPeter Avalos UInt32 nblock,
35271e7ee59SPeter Avalos Int32* budget )
35371e7ee59SPeter Avalos {
35471e7ee59SPeter Avalos Int32 k;
35571e7ee59SPeter Avalos UChar c1, c2;
35671e7ee59SPeter Avalos UInt16 s1, s2;
35771e7ee59SPeter Avalos
35871e7ee59SPeter Avalos AssertD ( i1 != i2, "mainGtU" );
35971e7ee59SPeter Avalos /* 1 */
36071e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
36171e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
36271e7ee59SPeter Avalos i1++; i2++;
36371e7ee59SPeter Avalos /* 2 */
36471e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
36571e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
36671e7ee59SPeter Avalos i1++; i2++;
36771e7ee59SPeter Avalos /* 3 */
36871e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
36971e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
37071e7ee59SPeter Avalos i1++; i2++;
37171e7ee59SPeter Avalos /* 4 */
37271e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
37371e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
37471e7ee59SPeter Avalos i1++; i2++;
37571e7ee59SPeter Avalos /* 5 */
37671e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
37771e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
37871e7ee59SPeter Avalos i1++; i2++;
37971e7ee59SPeter Avalos /* 6 */
38071e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
38171e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
38271e7ee59SPeter Avalos i1++; i2++;
38371e7ee59SPeter Avalos /* 7 */
38471e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
38571e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
38671e7ee59SPeter Avalos i1++; i2++;
38771e7ee59SPeter Avalos /* 8 */
38871e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
38971e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
39071e7ee59SPeter Avalos i1++; i2++;
39171e7ee59SPeter Avalos /* 9 */
39271e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
39371e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
39471e7ee59SPeter Avalos i1++; i2++;
39571e7ee59SPeter Avalos /* 10 */
39671e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
39771e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
39871e7ee59SPeter Avalos i1++; i2++;
39971e7ee59SPeter Avalos /* 11 */
40071e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
40171e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
40271e7ee59SPeter Avalos i1++; i2++;
40371e7ee59SPeter Avalos /* 12 */
40471e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
40571e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
40671e7ee59SPeter Avalos i1++; i2++;
40771e7ee59SPeter Avalos
40871e7ee59SPeter Avalos k = nblock + 8;
40971e7ee59SPeter Avalos
41071e7ee59SPeter Avalos do {
41171e7ee59SPeter Avalos /* 1 */
41271e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
41371e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
41471e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
41571e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
41671e7ee59SPeter Avalos i1++; i2++;
41771e7ee59SPeter Avalos /* 2 */
41871e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
41971e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
42071e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
42171e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
42271e7ee59SPeter Avalos i1++; i2++;
42371e7ee59SPeter Avalos /* 3 */
42471e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
42571e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
42671e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
42771e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
42871e7ee59SPeter Avalos i1++; i2++;
42971e7ee59SPeter Avalos /* 4 */
43071e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
43171e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
43271e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
43371e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
43471e7ee59SPeter Avalos i1++; i2++;
43571e7ee59SPeter Avalos /* 5 */
43671e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
43771e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
43871e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
43971e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
44071e7ee59SPeter Avalos i1++; i2++;
44171e7ee59SPeter Avalos /* 6 */
44271e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
44371e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
44471e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
44571e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
44671e7ee59SPeter Avalos i1++; i2++;
44771e7ee59SPeter Avalos /* 7 */
44871e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
44971e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
45071e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
45171e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
45271e7ee59SPeter Avalos i1++; i2++;
45371e7ee59SPeter Avalos /* 8 */
45471e7ee59SPeter Avalos c1 = block[i1]; c2 = block[i2];
45571e7ee59SPeter Avalos if (c1 != c2) return (c1 > c2);
45671e7ee59SPeter Avalos s1 = quadrant[i1]; s2 = quadrant[i2];
45771e7ee59SPeter Avalos if (s1 != s2) return (s1 > s2);
45871e7ee59SPeter Avalos i1++; i2++;
45971e7ee59SPeter Avalos
46071e7ee59SPeter Avalos if (i1 >= nblock) i1 -= nblock;
46171e7ee59SPeter Avalos if (i2 >= nblock) i2 -= nblock;
46271e7ee59SPeter Avalos
46371e7ee59SPeter Avalos k -= 8;
46471e7ee59SPeter Avalos (*budget)--;
46571e7ee59SPeter Avalos }
46671e7ee59SPeter Avalos while (k >= 0);
46771e7ee59SPeter Avalos
46871e7ee59SPeter Avalos return False;
46971e7ee59SPeter Avalos }
47071e7ee59SPeter Avalos
47171e7ee59SPeter Avalos
47271e7ee59SPeter Avalos /*---------------------------------------------*/
47371e7ee59SPeter Avalos /*--
47471e7ee59SPeter Avalos Knuth's increments seem to work better
47571e7ee59SPeter Avalos than Incerpi-Sedgewick here. Possibly
47671e7ee59SPeter Avalos because the number of elems to sort is
47771e7ee59SPeter Avalos usually small, typically <= 20.
47871e7ee59SPeter Avalos --*/
47971e7ee59SPeter Avalos static
48071e7ee59SPeter Avalos Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
48171e7ee59SPeter Avalos 9841, 29524, 88573, 265720,
48271e7ee59SPeter Avalos 797161, 2391484 };
48371e7ee59SPeter Avalos
48471e7ee59SPeter Avalos static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)48571e7ee59SPeter Avalos void mainSimpleSort ( UInt32* ptr,
48671e7ee59SPeter Avalos UChar* block,
48771e7ee59SPeter Avalos UInt16* quadrant,
48871e7ee59SPeter Avalos Int32 nblock,
48971e7ee59SPeter Avalos Int32 lo,
49071e7ee59SPeter Avalos Int32 hi,
49171e7ee59SPeter Avalos Int32 d,
49271e7ee59SPeter Avalos Int32* budget )
49371e7ee59SPeter Avalos {
49471e7ee59SPeter Avalos Int32 i, j, h, bigN, hp;
49571e7ee59SPeter Avalos UInt32 v;
49671e7ee59SPeter Avalos
49771e7ee59SPeter Avalos bigN = hi - lo + 1;
49871e7ee59SPeter Avalos if (bigN < 2) return;
49971e7ee59SPeter Avalos
50071e7ee59SPeter Avalos hp = 0;
50171e7ee59SPeter Avalos while (incs[hp] < bigN) hp++;
50271e7ee59SPeter Avalos hp--;
50371e7ee59SPeter Avalos
50471e7ee59SPeter Avalos for (; hp >= 0; hp--) {
50571e7ee59SPeter Avalos h = incs[hp];
50671e7ee59SPeter Avalos
50771e7ee59SPeter Avalos i = lo + h;
50871e7ee59SPeter Avalos while (True) {
50971e7ee59SPeter Avalos
51071e7ee59SPeter Avalos /*-- copy 1 --*/
51171e7ee59SPeter Avalos if (i > hi) break;
51271e7ee59SPeter Avalos v = ptr[i];
51371e7ee59SPeter Avalos j = i;
51471e7ee59SPeter Avalos while ( mainGtU (
51571e7ee59SPeter Avalos ptr[j-h]+d, v+d, block, quadrant, nblock, budget
51671e7ee59SPeter Avalos ) ) {
51771e7ee59SPeter Avalos ptr[j] = ptr[j-h];
51871e7ee59SPeter Avalos j = j - h;
51971e7ee59SPeter Avalos if (j <= (lo + h - 1)) break;
52071e7ee59SPeter Avalos }
52171e7ee59SPeter Avalos ptr[j] = v;
52271e7ee59SPeter Avalos i++;
52371e7ee59SPeter Avalos
52471e7ee59SPeter Avalos /*-- copy 2 --*/
52571e7ee59SPeter Avalos if (i > hi) break;
52671e7ee59SPeter Avalos v = ptr[i];
52771e7ee59SPeter Avalos j = i;
52871e7ee59SPeter Avalos while ( mainGtU (
52971e7ee59SPeter Avalos ptr[j-h]+d, v+d, block, quadrant, nblock, budget
53071e7ee59SPeter Avalos ) ) {
53171e7ee59SPeter Avalos ptr[j] = ptr[j-h];
53271e7ee59SPeter Avalos j = j - h;
53371e7ee59SPeter Avalos if (j <= (lo + h - 1)) break;
53471e7ee59SPeter Avalos }
53571e7ee59SPeter Avalos ptr[j] = v;
53671e7ee59SPeter Avalos i++;
53771e7ee59SPeter Avalos
53871e7ee59SPeter Avalos /*-- copy 3 --*/
53971e7ee59SPeter Avalos if (i > hi) break;
54071e7ee59SPeter Avalos v = ptr[i];
54171e7ee59SPeter Avalos j = i;
54271e7ee59SPeter Avalos while ( mainGtU (
54371e7ee59SPeter Avalos ptr[j-h]+d, v+d, block, quadrant, nblock, budget
54471e7ee59SPeter Avalos ) ) {
54571e7ee59SPeter Avalos ptr[j] = ptr[j-h];
54671e7ee59SPeter Avalos j = j - h;
54771e7ee59SPeter Avalos if (j <= (lo + h - 1)) break;
54871e7ee59SPeter Avalos }
54971e7ee59SPeter Avalos ptr[j] = v;
55071e7ee59SPeter Avalos i++;
55171e7ee59SPeter Avalos
55271e7ee59SPeter Avalos if (*budget < 0) return;
55371e7ee59SPeter Avalos }
55471e7ee59SPeter Avalos }
55571e7ee59SPeter Avalos }
55671e7ee59SPeter Avalos
55771e7ee59SPeter Avalos
55871e7ee59SPeter Avalos /*---------------------------------------------*/
55971e7ee59SPeter Avalos /*--
56071e7ee59SPeter Avalos The following is an implementation of
56171e7ee59SPeter Avalos an elegant 3-way quicksort for strings,
56271e7ee59SPeter Avalos described in a paper "Fast Algorithms for
56371e7ee59SPeter Avalos Sorting and Searching Strings", by Robert
56471e7ee59SPeter Avalos Sedgewick and Jon L. Bentley.
56571e7ee59SPeter Avalos --*/
56671e7ee59SPeter Avalos
56771e7ee59SPeter Avalos #define mswap(zz1, zz2) \
56871e7ee59SPeter Avalos { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
56971e7ee59SPeter Avalos
57071e7ee59SPeter Avalos #define mvswap(zzp1, zzp2, zzn) \
57171e7ee59SPeter Avalos { \
57271e7ee59SPeter Avalos Int32 yyp1 = (zzp1); \
57371e7ee59SPeter Avalos Int32 yyp2 = (zzp2); \
57471e7ee59SPeter Avalos Int32 yyn = (zzn); \
57571e7ee59SPeter Avalos while (yyn > 0) { \
57671e7ee59SPeter Avalos mswap(ptr[yyp1], ptr[yyp2]); \
57771e7ee59SPeter Avalos yyp1++; yyp2++; yyn--; \
57871e7ee59SPeter Avalos } \
57971e7ee59SPeter Avalos }
58071e7ee59SPeter Avalos
58171e7ee59SPeter Avalos static
58271e7ee59SPeter Avalos __inline__
mmed3(UChar a,UChar b,UChar c)58371e7ee59SPeter Avalos UChar mmed3 ( UChar a, UChar b, UChar c )
58471e7ee59SPeter Avalos {
58571e7ee59SPeter Avalos UChar t;
58671e7ee59SPeter Avalos if (a > b) { t = a; a = b; b = t; };
58771e7ee59SPeter Avalos if (b > c) {
58871e7ee59SPeter Avalos b = c;
58971e7ee59SPeter Avalos if (a > b) b = a;
59071e7ee59SPeter Avalos }
59171e7ee59SPeter Avalos return b;
59271e7ee59SPeter Avalos }
59371e7ee59SPeter Avalos
59471e7ee59SPeter Avalos #define mmin(a,b) ((a) < (b)) ? (a) : (b)
59571e7ee59SPeter Avalos
59671e7ee59SPeter Avalos #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
59771e7ee59SPeter Avalos stackHi[sp] = hz; \
59871e7ee59SPeter Avalos stackD [sp] = dz; \
59971e7ee59SPeter Avalos sp++; }
60071e7ee59SPeter Avalos
60171e7ee59SPeter Avalos #define mpop(lz,hz,dz) { sp--; \
60271e7ee59SPeter Avalos lz = stackLo[sp]; \
60371e7ee59SPeter Avalos hz = stackHi[sp]; \
60471e7ee59SPeter Avalos dz = stackD [sp]; }
60571e7ee59SPeter Avalos
60671e7ee59SPeter Avalos
60771e7ee59SPeter Avalos #define mnextsize(az) (nextHi[az]-nextLo[az])
60871e7ee59SPeter Avalos
60971e7ee59SPeter Avalos #define mnextswap(az,bz) \
61071e7ee59SPeter Avalos { Int32 tz; \
61171e7ee59SPeter Avalos tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
61271e7ee59SPeter Avalos tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
61371e7ee59SPeter Avalos tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
61471e7ee59SPeter Avalos
61571e7ee59SPeter Avalos
61671e7ee59SPeter Avalos #define MAIN_QSORT_SMALL_THRESH 20
61771e7ee59SPeter Avalos #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
61871e7ee59SPeter Avalos #define MAIN_QSORT_STACK_SIZE 100
61971e7ee59SPeter Avalos
62071e7ee59SPeter Avalos static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)62171e7ee59SPeter Avalos void mainQSort3 ( UInt32* ptr,
62271e7ee59SPeter Avalos UChar* block,
62371e7ee59SPeter Avalos UInt16* quadrant,
62471e7ee59SPeter Avalos Int32 nblock,
62571e7ee59SPeter Avalos Int32 loSt,
62671e7ee59SPeter Avalos Int32 hiSt,
62771e7ee59SPeter Avalos Int32 dSt,
62871e7ee59SPeter Avalos Int32* budget )
62971e7ee59SPeter Avalos {
63071e7ee59SPeter Avalos Int32 unLo, unHi, ltLo, gtHi, n, m, med;
63171e7ee59SPeter Avalos Int32 sp, lo, hi, d;
63271e7ee59SPeter Avalos
63371e7ee59SPeter Avalos Int32 stackLo[MAIN_QSORT_STACK_SIZE];
63471e7ee59SPeter Avalos Int32 stackHi[MAIN_QSORT_STACK_SIZE];
63571e7ee59SPeter Avalos Int32 stackD [MAIN_QSORT_STACK_SIZE];
63671e7ee59SPeter Avalos
63771e7ee59SPeter Avalos Int32 nextLo[3];
63871e7ee59SPeter Avalos Int32 nextHi[3];
63971e7ee59SPeter Avalos Int32 nextD [3];
64071e7ee59SPeter Avalos
64171e7ee59SPeter Avalos sp = 0;
64271e7ee59SPeter Avalos mpush ( loSt, hiSt, dSt );
64371e7ee59SPeter Avalos
64471e7ee59SPeter Avalos while (sp > 0) {
64571e7ee59SPeter Avalos
64671e7ee59SPeter Avalos AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
64771e7ee59SPeter Avalos
64871e7ee59SPeter Avalos mpop ( lo, hi, d );
64971e7ee59SPeter Avalos if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
65071e7ee59SPeter Avalos d > MAIN_QSORT_DEPTH_THRESH) {
65171e7ee59SPeter Avalos mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
65271e7ee59SPeter Avalos if (*budget < 0) return;
65371e7ee59SPeter Avalos continue;
65471e7ee59SPeter Avalos }
65571e7ee59SPeter Avalos
65671e7ee59SPeter Avalos med = (Int32)
65771e7ee59SPeter Avalos mmed3 ( block[ptr[ lo ]+d],
65871e7ee59SPeter Avalos block[ptr[ hi ]+d],
65971e7ee59SPeter Avalos block[ptr[ (lo+hi)>>1 ]+d] );
66071e7ee59SPeter Avalos
66171e7ee59SPeter Avalos unLo = ltLo = lo;
66271e7ee59SPeter Avalos unHi = gtHi = hi;
66371e7ee59SPeter Avalos
66471e7ee59SPeter Avalos while (True) {
66571e7ee59SPeter Avalos while (True) {
66671e7ee59SPeter Avalos if (unLo > unHi) break;
66771e7ee59SPeter Avalos n = ((Int32)block[ptr[unLo]+d]) - med;
66871e7ee59SPeter Avalos if (n == 0) {
66971e7ee59SPeter Avalos mswap(ptr[unLo], ptr[ltLo]);
67071e7ee59SPeter Avalos ltLo++; unLo++; continue;
67171e7ee59SPeter Avalos };
67271e7ee59SPeter Avalos if (n > 0) break;
67371e7ee59SPeter Avalos unLo++;
67471e7ee59SPeter Avalos }
67571e7ee59SPeter Avalos while (True) {
67671e7ee59SPeter Avalos if (unLo > unHi) break;
67771e7ee59SPeter Avalos n = ((Int32)block[ptr[unHi]+d]) - med;
67871e7ee59SPeter Avalos if (n == 0) {
67971e7ee59SPeter Avalos mswap(ptr[unHi], ptr[gtHi]);
68071e7ee59SPeter Avalos gtHi--; unHi--; continue;
68171e7ee59SPeter Avalos };
68271e7ee59SPeter Avalos if (n < 0) break;
68371e7ee59SPeter Avalos unHi--;
68471e7ee59SPeter Avalos }
68571e7ee59SPeter Avalos if (unLo > unHi) break;
68671e7ee59SPeter Avalos mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
68771e7ee59SPeter Avalos }
68871e7ee59SPeter Avalos
68971e7ee59SPeter Avalos AssertD ( unHi == unLo-1, "mainQSort3(2)" );
69071e7ee59SPeter Avalos
69171e7ee59SPeter Avalos if (gtHi < ltLo) {
69271e7ee59SPeter Avalos mpush(lo, hi, d+1 );
69371e7ee59SPeter Avalos continue;
69471e7ee59SPeter Avalos }
69571e7ee59SPeter Avalos
69671e7ee59SPeter Avalos n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
69771e7ee59SPeter Avalos m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
69871e7ee59SPeter Avalos
69971e7ee59SPeter Avalos n = lo + unLo - ltLo - 1;
70071e7ee59SPeter Avalos m = hi - (gtHi - unHi) + 1;
70171e7ee59SPeter Avalos
70271e7ee59SPeter Avalos nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
70371e7ee59SPeter Avalos nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
70471e7ee59SPeter Avalos nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
70571e7ee59SPeter Avalos
70671e7ee59SPeter Avalos if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70771e7ee59SPeter Avalos if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
70871e7ee59SPeter Avalos if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70971e7ee59SPeter Avalos
71071e7ee59SPeter Avalos AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
71171e7ee59SPeter Avalos AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
71271e7ee59SPeter Avalos
71371e7ee59SPeter Avalos mpush (nextLo[0], nextHi[0], nextD[0]);
71471e7ee59SPeter Avalos mpush (nextLo[1], nextHi[1], nextD[1]);
71571e7ee59SPeter Avalos mpush (nextLo[2], nextHi[2], nextD[2]);
71671e7ee59SPeter Avalos }
71771e7ee59SPeter Avalos }
71871e7ee59SPeter Avalos
71971e7ee59SPeter Avalos #undef mswap
72071e7ee59SPeter Avalos #undef mvswap
72171e7ee59SPeter Avalos #undef mpush
72271e7ee59SPeter Avalos #undef mpop
72371e7ee59SPeter Avalos #undef mmin
72471e7ee59SPeter Avalos #undef mnextsize
72571e7ee59SPeter Avalos #undef mnextswap
72671e7ee59SPeter Avalos #undef MAIN_QSORT_SMALL_THRESH
72771e7ee59SPeter Avalos #undef MAIN_QSORT_DEPTH_THRESH
72871e7ee59SPeter Avalos #undef MAIN_QSORT_STACK_SIZE
72971e7ee59SPeter Avalos
73071e7ee59SPeter Avalos
73171e7ee59SPeter Avalos /*---------------------------------------------*/
73271e7ee59SPeter Avalos /* Pre:
73371e7ee59SPeter Avalos nblock > N_OVERSHOOT
73471e7ee59SPeter Avalos block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
73571e7ee59SPeter Avalos ((UChar*)block32) [0 .. nblock-1] holds block
73671e7ee59SPeter Avalos ptr exists for [0 .. nblock-1]
73771e7ee59SPeter Avalos
73871e7ee59SPeter Avalos Post:
73971e7ee59SPeter Avalos ((UChar*)block32) [0 .. nblock-1] holds block
74071e7ee59SPeter Avalos All other areas of block32 destroyed
74171e7ee59SPeter Avalos ftab [0 .. 65536 ] destroyed
74271e7ee59SPeter Avalos ptr [0 .. nblock-1] holds sorted order
74371e7ee59SPeter Avalos if (*budget < 0), sorting was abandoned
74471e7ee59SPeter Avalos */
74571e7ee59SPeter Avalos
74671e7ee59SPeter Avalos #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
74771e7ee59SPeter Avalos #define SETMASK (1 << 21)
74871e7ee59SPeter Avalos #define CLEARMASK (~(SETMASK))
74971e7ee59SPeter Avalos
75071e7ee59SPeter Avalos static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)75171e7ee59SPeter Avalos void mainSort ( UInt32* ptr,
75271e7ee59SPeter Avalos UChar* block,
75371e7ee59SPeter Avalos UInt16* quadrant,
75471e7ee59SPeter Avalos UInt32* ftab,
75571e7ee59SPeter Avalos Int32 nblock,
75671e7ee59SPeter Avalos Int32 verb,
75771e7ee59SPeter Avalos Int32* budget )
75871e7ee59SPeter Avalos {
75971e7ee59SPeter Avalos Int32 i, j, k, ss, sb;
76071e7ee59SPeter Avalos Int32 runningOrder[256];
76171e7ee59SPeter Avalos Bool bigDone[256];
76271e7ee59SPeter Avalos Int32 copyStart[256];
76371e7ee59SPeter Avalos Int32 copyEnd [256];
76471e7ee59SPeter Avalos UChar c1;
76571e7ee59SPeter Avalos Int32 numQSorted;
76671e7ee59SPeter Avalos UInt16 s;
76771e7ee59SPeter Avalos if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
76871e7ee59SPeter Avalos
76971e7ee59SPeter Avalos /*-- set up the 2-byte frequency table --*/
77071e7ee59SPeter Avalos for (i = 65536; i >= 0; i--) ftab[i] = 0;
77171e7ee59SPeter Avalos
77271e7ee59SPeter Avalos j = block[0] << 8;
77371e7ee59SPeter Avalos i = nblock-1;
77471e7ee59SPeter Avalos for (; i >= 3; i -= 4) {
77571e7ee59SPeter Avalos quadrant[i] = 0;
77671e7ee59SPeter Avalos j = (j >> 8) | ( ((UInt16)block[i]) << 8);
77771e7ee59SPeter Avalos ftab[j]++;
77871e7ee59SPeter Avalos quadrant[i-1] = 0;
77971e7ee59SPeter Avalos j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
78071e7ee59SPeter Avalos ftab[j]++;
78171e7ee59SPeter Avalos quadrant[i-2] = 0;
78271e7ee59SPeter Avalos j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
78371e7ee59SPeter Avalos ftab[j]++;
78471e7ee59SPeter Avalos quadrant[i-3] = 0;
78571e7ee59SPeter Avalos j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
78671e7ee59SPeter Avalos ftab[j]++;
78771e7ee59SPeter Avalos }
78871e7ee59SPeter Avalos for (; i >= 0; i--) {
78971e7ee59SPeter Avalos quadrant[i] = 0;
79071e7ee59SPeter Avalos j = (j >> 8) | ( ((UInt16)block[i]) << 8);
79171e7ee59SPeter Avalos ftab[j]++;
79271e7ee59SPeter Avalos }
79371e7ee59SPeter Avalos
79471e7ee59SPeter Avalos /*-- (emphasises close relationship of block & quadrant) --*/
79571e7ee59SPeter Avalos for (i = 0; i < BZ_N_OVERSHOOT; i++) {
79671e7ee59SPeter Avalos block [nblock+i] = block[i];
79771e7ee59SPeter Avalos quadrant[nblock+i] = 0;
79871e7ee59SPeter Avalos }
79971e7ee59SPeter Avalos
80071e7ee59SPeter Avalos if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
80171e7ee59SPeter Avalos
80271e7ee59SPeter Avalos /*-- Complete the initial radix sort --*/
80371e7ee59SPeter Avalos for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
80471e7ee59SPeter Avalos
80571e7ee59SPeter Avalos s = block[0] << 8;
80671e7ee59SPeter Avalos i = nblock-1;
80771e7ee59SPeter Avalos for (; i >= 3; i -= 4) {
80871e7ee59SPeter Avalos s = (s >> 8) | (block[i] << 8);
80971e7ee59SPeter Avalos j = ftab[s] -1;
81071e7ee59SPeter Avalos ftab[s] = j;
81171e7ee59SPeter Avalos ptr[j] = i;
81271e7ee59SPeter Avalos s = (s >> 8) | (block[i-1] << 8);
81371e7ee59SPeter Avalos j = ftab[s] -1;
81471e7ee59SPeter Avalos ftab[s] = j;
81571e7ee59SPeter Avalos ptr[j] = i-1;
81671e7ee59SPeter Avalos s = (s >> 8) | (block[i-2] << 8);
81771e7ee59SPeter Avalos j = ftab[s] -1;
81871e7ee59SPeter Avalos ftab[s] = j;
81971e7ee59SPeter Avalos ptr[j] = i-2;
82071e7ee59SPeter Avalos s = (s >> 8) | (block[i-3] << 8);
82171e7ee59SPeter Avalos j = ftab[s] -1;
82271e7ee59SPeter Avalos ftab[s] = j;
82371e7ee59SPeter Avalos ptr[j] = i-3;
82471e7ee59SPeter Avalos }
82571e7ee59SPeter Avalos for (; i >= 0; i--) {
82671e7ee59SPeter Avalos s = (s >> 8) | (block[i] << 8);
82771e7ee59SPeter Avalos j = ftab[s] -1;
82871e7ee59SPeter Avalos ftab[s] = j;
82971e7ee59SPeter Avalos ptr[j] = i;
83071e7ee59SPeter Avalos }
83171e7ee59SPeter Avalos
83271e7ee59SPeter Avalos /*--
83371e7ee59SPeter Avalos Now ftab contains the first loc of every small bucket.
83471e7ee59SPeter Avalos Calculate the running order, from smallest to largest
83571e7ee59SPeter Avalos big bucket.
83671e7ee59SPeter Avalos --*/
83771e7ee59SPeter Avalos for (i = 0; i <= 255; i++) {
83871e7ee59SPeter Avalos bigDone [i] = False;
83971e7ee59SPeter Avalos runningOrder[i] = i;
84071e7ee59SPeter Avalos }
84171e7ee59SPeter Avalos
84271e7ee59SPeter Avalos {
84371e7ee59SPeter Avalos Int32 vv;
84471e7ee59SPeter Avalos Int32 h = 1;
84571e7ee59SPeter Avalos do h = 3 * h + 1; while (h <= 256);
84671e7ee59SPeter Avalos do {
84771e7ee59SPeter Avalos h = h / 3;
84871e7ee59SPeter Avalos for (i = h; i <= 255; i++) {
84971e7ee59SPeter Avalos vv = runningOrder[i];
85071e7ee59SPeter Avalos j = i;
85171e7ee59SPeter Avalos while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
85271e7ee59SPeter Avalos runningOrder[j] = runningOrder[j-h];
85371e7ee59SPeter Avalos j = j - h;
85471e7ee59SPeter Avalos if (j <= (h - 1)) goto zero;
85571e7ee59SPeter Avalos }
85671e7ee59SPeter Avalos zero:
85771e7ee59SPeter Avalos runningOrder[j] = vv;
85871e7ee59SPeter Avalos }
85971e7ee59SPeter Avalos } while (h != 1);
86071e7ee59SPeter Avalos }
86171e7ee59SPeter Avalos
86271e7ee59SPeter Avalos /*--
86371e7ee59SPeter Avalos The main sorting loop.
86471e7ee59SPeter Avalos --*/
86571e7ee59SPeter Avalos
86671e7ee59SPeter Avalos numQSorted = 0;
86771e7ee59SPeter Avalos
86871e7ee59SPeter Avalos for (i = 0; i <= 255; i++) {
86971e7ee59SPeter Avalos
87071e7ee59SPeter Avalos /*--
87171e7ee59SPeter Avalos Process big buckets, starting with the least full.
87271e7ee59SPeter Avalos Basically this is a 3-step process in which we call
87371e7ee59SPeter Avalos mainQSort3 to sort the small buckets [ss, j], but
87471e7ee59SPeter Avalos also make a big effort to avoid the calls if we can.
87571e7ee59SPeter Avalos --*/
87671e7ee59SPeter Avalos ss = runningOrder[i];
87771e7ee59SPeter Avalos
87871e7ee59SPeter Avalos /*--
87971e7ee59SPeter Avalos Step 1:
88071e7ee59SPeter Avalos Complete the big bucket [ss] by quicksorting
88171e7ee59SPeter Avalos any unsorted small buckets [ss, j], for j != ss.
88271e7ee59SPeter Avalos Hopefully previous pointer-scanning phases have already
88371e7ee59SPeter Avalos completed many of the small buckets [ss, j], so
88471e7ee59SPeter Avalos we don't have to sort them at all.
88571e7ee59SPeter Avalos --*/
88671e7ee59SPeter Avalos for (j = 0; j <= 255; j++) {
88771e7ee59SPeter Avalos if (j != ss) {
88871e7ee59SPeter Avalos sb = (ss << 8) + j;
88971e7ee59SPeter Avalos if ( ! (ftab[sb] & SETMASK) ) {
89071e7ee59SPeter Avalos Int32 lo = ftab[sb] & CLEARMASK;
89171e7ee59SPeter Avalos Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
89271e7ee59SPeter Avalos if (hi > lo) {
89371e7ee59SPeter Avalos if (verb >= 4)
89471e7ee59SPeter Avalos VPrintf4 ( " qsort [0x%x, 0x%x] "
89571e7ee59SPeter Avalos "done %d this %d\n",
89671e7ee59SPeter Avalos ss, j, numQSorted, hi - lo + 1 );
89771e7ee59SPeter Avalos mainQSort3 (
89871e7ee59SPeter Avalos ptr, block, quadrant, nblock,
89971e7ee59SPeter Avalos lo, hi, BZ_N_RADIX, budget
90071e7ee59SPeter Avalos );
90171e7ee59SPeter Avalos numQSorted += (hi - lo + 1);
90271e7ee59SPeter Avalos if (*budget < 0) return;
90371e7ee59SPeter Avalos }
90471e7ee59SPeter Avalos }
90571e7ee59SPeter Avalos ftab[sb] |= SETMASK;
90671e7ee59SPeter Avalos }
90771e7ee59SPeter Avalos }
90871e7ee59SPeter Avalos
90971e7ee59SPeter Avalos AssertH ( !bigDone[ss], 1006 );
91071e7ee59SPeter Avalos
91171e7ee59SPeter Avalos /*--
91271e7ee59SPeter Avalos Step 2:
91371e7ee59SPeter Avalos Now scan this big bucket [ss] so as to synthesise the
91471e7ee59SPeter Avalos sorted order for small buckets [t, ss] for all t,
91571e7ee59SPeter Avalos including, magically, the bucket [ss,ss] too.
91671e7ee59SPeter Avalos This will avoid doing Real Work in subsequent Step 1's.
91771e7ee59SPeter Avalos --*/
91871e7ee59SPeter Avalos {
91971e7ee59SPeter Avalos for (j = 0; j <= 255; j++) {
92071e7ee59SPeter Avalos copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
92171e7ee59SPeter Avalos copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
92271e7ee59SPeter Avalos }
92371e7ee59SPeter Avalos for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
92471e7ee59SPeter Avalos k = ptr[j]-1; if (k < 0) k += nblock;
92571e7ee59SPeter Avalos c1 = block[k];
92671e7ee59SPeter Avalos if (!bigDone[c1])
92771e7ee59SPeter Avalos ptr[ copyStart[c1]++ ] = k;
92871e7ee59SPeter Avalos }
92971e7ee59SPeter Avalos for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
93071e7ee59SPeter Avalos k = ptr[j]-1; if (k < 0) k += nblock;
93171e7ee59SPeter Avalos c1 = block[k];
93271e7ee59SPeter Avalos if (!bigDone[c1])
93371e7ee59SPeter Avalos ptr[ copyEnd[c1]-- ] = k;
93471e7ee59SPeter Avalos }
93571e7ee59SPeter Avalos }
93671e7ee59SPeter Avalos
93771e7ee59SPeter Avalos AssertH ( (copyStart[ss]-1 == copyEnd[ss])
93871e7ee59SPeter Avalos ||
93971e7ee59SPeter Avalos /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
94071e7ee59SPeter Avalos Necessity for this case is demonstrated by compressing
94171e7ee59SPeter Avalos a sequence of approximately 48.5 million of character
94271e7ee59SPeter Avalos 251; 1.0.0/1.0.1 will then die here. */
94371e7ee59SPeter Avalos (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
94471e7ee59SPeter Avalos 1007 )
94571e7ee59SPeter Avalos
94671e7ee59SPeter Avalos for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
94771e7ee59SPeter Avalos
94871e7ee59SPeter Avalos /*--
94971e7ee59SPeter Avalos Step 3:
95071e7ee59SPeter Avalos The [ss] big bucket is now done. Record this fact,
95171e7ee59SPeter Avalos and update the quadrant descriptors. Remember to
95271e7ee59SPeter Avalos update quadrants in the overshoot area too, if
95371e7ee59SPeter Avalos necessary. The "if (i < 255)" test merely skips
95471e7ee59SPeter Avalos this updating for the last bucket processed, since
95571e7ee59SPeter Avalos updating for the last bucket is pointless.
95671e7ee59SPeter Avalos
95771e7ee59SPeter Avalos The quadrant array provides a way to incrementally
95871e7ee59SPeter Avalos cache sort orderings, as they appear, so as to
95971e7ee59SPeter Avalos make subsequent comparisons in fullGtU() complete
96071e7ee59SPeter Avalos faster. For repetitive blocks this makes a big
96171e7ee59SPeter Avalos difference (but not big enough to be able to avoid
96271e7ee59SPeter Avalos the fallback sorting mechanism, exponential radix sort).
96371e7ee59SPeter Avalos
96471e7ee59SPeter Avalos The precise meaning is: at all times:
96571e7ee59SPeter Avalos
96671e7ee59SPeter Avalos for 0 <= i < nblock and 0 <= j <= nblock
96771e7ee59SPeter Avalos
96871e7ee59SPeter Avalos if block[i] != block[j],
96971e7ee59SPeter Avalos
97071e7ee59SPeter Avalos then the relative values of quadrant[i] and
97171e7ee59SPeter Avalos quadrant[j] are meaningless.
97271e7ee59SPeter Avalos
97371e7ee59SPeter Avalos else {
97471e7ee59SPeter Avalos if quadrant[i] < quadrant[j]
97571e7ee59SPeter Avalos then the string starting at i lexicographically
97671e7ee59SPeter Avalos precedes the string starting at j
97771e7ee59SPeter Avalos
97871e7ee59SPeter Avalos else if quadrant[i] > quadrant[j]
97971e7ee59SPeter Avalos then the string starting at j lexicographically
98071e7ee59SPeter Avalos precedes the string starting at i
98171e7ee59SPeter Avalos
98271e7ee59SPeter Avalos else
98371e7ee59SPeter Avalos the relative ordering of the strings starting
98471e7ee59SPeter Avalos at i and j has not yet been determined.
98571e7ee59SPeter Avalos }
98671e7ee59SPeter Avalos --*/
98771e7ee59SPeter Avalos bigDone[ss] = True;
98871e7ee59SPeter Avalos
98971e7ee59SPeter Avalos if (i < 255) {
99071e7ee59SPeter Avalos Int32 bbStart = ftab[ss << 8] & CLEARMASK;
99171e7ee59SPeter Avalos Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
99271e7ee59SPeter Avalos Int32 shifts = 0;
99371e7ee59SPeter Avalos
99471e7ee59SPeter Avalos while ((bbSize >> shifts) > 65534) shifts++;
99571e7ee59SPeter Avalos
99671e7ee59SPeter Avalos for (j = bbSize-1; j >= 0; j--) {
99771e7ee59SPeter Avalos Int32 a2update = ptr[bbStart + j];
99871e7ee59SPeter Avalos UInt16 qVal = (UInt16)(j >> shifts);
99971e7ee59SPeter Avalos quadrant[a2update] = qVal;
100071e7ee59SPeter Avalos if (a2update < BZ_N_OVERSHOOT)
100171e7ee59SPeter Avalos quadrant[a2update + nblock] = qVal;
100271e7ee59SPeter Avalos }
100371e7ee59SPeter Avalos AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
100471e7ee59SPeter Avalos }
100571e7ee59SPeter Avalos
100671e7ee59SPeter Avalos }
100771e7ee59SPeter Avalos
100871e7ee59SPeter Avalos if (verb >= 4)
100971e7ee59SPeter Avalos VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
101071e7ee59SPeter Avalos nblock, numQSorted, nblock - numQSorted );
101171e7ee59SPeter Avalos }
101271e7ee59SPeter Avalos
101371e7ee59SPeter Avalos #undef BIGFREQ
101471e7ee59SPeter Avalos #undef SETMASK
101571e7ee59SPeter Avalos #undef CLEARMASK
101671e7ee59SPeter Avalos
101771e7ee59SPeter Avalos
101871e7ee59SPeter Avalos /*---------------------------------------------*/
101971e7ee59SPeter Avalos /* Pre:
102071e7ee59SPeter Avalos nblock > 0
102171e7ee59SPeter Avalos arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
102271e7ee59SPeter Avalos ((UChar*)arr2) [0 .. nblock-1] holds block
102371e7ee59SPeter Avalos arr1 exists for [0 .. nblock-1]
102471e7ee59SPeter Avalos
102571e7ee59SPeter Avalos Post:
102671e7ee59SPeter Avalos ((UChar*)arr2) [0 .. nblock-1] holds block
102771e7ee59SPeter Avalos All other areas of block destroyed
102871e7ee59SPeter Avalos ftab [ 0 .. 65536 ] destroyed
102971e7ee59SPeter Avalos arr1 [0 .. nblock-1] holds sorted order
103071e7ee59SPeter Avalos */
BZ2_blockSort(EState * s)103171e7ee59SPeter Avalos void BZ2_blockSort ( EState* s )
103271e7ee59SPeter Avalos {
103371e7ee59SPeter Avalos UInt32* ptr = s->ptr;
103471e7ee59SPeter Avalos UChar* block = s->block;
103571e7ee59SPeter Avalos UInt32* ftab = s->ftab;
103671e7ee59SPeter Avalos Int32 nblock = s->nblock;
103771e7ee59SPeter Avalos Int32 verb = s->verbosity;
103871e7ee59SPeter Avalos Int32 wfact = s->workFactor;
103971e7ee59SPeter Avalos UInt16* quadrant;
104071e7ee59SPeter Avalos Int32 budget;
104171e7ee59SPeter Avalos Int32 budgetInit;
104271e7ee59SPeter Avalos Int32 i;
104371e7ee59SPeter Avalos
104471e7ee59SPeter Avalos if (nblock < 10000) {
104571e7ee59SPeter Avalos fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
104671e7ee59SPeter Avalos } else {
104771e7ee59SPeter Avalos /* Calculate the location for quadrant, remembering to get
104871e7ee59SPeter Avalos the alignment right. Assumes that &(block[0]) is at least
104971e7ee59SPeter Avalos 2-byte aligned -- this should be ok since block is really
105071e7ee59SPeter Avalos the first section of arr2.
105171e7ee59SPeter Avalos */
105271e7ee59SPeter Avalos i = nblock+BZ_N_OVERSHOOT;
105371e7ee59SPeter Avalos if (i & 1) i++;
105471e7ee59SPeter Avalos quadrant = (UInt16*)(&(block[i]));
105571e7ee59SPeter Avalos
105671e7ee59SPeter Avalos /* (wfact-1) / 3 puts the default-factor-30
105771e7ee59SPeter Avalos transition point at very roughly the same place as
105871e7ee59SPeter Avalos with v0.1 and v0.9.0.
105971e7ee59SPeter Avalos Not that it particularly matters any more, since the
106071e7ee59SPeter Avalos resulting compressed stream is now the same regardless
106171e7ee59SPeter Avalos of whether or not we use the main sort or fallback sort.
106271e7ee59SPeter Avalos */
106371e7ee59SPeter Avalos if (wfact < 1 ) wfact = 1;
106471e7ee59SPeter Avalos if (wfact > 100) wfact = 100;
106571e7ee59SPeter Avalos budgetInit = nblock * ((wfact-1) / 3);
106671e7ee59SPeter Avalos budget = budgetInit;
106771e7ee59SPeter Avalos
106871e7ee59SPeter Avalos mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
106971e7ee59SPeter Avalos if (verb >= 3)
107071e7ee59SPeter Avalos VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
107171e7ee59SPeter Avalos budgetInit - budget,
107271e7ee59SPeter Avalos nblock,
107371e7ee59SPeter Avalos (float)(budgetInit - budget) /
107471e7ee59SPeter Avalos (float)(nblock==0 ? 1 : nblock) );
107571e7ee59SPeter Avalos if (budget < 0) {
107671e7ee59SPeter Avalos if (verb >= 2)
107771e7ee59SPeter Avalos VPrintf0 ( " too repetitive; using fallback"
107871e7ee59SPeter Avalos " sorting algorithm\n" );
107971e7ee59SPeter Avalos fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
108071e7ee59SPeter Avalos }
108171e7ee59SPeter Avalos }
108271e7ee59SPeter Avalos
108371e7ee59SPeter Avalos s->origPtr = -1;
108471e7ee59SPeter Avalos for (i = 0; i < s->nblock; i++)
108571e7ee59SPeter Avalos if (ptr[i] == 0)
108671e7ee59SPeter Avalos { s->origPtr = i; break; };
108771e7ee59SPeter Avalos
108871e7ee59SPeter Avalos AssertH( s->origPtr != -1, 1003 );
108971e7ee59SPeter Avalos }
109071e7ee59SPeter Avalos
109171e7ee59SPeter Avalos
109271e7ee59SPeter Avalos /*-------------------------------------------------------------*/
109371e7ee59SPeter Avalos /*--- end blocksort.c ---*/
109471e7ee59SPeter Avalos /*-------------------------------------------------------------*/
1095