1
2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
6
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
10
11 bzip2/libbzip2 version 1.0.5 of 10 December 2007
12 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 README file.
16
17 This program is released under the terms of the license contained
18 in the file LICENSE.
19 ------------------------------------------------------------------ */
20
21
22 #include "bzlib_private.h"
23
24 ABC_NAMESPACE_IMPL_START
25
26 /*---------------------------------------------*/
27 /*--- Fallback O(N log(N)^2) sorting ---*/
28 /*--- algorithm, for repetitive blocks ---*/
29 /*---------------------------------------------*/
30
31 /*---------------------------------------------*/
32 static
33 __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)34 void fallbackSimpleSort ( UInt32* fmap,
35 UInt32* eclass,
36 Int32 lo,
37 Int32 hi )
38 {
39 Int32 i, j, tmp;
40 UInt32 ec_tmp;
41
42 if (lo == hi) return;
43
44 if (hi - lo > 3) {
45 for ( i = hi-4; i >= lo; i-- ) {
46 tmp = fmap[i];
47 ec_tmp = eclass[tmp];
48 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
49 fmap[j-4] = fmap[j];
50 fmap[j-4] = tmp;
51 }
52 }
53
54 for ( i = hi-1; i >= lo; i-- ) {
55 tmp = fmap[i];
56 ec_tmp = eclass[tmp];
57 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
58 fmap[j-1] = fmap[j];
59 fmap[j-1] = tmp;
60 }
61 }
62
63
64 /*---------------------------------------------*/
65 #define fswap(zz1, zz2) \
66 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
67
68 #define fvswap(zzp1, zzp2, zzn) \
69 { \
70 Int32 yyp1 = (zzp1); \
71 Int32 yyp2 = (zzp2); \
72 Int32 yyn = (zzn); \
73 while (yyn > 0) { \
74 fswap(fmap[yyp1], fmap[yyp2]); \
75 yyp1++; yyp2++; yyn--; \
76 } \
77 }
78
79
80 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
81
82 #define fpush(lz,hz) { stackLo[sp] = lz; \
83 stackHi[sp] = hz; \
84 sp++; }
85
86 #define fpop(lz,hz) { sp--; \
87 lz = stackLo[sp]; \
88 hz = stackHi[sp]; }
89
90 #define FALLBACK_QSORT_SMALL_THRESH 10
91 #define FALLBACK_QSORT_STACK_SIZE 100
92
93
94 static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)95 void fallbackQSort3 ( UInt32* fmap,
96 UInt32* eclass,
97 Int32 loSt,
98 Int32 hiSt )
99 {
100 Int32 unLo, unHi, ltLo, gtHi, n, m;
101 Int32 sp, lo, hi;
102 UInt32 med, r, r3;
103 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
104 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
105
106 r = 0;
107
108 sp = 0;
109 fpush ( loSt, hiSt );
110
111 while (sp > 0) {
112
113 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
114
115 fpop ( lo, hi );
116 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
117 fallbackSimpleSort ( fmap, eclass, lo, hi );
118 continue;
119 }
120
121 /* Random partitioning. Median of 3 sometimes fails to
122 avoid bad cases. Median of 9 seems to help but
123 looks rather expensive. This too seems to work but
124 is cheaper. Guidance for the magic constants
125 7621 and 32768 is taken from Sedgewick's algorithms
126 book, chapter 35.
127 */
128 r = ((r * 7621) + 1) % 32768;
129 r3 = r % 3;
130 if (r3 == 0) med = eclass[fmap[lo]]; else
131 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
132 med = eclass[fmap[hi]];
133
134 unLo = ltLo = lo;
135 unHi = gtHi = hi;
136
137 while (1) {
138 while (1) {
139 if (unLo > unHi) break;
140 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
141 if (n == 0) {
142 fswap(fmap[unLo], fmap[ltLo]);
143 ltLo++; unLo++;
144 continue;
145 };
146 if (n > 0) break;
147 unLo++;
148 }
149 while (1) {
150 if (unLo > unHi) break;
151 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
152 if (n == 0) {
153 fswap(fmap[unHi], fmap[gtHi]);
154 gtHi--; unHi--;
155 continue;
156 };
157 if (n < 0) break;
158 unHi--;
159 }
160 if (unLo > unHi) break;
161 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
162 }
163
164 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
165
166 if (gtHi < ltLo) continue;
167
168 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
169 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
170
171 n = lo + unLo - ltLo - 1;
172 m = hi - (gtHi - unHi) + 1;
173
174 if (n - lo > hi - m) {
175 fpush ( lo, n );
176 fpush ( m, hi );
177 } else {
178 fpush ( m, hi );
179 fpush ( lo, n );
180 }
181 }
182 }
183
184 #undef fmin
185 #undef fpush
186 #undef fpop
187 #undef fswap
188 #undef fvswap
189 #undef FALLBACK_QSORT_SMALL_THRESH
190 #undef FALLBACK_QSORT_STACK_SIZE
191
192
193 /*---------------------------------------------*/
194 /* Pre:
195 nblock > 0
196 eclass exists for [0 .. nblock-1]
197 ((UChar*)eclass) [0 .. nblock-1] holds block
198 ptr exists for [0 .. nblock-1]
199
200 Post:
201 ((UChar*)eclass) [0 .. nblock-1] holds block
202 All other areas of eclass destroyed
203 fmap [0 .. nblock-1] holds sorted order
204 bhtab [ 0 .. 2+(nblock/32) ] destroyed
205 */
206
207 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
208 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
209 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
210 #define WORD_BH(zz) bhtab[(zz) >> 5]
211 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
212
213 static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)214 void fallbackSort ( UInt32* fmap,
215 UInt32* eclass,
216 UInt32* bhtab,
217 Int32 nblock,
218 Int32 verb )
219 {
220 Int32 ftab[257];
221 Int32 ftabCopy[256];
222 Int32 H, i, j, k, l, r, cc, cc1;
223 Int32 nNotDone;
224 Int32 nBhtab;
225 UChar* eclass8 = (UChar*)eclass;
226
227 /*--
228 Initial 1-char radix sort to generate
229 initial fmap and initial BH bits.
230 --*/
231 if (verb >= 4)
232 VPrintf0 ( " bucket sorting ...\n" );
233 for (i = 0; i < 257; i++) ftab[i] = 0;
234 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
235 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
236 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
237
238 for (i = 0; i < nblock; i++) {
239 j = eclass8[i];
240 k = ftab[j] - 1;
241 ftab[j] = k;
242 fmap[k] = i;
243 }
244
245 nBhtab = 2 + (nblock / 32);
246 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
247 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
248
249 /*--
250 Inductively refine the buckets. Kind-of an
251 "exponential radix sort" (!), inspired by the
252 Manber-Myers suffix array construction algorithm.
253 --*/
254
255 /*-- set sentinel bits for block-end detection --*/
256 for (i = 0; i < 32; i++) {
257 SET_BH(nblock + 2*i);
258 CLEAR_BH(nblock + 2*i + 1);
259 }
260
261 /*-- the log(N) loop --*/
262 H = 1;
263 while (1) {
264
265 if (verb >= 4)
266 VPrintf1 ( " depth %6d has ", H );
267
268 j = 0;
269 for (i = 0; i < nblock; i++) {
270 if (ISSET_BH(i)) j = i;
271 k = fmap[i] - H; if (k < 0) k += nblock;
272 eclass[k] = j;
273 }
274
275 nNotDone = 0;
276 r = -1;
277 while (1) {
278
279 /*-- find the next non-singleton bucket --*/
280 k = r + 1;
281 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
282 if (ISSET_BH(k)) {
283 while (WORD_BH(k) == 0xffffffff) k += 32;
284 while (ISSET_BH(k)) k++;
285 }
286 l = k - 1;
287 if (l >= nblock) break;
288 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
289 if (!ISSET_BH(k)) {
290 while (WORD_BH(k) == 0x00000000) k += 32;
291 while (!ISSET_BH(k)) k++;
292 }
293 r = k - 1;
294 if (r >= nblock) break;
295
296 /*-- now [l, r] bracket current bucket --*/
297 if (r > l) {
298 nNotDone += (r - l + 1);
299 fallbackQSort3 ( fmap, eclass, l, r );
300
301 /*-- scan bucket and generate header bits-- */
302 cc = -1;
303 for (i = l; i <= r; i++) {
304 cc1 = eclass[fmap[i]];
305 if (cc != cc1) { SET_BH(i); cc = cc1; };
306 }
307 }
308 }
309
310 if (verb >= 4)
311 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
312
313 H *= 2;
314 if (H > nblock || nNotDone == 0) break;
315 }
316
317 /*--
318 Reconstruct the original block in
319 eclass8 [0 .. nblock-1], since the
320 previous phase destroyed it.
321 --*/
322 if (verb >= 4)
323 VPrintf0 ( " reconstructing block ...\n" );
324 j = 0;
325 for (i = 0; i < nblock; i++) {
326 while (ftabCopy[j] == 0) j++;
327 ftabCopy[j]--;
328 eclass8[fmap[i]] = (UChar)j;
329 }
330 AssertH ( j < 256, 1005 );
331 }
332
333 #undef SET_BH
334 #undef CLEAR_BH
335 #undef ISSET_BH
336 #undef WORD_BH
337 #undef UNALIGNED_BH
338
339
340 /*---------------------------------------------*/
341 /*--- The main, O(N^2 log(N)) sorting ---*/
342 /*--- algorithm. Faster for "normal" ---*/
343 /*--- non-repetitive blocks. ---*/
344 /*---------------------------------------------*/
345
346 /*---------------------------------------------*/
347 static
348 __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)349 Bool mainGtU ( UInt32 i1,
350 UInt32 i2,
351 UChar* block,
352 UInt16* quadrant,
353 UInt32 nblock,
354 Int32* budget )
355 {
356 Int32 k;
357 UChar c1, c2;
358 UInt16 s1, s2;
359
360 AssertD ( i1 != i2, "mainGtU" );
361 /* 1 */
362 c1 = block[i1]; c2 = block[i2];
363 if (c1 != c2) return (c1 > c2);
364 i1++; i2++;
365 /* 2 */
366 c1 = block[i1]; c2 = block[i2];
367 if (c1 != c2) return (c1 > c2);
368 i1++; i2++;
369 /* 3 */
370 c1 = block[i1]; c2 = block[i2];
371 if (c1 != c2) return (c1 > c2);
372 i1++; i2++;
373 /* 4 */
374 c1 = block[i1]; c2 = block[i2];
375 if (c1 != c2) return (c1 > c2);
376 i1++; i2++;
377 /* 5 */
378 c1 = block[i1]; c2 = block[i2];
379 if (c1 != c2) return (c1 > c2);
380 i1++; i2++;
381 /* 6 */
382 c1 = block[i1]; c2 = block[i2];
383 if (c1 != c2) return (c1 > c2);
384 i1++; i2++;
385 /* 7 */
386 c1 = block[i1]; c2 = block[i2];
387 if (c1 != c2) return (c1 > c2);
388 i1++; i2++;
389 /* 8 */
390 c1 = block[i1]; c2 = block[i2];
391 if (c1 != c2) return (c1 > c2);
392 i1++; i2++;
393 /* 9 */
394 c1 = block[i1]; c2 = block[i2];
395 if (c1 != c2) return (c1 > c2);
396 i1++; i2++;
397 /* 10 */
398 c1 = block[i1]; c2 = block[i2];
399 if (c1 != c2) return (c1 > c2);
400 i1++; i2++;
401 /* 11 */
402 c1 = block[i1]; c2 = block[i2];
403 if (c1 != c2) return (c1 > c2);
404 i1++; i2++;
405 /* 12 */
406 c1 = block[i1]; c2 = block[i2];
407 if (c1 != c2) return (c1 > c2);
408 i1++; i2++;
409
410 k = nblock + 8;
411
412 do {
413 /* 1 */
414 c1 = block[i1]; c2 = block[i2];
415 if (c1 != c2) return (c1 > c2);
416 s1 = quadrant[i1]; s2 = quadrant[i2];
417 if (s1 != s2) return (s1 > s2);
418 i1++; i2++;
419 /* 2 */
420 c1 = block[i1]; c2 = block[i2];
421 if (c1 != c2) return (c1 > c2);
422 s1 = quadrant[i1]; s2 = quadrant[i2];
423 if (s1 != s2) return (s1 > s2);
424 i1++; i2++;
425 /* 3 */
426 c1 = block[i1]; c2 = block[i2];
427 if (c1 != c2) return (c1 > c2);
428 s1 = quadrant[i1]; s2 = quadrant[i2];
429 if (s1 != s2) return (s1 > s2);
430 i1++; i2++;
431 /* 4 */
432 c1 = block[i1]; c2 = block[i2];
433 if (c1 != c2) return (c1 > c2);
434 s1 = quadrant[i1]; s2 = quadrant[i2];
435 if (s1 != s2) return (s1 > s2);
436 i1++; i2++;
437 /* 5 */
438 c1 = block[i1]; c2 = block[i2];
439 if (c1 != c2) return (c1 > c2);
440 s1 = quadrant[i1]; s2 = quadrant[i2];
441 if (s1 != s2) return (s1 > s2);
442 i1++; i2++;
443 /* 6 */
444 c1 = block[i1]; c2 = block[i2];
445 if (c1 != c2) return (c1 > c2);
446 s1 = quadrant[i1]; s2 = quadrant[i2];
447 if (s1 != s2) return (s1 > s2);
448 i1++; i2++;
449 /* 7 */
450 c1 = block[i1]; c2 = block[i2];
451 if (c1 != c2) return (c1 > c2);
452 s1 = quadrant[i1]; s2 = quadrant[i2];
453 if (s1 != s2) return (s1 > s2);
454 i1++; i2++;
455 /* 8 */
456 c1 = block[i1]; c2 = block[i2];
457 if (c1 != c2) return (c1 > c2);
458 s1 = quadrant[i1]; s2 = quadrant[i2];
459 if (s1 != s2) return (s1 > s2);
460 i1++; i2++;
461
462 if (i1 >= nblock) i1 -= nblock;
463 if (i2 >= nblock) i2 -= nblock;
464
465 k -= 8;
466 (*budget)--;
467 }
468 while (k >= 0);
469
470 return False;
471 }
472
473
474 /*---------------------------------------------*/
475 /*--
476 Knuth's increments seem to work better
477 than Incerpi-Sedgewick here. Possibly
478 because the number of elems to sort is
479 usually small, typically <= 20.
480 --*/
481 static
482 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
483 9841, 29524, 88573, 265720,
484 797161, 2391484 };
485
486 static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)487 void mainSimpleSort ( UInt32* ptr,
488 UChar* block,
489 UInt16* quadrant,
490 Int32 nblock,
491 Int32 lo,
492 Int32 hi,
493 Int32 d,
494 Int32* budget )
495 {
496 Int32 i, j, h, bigN, hp;
497 UInt32 v;
498
499 bigN = hi - lo + 1;
500 if (bigN < 2) return;
501
502 hp = 0;
503 while (incs[hp] < bigN) hp++;
504 hp--;
505
506 for (; hp >= 0; hp--) {
507 h = incs[hp];
508
509 i = lo + h;
510 while (True) {
511
512 /*-- copy 1 --*/
513 if (i > hi) break;
514 v = ptr[i];
515 j = i;
516 while ( mainGtU (
517 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
518 ) ) {
519 ptr[j] = ptr[j-h];
520 j = j - h;
521 if (j <= (lo + h - 1)) break;
522 }
523 ptr[j] = v;
524 i++;
525
526 /*-- copy 2 --*/
527 if (i > hi) break;
528 v = ptr[i];
529 j = i;
530 while ( mainGtU (
531 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
532 ) ) {
533 ptr[j] = ptr[j-h];
534 j = j - h;
535 if (j <= (lo + h - 1)) break;
536 }
537 ptr[j] = v;
538 i++;
539
540 /*-- copy 3 --*/
541 if (i > hi) break;
542 v = ptr[i];
543 j = i;
544 while ( mainGtU (
545 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
546 ) ) {
547 ptr[j] = ptr[j-h];
548 j = j - h;
549 if (j <= (lo + h - 1)) break;
550 }
551 ptr[j] = v;
552 i++;
553
554 if (*budget < 0) return;
555 }
556 }
557 }
558
559
560 /*---------------------------------------------*/
561 /*--
562 The following is an implementation of
563 an elegant 3-way quicksort for strings,
564 described in a paper "Fast Algorithms for
565 Sorting and Searching Strings", by Robert
566 Sedgewick and Jon L. Bentley.
567 --*/
568
569 #define mswap(zz1, zz2) \
570 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
571
572 #define mvswap(zzp1, zzp2, zzn) \
573 { \
574 Int32 yyp1 = (zzp1); \
575 Int32 yyp2 = (zzp2); \
576 Int32 yyn = (zzn); \
577 while (yyn > 0) { \
578 mswap(ptr[yyp1], ptr[yyp2]); \
579 yyp1++; yyp2++; yyn--; \
580 } \
581 }
582
583 static
584 __inline__
mmed3(UChar a,UChar b,UChar c)585 UChar mmed3 ( UChar a, UChar b, UChar c )
586 {
587 UChar t;
588 if (a > b) { t = a; a = b; b = t; };
589 if (b > c) {
590 b = c;
591 if (a > b) b = a;
592 }
593 return b;
594 }
595
596 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
597
598 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
599 stackHi[sp] = hz; \
600 stackD [sp] = dz; \
601 sp++; }
602
603 #define mpop(lz,hz,dz) { sp--; \
604 lz = stackLo[sp]; \
605 hz = stackHi[sp]; \
606 dz = stackD [sp]; }
607
608
609 #define mnextsize(az) (nextHi[az]-nextLo[az])
610
611 #define mnextswap(az,bz) \
612 { Int32 tz; \
613 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
614 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
615 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
616
617
618 #define MAIN_QSORT_SMALL_THRESH 20
619 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
620 #define MAIN_QSORT_STACK_SIZE 100
621
622 static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)623 void mainQSort3 ( UInt32* ptr,
624 UChar* block,
625 UInt16* quadrant,
626 Int32 nblock,
627 Int32 loSt,
628 Int32 hiSt,
629 Int32 dSt,
630 Int32* budget )
631 {
632 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
633 Int32 sp, lo, hi, d;
634
635 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
636 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
637 Int32 stackD [MAIN_QSORT_STACK_SIZE];
638
639 Int32 nextLo[3];
640 Int32 nextHi[3];
641 Int32 nextD [3];
642
643 sp = 0;
644 mpush ( loSt, hiSt, dSt );
645
646 while (sp > 0) {
647
648 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
649
650 mpop ( lo, hi, d );
651 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
652 d > MAIN_QSORT_DEPTH_THRESH) {
653 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
654 if (*budget < 0) return;
655 continue;
656 }
657
658 med = (Int32)
659 mmed3 ( block[ptr[ lo ]+d],
660 block[ptr[ hi ]+d],
661 block[ptr[ (lo+hi)>>1 ]+d] );
662
663 unLo = ltLo = lo;
664 unHi = gtHi = hi;
665
666 while (True) {
667 while (True) {
668 if (unLo > unHi) break;
669 n = ((Int32)block[ptr[unLo]+d]) - med;
670 if (n == 0) {
671 mswap(ptr[unLo], ptr[ltLo]);
672 ltLo++; unLo++; continue;
673 };
674 if (n > 0) break;
675 unLo++;
676 }
677 while (True) {
678 if (unLo > unHi) break;
679 n = ((Int32)block[ptr[unHi]+d]) - med;
680 if (n == 0) {
681 mswap(ptr[unHi], ptr[gtHi]);
682 gtHi--; unHi--; continue;
683 };
684 if (n < 0) break;
685 unHi--;
686 }
687 if (unLo > unHi) break;
688 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
689 }
690
691 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
692
693 if (gtHi < ltLo) {
694 mpush(lo, hi, d+1 );
695 continue;
696 }
697
698 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
699 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
700
701 n = lo + unLo - ltLo - 1;
702 m = hi - (gtHi - unHi) + 1;
703
704 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
705 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
706 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
707
708 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
709 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
710 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
711
712 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
713 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
714
715 mpush (nextLo[0], nextHi[0], nextD[0]);
716 mpush (nextLo[1], nextHi[1], nextD[1]);
717 mpush (nextLo[2], nextHi[2], nextD[2]);
718 }
719 }
720
721 #undef mswap
722 #undef mvswap
723 #undef mpush
724 #undef mpop
725 #undef mmin
726 #undef mnextsize
727 #undef mnextswap
728 #undef MAIN_QSORT_SMALL_THRESH
729 #undef MAIN_QSORT_DEPTH_THRESH
730 #undef MAIN_QSORT_STACK_SIZE
731
732
733 /*---------------------------------------------*/
734 /* Pre:
735 nblock > N_OVERSHOOT
736 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
737 ((UChar*)block32) [0 .. nblock-1] holds block
738 ptr exists for [0 .. nblock-1]
739
740 Post:
741 ((UChar*)block32) [0 .. nblock-1] holds block
742 All other areas of block32 destroyed
743 ftab [0 .. 65536 ] destroyed
744 ptr [0 .. nblock-1] holds sorted order
745 if (*budget < 0), sorting was abandoned
746 */
747
748 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
749 #define SETMASK (1 << 21)
750 #define CLEARMASK (~(SETMASK))
751
752 static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)753 void mainSort ( UInt32* ptr,
754 UChar* block,
755 UInt16* quadrant,
756 UInt32* ftab,
757 Int32 nblock,
758 Int32 verb,
759 Int32* budget )
760 {
761 Int32 i, j, k, ss, sb;
762 Int32 runningOrder[256];
763 Bool bigDone[256];
764 Int32 copyStart[256];
765 Int32 copyEnd [256];
766 UChar c1;
767 Int32 numQSorted;
768 UInt16 s;
769 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
770
771 /*-- set up the 2-byte frequency table --*/
772 for (i = 65536; i >= 0; i--) ftab[i] = 0;
773
774 j = block[0] << 8;
775 i = nblock-1;
776 for (; i >= 3; i -= 4) {
777 quadrant[i] = 0;
778 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
779 ftab[j]++;
780 quadrant[i-1] = 0;
781 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
782 ftab[j]++;
783 quadrant[i-2] = 0;
784 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
785 ftab[j]++;
786 quadrant[i-3] = 0;
787 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
788 ftab[j]++;
789 }
790 for (; i >= 0; i--) {
791 quadrant[i] = 0;
792 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
793 ftab[j]++;
794 }
795
796 /*-- (emphasises close relationship of block & quadrant) --*/
797 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
798 block [nblock+i] = block[i];
799 quadrant[nblock+i] = 0;
800 }
801
802 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
803
804 /*-- Complete the initial radix sort --*/
805 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
806
807 s = block[0] << 8;
808 i = nblock-1;
809 for (; i >= 3; i -= 4) {
810 s = (s >> 8) | (block[i] << 8);
811 j = ftab[s] -1;
812 ftab[s] = j;
813 ptr[j] = i;
814 s = (s >> 8) | (block[i-1] << 8);
815 j = ftab[s] -1;
816 ftab[s] = j;
817 ptr[j] = i-1;
818 s = (s >> 8) | (block[i-2] << 8);
819 j = ftab[s] -1;
820 ftab[s] = j;
821 ptr[j] = i-2;
822 s = (s >> 8) | (block[i-3] << 8);
823 j = ftab[s] -1;
824 ftab[s] = j;
825 ptr[j] = i-3;
826 }
827 for (; i >= 0; i--) {
828 s = (s >> 8) | (block[i] << 8);
829 j = ftab[s] -1;
830 ftab[s] = j;
831 ptr[j] = i;
832 }
833
834 /*--
835 Now ftab contains the first loc of every small bucket.
836 Calculate the running order, from smallest to largest
837 big bucket.
838 --*/
839 for (i = 0; i <= 255; i++) {
840 bigDone [i] = False;
841 runningOrder[i] = i;
842 }
843
844 {
845 Int32 vv;
846 Int32 h = 1;
847 do h = 3 * h + 1; while (h <= 256);
848 do {
849 h = h / 3;
850 for (i = h; i <= 255; i++) {
851 vv = runningOrder[i];
852 j = i;
853 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
854 runningOrder[j] = runningOrder[j-h];
855 j = j - h;
856 if (j <= (h - 1)) goto zero;
857 }
858 zero:
859 runningOrder[j] = vv;
860 }
861 } while (h != 1);
862 }
863
864 /*--
865 The main sorting loop.
866 --*/
867
868 numQSorted = 0;
869
870 for (i = 0; i <= 255; i++) {
871
872 /*--
873 Process big buckets, starting with the least full.
874 Basically this is a 3-step process in which we call
875 mainQSort3 to sort the small buckets [ss, j], but
876 also make a big effort to avoid the calls if we can.
877 --*/
878 ss = runningOrder[i];
879
880 /*--
881 Step 1:
882 Complete the big bucket [ss] by quicksorting
883 any unsorted small buckets [ss, j], for j != ss.
884 Hopefully previous pointer-scanning phases have already
885 completed many of the small buckets [ss, j], so
886 we don't have to sort them at all.
887 --*/
888 for (j = 0; j <= 255; j++) {
889 if (j != ss) {
890 sb = (ss << 8) + j;
891 if ( ! (ftab[sb] & SETMASK) ) {
892 Int32 lo = ftab[sb] & CLEARMASK;
893 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
894 if (hi > lo) {
895 if (verb >= 4)
896 VPrintf4 ( " qsort [0x%x, 0x%x] "
897 "done %d this %d\n",
898 ss, j, numQSorted, hi - lo + 1 );
899 mainQSort3 (
900 ptr, block, quadrant, nblock,
901 lo, hi, BZ_N_RADIX, budget
902 );
903 numQSorted += (hi - lo + 1);
904 if (*budget < 0) return;
905 }
906 }
907 ftab[sb] |= SETMASK;
908 }
909 }
910
911 AssertH ( !bigDone[ss], 1006 );
912
913 /*--
914 Step 2:
915 Now scan this big bucket [ss] so as to synthesise the
916 sorted order for small buckets [t, ss] for all t,
917 including, magically, the bucket [ss,ss] too.
918 This will avoid doing Real Work in subsequent Step 1's.
919 --*/
920 {
921 for (j = 0; j <= 255; j++) {
922 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
923 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
924 }
925 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
926 k = ptr[j]-1; if (k < 0) k += nblock;
927 c1 = block[k];
928 if (!bigDone[c1])
929 ptr[ copyStart[c1]++ ] = k;
930 }
931 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
932 k = ptr[j]-1; if (k < 0) k += nblock;
933 c1 = block[k];
934 if (!bigDone[c1])
935 ptr[ copyEnd[c1]-- ] = k;
936 }
937 }
938
939 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
940 ||
941 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
942 Necessity for this case is demonstrated by compressing
943 a sequence of approximately 48.5 million of character
944 251; 1.0.0/1.0.1 will then die here. */
945 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
946 1007 )
947
948 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
949
950 /*--
951 Step 3:
952 The [ss] big bucket is now done. Record this fact,
953 and update the quadrant descriptors. Remember to
954 update quadrants in the overshoot area too, if
955 necessary. The "if (i < 255)" test merely skips
956 this updating for the last bucket processed, since
957 updating for the last bucket is pointless.
958
959 The quadrant array provides a way to incrementally
960 cache sort orderings, as they appear, so as to
961 make subsequent comparisons in fullGtU() complete
962 faster. For repetitive blocks this makes a big
963 difference (but not big enough to be able to avoid
964 the fallback sorting mechanism, exponential radix sort).
965
966 The precise meaning is: at all times:
967
968 for 0 <= i < nblock and 0 <= j <= nblock
969
970 if block[i] != block[j],
971
972 then the relative values of quadrant[i] and
973 quadrant[j] are meaningless.
974
975 else {
976 if quadrant[i] < quadrant[j]
977 then the string starting at i lexicographically
978 precedes the string starting at j
979
980 else if quadrant[i] > quadrant[j]
981 then the string starting at j lexicographically
982 precedes the string starting at i
983
984 else
985 the relative ordering of the strings starting
986 at i and j has not yet been determined.
987 }
988 --*/
989 bigDone[ss] = True;
990
991 if (i < 255) {
992 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
993 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
994 Int32 shifts = 0;
995
996 while ((bbSize >> shifts) > 65534) shifts++;
997
998 for (j = bbSize-1; j >= 0; j--) {
999 Int32 a2update = ptr[bbStart + j];
1000 UInt16 qVal = (UInt16)(j >> shifts);
1001 quadrant[a2update] = qVal;
1002 if (a2update < BZ_N_OVERSHOOT)
1003 quadrant[a2update + nblock] = qVal;
1004 }
1005 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1006 }
1007
1008 }
1009
1010 if (verb >= 4)
1011 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1012 nblock, numQSorted, nblock - numQSorted );
1013 }
1014
1015 #undef BIGFREQ
1016 #undef SETMASK
1017 #undef CLEARMASK
1018
1019
1020 /*---------------------------------------------*/
1021 /* Pre:
1022 nblock > 0
1023 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1024 ((UChar*)arr2) [0 .. nblock-1] holds block
1025 arr1 exists for [0 .. nblock-1]
1026
1027 Post:
1028 ((UChar*)arr2) [0 .. nblock-1] holds block
1029 All other areas of block destroyed
1030 ftab [ 0 .. 65536 ] destroyed
1031 arr1 [0 .. nblock-1] holds sorted order
1032 */
BZ2_blockSort(EState * s)1033 void BZ2_blockSort ( EState* s )
1034 {
1035 UInt32* ptr = s->ptr;
1036 UChar* block = s->block;
1037 UInt32* ftab = s->ftab;
1038 Int32 nblock = s->nblock;
1039 Int32 verb = s->verbosity;
1040 Int32 wfact = s->workFactor;
1041 UInt16* quadrant;
1042 Int32 budget;
1043 Int32 budgetInit;
1044 Int32 i;
1045
1046 if (nblock < 10000) {
1047 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1048 } else {
1049 /* Calculate the location for quadrant, remembering to get
1050 the alignment right. Assumes that &(block[0]) is at least
1051 2-byte aligned -- this should be ok since block is really
1052 the first section of arr2.
1053 */
1054 i = nblock+BZ_N_OVERSHOOT;
1055 if (i & 1) i++;
1056 quadrant = (UInt16*)(&(block[i]));
1057
1058 /* (wfact-1) / 3 puts the default-factor-30
1059 transition point at very roughly the same place as
1060 with v0.1 and v0.9.0.
1061 Not that it particularly matters any more, since the
1062 resulting compressed stream is now the same regardless
1063 of whether or not we use the main sort or fallback sort.
1064 */
1065 if (wfact < 1 ) wfact = 1;
1066 if (wfact > 100) wfact = 100;
1067 budgetInit = nblock * ((wfact-1) / 3);
1068 budget = budgetInit;
1069
1070 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1071 if (verb >= 3)
1072 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1073 budgetInit - budget,
1074 nblock,
1075 (float)(budgetInit - budget) /
1076 (float)(nblock==0 ? 1 : nblock) );
1077 if (budget < 0) {
1078 if (verb >= 2)
1079 VPrintf0 ( " too repetitive; using fallback"
1080 " sorting algorithm\n" );
1081 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1082 }
1083 }
1084
1085 s->origPtr = -1;
1086 for (i = 0; i < s->nblock; i++)
1087 if (ptr[i] == 0)
1088 { s->origPtr = i; break; };
1089
1090 AssertH( s->origPtr != -1, 1003 );
1091 }
1092
1093
1094 /*-------------------------------------------------------------*/
1095 /*--- end blocksort.c ---*/
1096 /*-------------------------------------------------------------*/
1097
1098 ABC_NAMESPACE_IMPL_END
1099