1
2 /*-------------------------------------------------------------*/
3 /*--- Block sorting machinery ---*/
4 /*--- blocksort.c ---*/
5 /*-------------------------------------------------------------*/
6
7 /* ------------------------------------------------------------------
8 This file is part of bzip2/libbzip2, a program and library for
9 lossless, block-sorting data compression.
10
11 bzip2/libbzip2 version 1.0.5 of 10 December 2007
12 Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13
14 Please read the WARNING, DISCLAIMER and PATENTS sections in the
15 README file.
16
17 This program is released under the terms of the license contained
18 in the file LICENSE.
19 ------------------------------------------------------------------ */
20
21 #include "bzlib_private.h"
22
23 /*---------------------------------------------*/
24 /*--- Fallback O(N log(N)^2) sorting ---*/
25 /*--- algorithm, for repetitive blocks ---*/
26 /*---------------------------------------------*/
27
28 /*---------------------------------------------*/
29 static
30 __inline__
fallbackSimpleSort(UInt32 * fmap,UInt32 * eclass,Int32 lo,Int32 hi)31 void fallbackSimpleSort ( UInt32* fmap,
32 UInt32* eclass,
33 Int32 lo,
34 Int32 hi )
35 {
36 Int32 i, j, tmp;
37 UInt32 ec_tmp;
38
39 if (lo == hi) return;
40
41 if (hi - lo > 3) {
42 for ( i = hi-4; i >= lo; i-- ) {
43 tmp = fmap[i];
44 ec_tmp = eclass[tmp];
45 for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
46 fmap[j-4] = fmap[j];
47 fmap[j-4] = tmp;
48 }
49 }
50
51 for ( i = hi-1; i >= lo; i-- ) {
52 tmp = fmap[i];
53 ec_tmp = eclass[tmp];
54 for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
55 fmap[j-1] = fmap[j];
56 fmap[j-1] = tmp;
57 }
58 }
59
60 /*---------------------------------------------*/
61 #define fswap(zz1, zz2) \
62 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
63
64 #define fvswap(zzp1, zzp2, zzn) \
65 { \
66 Int32 yyp1 = (zzp1); \
67 Int32 yyp2 = (zzp2); \
68 Int32 yyn = (zzn); \
69 while (yyn > 0) { \
70 fswap(fmap[yyp1], fmap[yyp2]); \
71 yyp1++; yyp2++; yyn--; \
72 } \
73 }
74
75 #define fmin(a,b) ((a) < (b)) ? (a) : (b)
76
77 #define fpush(lz,hz) { stackLo[sp] = lz; \
78 stackHi[sp] = hz; \
79 sp++; }
80
81 #define fpop(lz,hz) { sp--; \
82 lz = stackLo[sp]; \
83 hz = stackHi[sp]; }
84
85 #define FALLBACK_QSORT_SMALL_THRESH 10
86 #define FALLBACK_QSORT_STACK_SIZE 100
87
88
89 static
fallbackQSort3(UInt32 * fmap,UInt32 * eclass,Int32 loSt,Int32 hiSt)90 void fallbackQSort3 ( UInt32* fmap,
91 UInt32* eclass,
92 Int32 loSt,
93 Int32 hiSt )
94 {
95 Int32 unLo, unHi, ltLo, gtHi, n, m;
96 Int32 sp, lo, hi;
97 UInt32 med, r, r3;
98 Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
99 Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
100
101 r = 0;
102
103 sp = 0;
104 fpush ( loSt, hiSt );
105
106 while (sp > 0) {
107
108 AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
109
110 fpop ( lo, hi );
111 if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
112 fallbackSimpleSort ( fmap, eclass, lo, hi );
113 continue;
114 }
115
116 /* Random partitioning. Median of 3 sometimes fails to
117 avoid bad cases. Median of 9 seems to help but
118 looks rather expensive. This too seems to work but
119 is cheaper. Guidance for the magic constants
120 7621 and 32768 is taken from Sedgewick's algorithms
121 book, chapter 35.
122 */
123 r = ((r * 7621) + 1) % 32768;
124 r3 = r % 3;
125 if (r3 == 0) med = eclass[fmap[lo]]; else
126 if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
127 med = eclass[fmap[hi]];
128
129 unLo = ltLo = lo;
130 unHi = gtHi = hi;
131
132 while (1) {
133 while (1) {
134 if (unLo > unHi) break;
135 n = (Int32)eclass[fmap[unLo]] - (Int32)med;
136 if (n == 0) {
137 fswap(fmap[unLo], fmap[ltLo]);
138 ltLo++; unLo++;
139 continue;
140 };
141 if (n > 0) break;
142 unLo++;
143 }
144 while (1) {
145 if (unLo > unHi) break;
146 n = (Int32)eclass[fmap[unHi]] - (Int32)med;
147 if (n == 0) {
148 fswap(fmap[unHi], fmap[gtHi]);
149 gtHi--; unHi--;
150 continue;
151 };
152 if (n < 0) break;
153 unHi--;
154 }
155 if (unLo > unHi) break;
156 fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
157 }
158
159 AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
160
161 if (gtHi < ltLo) continue;
162
163 n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
164 m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
165
166 n = lo + unLo - ltLo - 1;
167 m = hi - (gtHi - unHi) + 1;
168
169 if (n - lo > hi - m) {
170 fpush ( lo, n );
171 fpush ( m, hi );
172 } else {
173 fpush ( m, hi );
174 fpush ( lo, n );
175 }
176 }
177 }
178
179 #undef fmin
180 #undef fpush
181 #undef fpop
182 #undef fswap
183 #undef fvswap
184 #undef FALLBACK_QSORT_SMALL_THRESH
185 #undef FALLBACK_QSORT_STACK_SIZE
186
187 /*---------------------------------------------*/
188 /* Pre:
189 nblock > 0
190 eclass exists for [0 .. nblock-1]
191 ((UChar*)eclass) [0 .. nblock-1] holds block
192 ptr exists for [0 .. nblock-1]
193
194 Post:
195 ((UChar*)eclass) [0 .. nblock-1] holds block
196 All other areas of eclass destroyed
197 fmap [0 .. nblock-1] holds sorted order
198 bhtab [ 0 .. 2+(nblock/32) ] destroyed
199 */
200
201 #define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
202 #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
203 #define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
204 #define WORD_BH(zz) bhtab[(zz) >> 5]
205 #define UNALIGNED_BH(zz) ((zz) & 0x01f)
206
207 static
fallbackSort(UInt32 * fmap,UInt32 * eclass,UInt32 * bhtab,Int32 nblock,Int32 verb)208 void fallbackSort ( UInt32* fmap,
209 UInt32* eclass,
210 UInt32* bhtab,
211 Int32 nblock,
212 Int32 verb )
213 {
214 Int32 ftab[257];
215 Int32 ftabCopy[256];
216 Int32 H, i, j, k, l, r, cc, cc1;
217 Int32 nNotDone;
218 Int32 nBhtab;
219 UChar* eclass8 = (UChar*)eclass;
220
221 /*--
222 Initial 1-char radix sort to generate
223 initial fmap and initial BH bits.
224 --*/
225 if (verb >= 4)
226 VPrintf0 ( " bucket sorting ...\n" );
227 for (i = 0; i < 257; i++) ftab[i] = 0;
228 for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
229 for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
230 for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
231
232 for (i = 0; i < nblock; i++) {
233 j = eclass8[i];
234 k = ftab[j] - 1;
235 ftab[j] = k;
236 fmap[k] = i;
237 }
238
239 nBhtab = 2 + (nblock / 32);
240 for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
241 for (i = 0; i < 256; i++) SET_BH(ftab[i]);
242
243 /*--
244 Inductively refine the buckets. Kind-of an
245 "exponential radix sort" (!), inspired by the
246 Manber-Myers suffix array construction algorithm.
247 --*/
248
249 /*-- set sentinel bits for block-end detection --*/
250 for (i = 0; i < 32; i++) {
251 SET_BH(nblock + 2*i);
252 CLEAR_BH(nblock + 2*i + 1);
253 }
254
255 /*-- the log(N) loop --*/
256 H = 1;
257 while (1) {
258
259 if (verb >= 4)
260 VPrintf1 ( " depth %6d has ", H );
261
262 j = 0;
263 for (i = 0; i < nblock; i++) {
264 if (ISSET_BH(i)) j = i;
265 k = fmap[i] - H; if (k < 0) k += nblock;
266 eclass[k] = j;
267 }
268
269 nNotDone = 0;
270 r = -1;
271 while (1) {
272
273 /*-- find the next non-singleton bucket --*/
274 k = r + 1;
275 while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
276 if (ISSET_BH(k)) {
277 while (WORD_BH(k) == 0xffffffff) k += 32;
278 while (ISSET_BH(k)) k++;
279 }
280 l = k - 1;
281 if (l >= nblock) break;
282 while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
283 if (!ISSET_BH(k)) {
284 while (WORD_BH(k) == 0x00000000) k += 32;
285 while (!ISSET_BH(k)) k++;
286 }
287 r = k - 1;
288 if (r >= nblock) break;
289
290 /*-- now [l, r] bracket current bucket --*/
291 if (r > l) {
292 nNotDone += (r - l + 1);
293 fallbackQSort3 ( fmap, eclass, l, r );
294
295 /*-- scan bucket and generate header bits-- */
296 cc = -1;
297 for (i = l; i <= r; i++) {
298 cc1 = eclass[fmap[i]];
299 if (cc != cc1) { SET_BH(i); cc = cc1; };
300 }
301 }
302 }
303
304 if (verb >= 4)
305 VPrintf1 ( "%6d unresolved strings\n", nNotDone );
306
307 H *= 2;
308 if (H > nblock || nNotDone == 0) break;
309 }
310
311 /*--
312 Reconstruct the original block in
313 eclass8 [0 .. nblock-1], since the
314 previous phase destroyed it.
315 --*/
316 if (verb >= 4)
317 VPrintf0 ( " reconstructing block ...\n" );
318 j = 0;
319 for (i = 0; i < nblock; i++) {
320 while (ftabCopy[j] == 0) j++;
321 ftabCopy[j]--;
322 eclass8[fmap[i]] = (UChar)j;
323 }
324 AssertH ( j < 256, 1005 );
325 }
326
327 #undef SET_BH
328 #undef CLEAR_BH
329 #undef ISSET_BH
330 #undef WORD_BH
331 #undef UNALIGNED_BH
332
333 /*---------------------------------------------*/
334 /*--- The main, O(N^2 log(N)) sorting ---*/
335 /*--- algorithm. Faster for "normal" ---*/
336 /*--- non-repetitive blocks. ---*/
337 /*---------------------------------------------*/
338
339 /*---------------------------------------------*/
340 static
341 __inline__
mainGtU(UInt32 i1,UInt32 i2,UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * budget)342 Bool mainGtU ( UInt32 i1,
343 UInt32 i2,
344 UChar* block,
345 UInt16* quadrant,
346 UInt32 nblock,
347 Int32* budget )
348 {
349 Int32 k;
350 UChar c1, c2;
351 UInt16 s1, s2;
352
353 AssertD ( i1 != i2, "mainGtU" );
354 /* 1 */
355 c1 = block[i1]; c2 = block[i2];
356 if (c1 != c2) return (c1 > c2);
357 i1++; i2++;
358 /* 2 */
359 c1 = block[i1]; c2 = block[i2];
360 if (c1 != c2) return (c1 > c2);
361 i1++; i2++;
362 /* 3 */
363 c1 = block[i1]; c2 = block[i2];
364 if (c1 != c2) return (c1 > c2);
365 i1++; i2++;
366 /* 4 */
367 c1 = block[i1]; c2 = block[i2];
368 if (c1 != c2) return (c1 > c2);
369 i1++; i2++;
370 /* 5 */
371 c1 = block[i1]; c2 = block[i2];
372 if (c1 != c2) return (c1 > c2);
373 i1++; i2++;
374 /* 6 */
375 c1 = block[i1]; c2 = block[i2];
376 if (c1 != c2) return (c1 > c2);
377 i1++; i2++;
378 /* 7 */
379 c1 = block[i1]; c2 = block[i2];
380 if (c1 != c2) return (c1 > c2);
381 i1++; i2++;
382 /* 8 */
383 c1 = block[i1]; c2 = block[i2];
384 if (c1 != c2) return (c1 > c2);
385 i1++; i2++;
386 /* 9 */
387 c1 = block[i1]; c2 = block[i2];
388 if (c1 != c2) return (c1 > c2);
389 i1++; i2++;
390 /* 10 */
391 c1 = block[i1]; c2 = block[i2];
392 if (c1 != c2) return (c1 > c2);
393 i1++; i2++;
394 /* 11 */
395 c1 = block[i1]; c2 = block[i2];
396 if (c1 != c2) return (c1 > c2);
397 i1++; i2++;
398 /* 12 */
399 c1 = block[i1]; c2 = block[i2];
400 if (c1 != c2) return (c1 > c2);
401 i1++; i2++;
402
403 k = nblock + 8;
404
405 do {
406 /* 1 */
407 c1 = block[i1]; c2 = block[i2];
408 if (c1 != c2) return (c1 > c2);
409 s1 = quadrant[i1]; s2 = quadrant[i2];
410 if (s1 != s2) return (s1 > s2);
411 i1++; i2++;
412 /* 2 */
413 c1 = block[i1]; c2 = block[i2];
414 if (c1 != c2) return (c1 > c2);
415 s1 = quadrant[i1]; s2 = quadrant[i2];
416 if (s1 != s2) return (s1 > s2);
417 i1++; i2++;
418 /* 3 */
419 c1 = block[i1]; c2 = block[i2];
420 if (c1 != c2) return (c1 > c2);
421 s1 = quadrant[i1]; s2 = quadrant[i2];
422 if (s1 != s2) return (s1 > s2);
423 i1++; i2++;
424 /* 4 */
425 c1 = block[i1]; c2 = block[i2];
426 if (c1 != c2) return (c1 > c2);
427 s1 = quadrant[i1]; s2 = quadrant[i2];
428 if (s1 != s2) return (s1 > s2);
429 i1++; i2++;
430 /* 5 */
431 c1 = block[i1]; c2 = block[i2];
432 if (c1 != c2) return (c1 > c2);
433 s1 = quadrant[i1]; s2 = quadrant[i2];
434 if (s1 != s2) return (s1 > s2);
435 i1++; i2++;
436 /* 6 */
437 c1 = block[i1]; c2 = block[i2];
438 if (c1 != c2) return (c1 > c2);
439 s1 = quadrant[i1]; s2 = quadrant[i2];
440 if (s1 != s2) return (s1 > s2);
441 i1++; i2++;
442 /* 7 */
443 c1 = block[i1]; c2 = block[i2];
444 if (c1 != c2) return (c1 > c2);
445 s1 = quadrant[i1]; s2 = quadrant[i2];
446 if (s1 != s2) return (s1 > s2);
447 i1++; i2++;
448 /* 8 */
449 c1 = block[i1]; c2 = block[i2];
450 if (c1 != c2) return (c1 > c2);
451 s1 = quadrant[i1]; s2 = quadrant[i2];
452 if (s1 != s2) return (s1 > s2);
453 i1++; i2++;
454
455 if (i1 >= nblock) i1 -= nblock;
456 if (i2 >= nblock) i2 -= nblock;
457
458 k -= 8;
459 (*budget)--;
460 }
461 while (k >= 0);
462
463 return False;
464 }
465
466 /*---------------------------------------------*/
467 /*--
468 Knuth's increments seem to work better
469 than Incerpi-Sedgewick here. Possibly
470 because the number of elems to sort is
471 usually small, typically <= 20.
472 --*/
473 static
474 Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
475 9841, 29524, 88573, 265720,
476 797161, 2391484 };
477
478 static
mainSimpleSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 lo,Int32 hi,Int32 d,Int32 * budget)479 void mainSimpleSort ( UInt32* ptr,
480 UChar* block,
481 UInt16* quadrant,
482 Int32 nblock,
483 Int32 lo,
484 Int32 hi,
485 Int32 d,
486 Int32* budget )
487 {
488 Int32 i, j, h, bigN, hp;
489 UInt32 v;
490
491 bigN = hi - lo + 1;
492 if (bigN < 2) return;
493
494 hp = 0;
495 while (incs[hp] < bigN) hp++;
496 hp--;
497
498 for (; hp >= 0; hp--) {
499 h = incs[hp];
500
501 i = lo + h;
502 while (True) {
503
504 /*-- copy 1 --*/
505 if (i > hi) break;
506 v = ptr[i];
507 j = i;
508 while ( mainGtU (
509 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
510 ) ) {
511 ptr[j] = ptr[j-h];
512 j = j - h;
513 if (j <= (lo + h - 1)) break;
514 }
515 ptr[j] = v;
516 i++;
517
518 /*-- copy 2 --*/
519 if (i > hi) break;
520 v = ptr[i];
521 j = i;
522 while ( mainGtU (
523 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
524 ) ) {
525 ptr[j] = ptr[j-h];
526 j = j - h;
527 if (j <= (lo + h - 1)) break;
528 }
529 ptr[j] = v;
530 i++;
531
532 /*-- copy 3 --*/
533 if (i > hi) break;
534 v = ptr[i];
535 j = i;
536 while ( mainGtU (
537 ptr[j-h]+d, v+d, block, quadrant, nblock, budget
538 ) ) {
539 ptr[j] = ptr[j-h];
540 j = j - h;
541 if (j <= (lo + h - 1)) break;
542 }
543 ptr[j] = v;
544 i++;
545
546 if (*budget < 0) return;
547 }
548 }
549 }
550
551 /*---------------------------------------------*/
552 /*--
553 The following is an implementation of
554 an elegant 3-way quicksort for strings,
555 described in a paper "Fast Algorithms for
556 Sorting and Searching Strings", by Robert
557 Sedgewick and Jon L. Bentley.
558 --*/
559
560 #define mswap(zz1, zz2) \
561 { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
562
563 #define mvswap(zzp1, zzp2, zzn) \
564 { \
565 Int32 yyp1 = (zzp1); \
566 Int32 yyp2 = (zzp2); \
567 Int32 yyn = (zzn); \
568 while (yyn > 0) { \
569 mswap(ptr[yyp1], ptr[yyp2]); \
570 yyp1++; yyp2++; yyn--; \
571 } \
572 }
573
574 static
575 __inline__
mmed3(UChar a,UChar b,UChar c)576 UChar mmed3 ( UChar a, UChar b, UChar c )
577 {
578 UChar t;
579 if (a > b) { t = a; a = b; b = t; };
580 if (b > c) {
581 b = c;
582 if (a > b) b = a;
583 }
584 return b;
585 }
586
587 #define mmin(a,b) ((a) < (b)) ? (a) : (b)
588
589 #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
590 stackHi[sp] = hz; \
591 stackD [sp] = dz; \
592 sp++; }
593
594 #define mpop(lz,hz,dz) { sp--; \
595 lz = stackLo[sp]; \
596 hz = stackHi[sp]; \
597 dz = stackD [sp]; }
598
599
600 #define mnextsize(az) (nextHi[az]-nextLo[az])
601
602 #define mnextswap(az,bz) \
603 { Int32 tz; \
604 tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
605 tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
606 tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
607
608
609 #define MAIN_QSORT_SMALL_THRESH 20
610 #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
611 #define MAIN_QSORT_STACK_SIZE 100
612
613 static
mainQSort3(UInt32 * ptr,UChar * block,UInt16 * quadrant,Int32 nblock,Int32 loSt,Int32 hiSt,Int32 dSt,Int32 * budget)614 void mainQSort3 ( UInt32* ptr,
615 UChar* block,
616 UInt16* quadrant,
617 Int32 nblock,
618 Int32 loSt,
619 Int32 hiSt,
620 Int32 dSt,
621 Int32* budget )
622 {
623 Int32 unLo, unHi, ltLo, gtHi, n, m, med;
624 Int32 sp, lo, hi, d;
625
626 Int32 stackLo[MAIN_QSORT_STACK_SIZE];
627 Int32 stackHi[MAIN_QSORT_STACK_SIZE];
628 Int32 stackD [MAIN_QSORT_STACK_SIZE];
629
630 Int32 nextLo[3];
631 Int32 nextHi[3];
632 Int32 nextD [3];
633
634 sp = 0;
635 mpush ( loSt, hiSt, dSt );
636
637 while (sp > 0) {
638
639 AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
640
641 mpop ( lo, hi, d );
642 if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
643 d > MAIN_QSORT_DEPTH_THRESH) {
644 mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
645 if (*budget < 0) return;
646 continue;
647 }
648
649 med = (Int32)
650 mmed3 ( block[ptr[ lo ]+d],
651 block[ptr[ hi ]+d],
652 block[ptr[ (lo+hi)>>1 ]+d] );
653
654 unLo = ltLo = lo;
655 unHi = gtHi = hi;
656
657 while (True) {
658 while (True) {
659 if (unLo > unHi) break;
660 n = ((Int32)block[ptr[unLo]+d]) - med;
661 if (n == 0) {
662 mswap(ptr[unLo], ptr[ltLo]);
663 ltLo++; unLo++; continue;
664 };
665 if (n > 0) break;
666 unLo++;
667 }
668 while (True) {
669 if (unLo > unHi) break;
670 n = ((Int32)block[ptr[unHi]+d]) - med;
671 if (n == 0) {
672 mswap(ptr[unHi], ptr[gtHi]);
673 gtHi--; unHi--; continue;
674 };
675 if (n < 0) break;
676 unHi--;
677 }
678 if (unLo > unHi) break;
679 mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
680 }
681
682 AssertD ( unHi == unLo-1, "mainQSort3(2)" );
683
684 if (gtHi < ltLo) {
685 mpush(lo, hi, d+1 );
686 continue;
687 }
688
689 n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
690 m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
691
692 n = lo + unLo - ltLo - 1;
693 m = hi - (gtHi - unHi) + 1;
694
695 nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
696 nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
697 nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
698
699 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
700 if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
701 if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
702
703 AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
704 AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
705
706 mpush (nextLo[0], nextHi[0], nextD[0]);
707 mpush (nextLo[1], nextHi[1], nextD[1]);
708 mpush (nextLo[2], nextHi[2], nextD[2]);
709 }
710 }
711
712 #undef mswap
713 #undef mvswap
714 #undef mpush
715 #undef mpop
716 #undef mmin
717 #undef mnextsize
718 #undef mnextswap
719 #undef MAIN_QSORT_SMALL_THRESH
720 #undef MAIN_QSORT_DEPTH_THRESH
721 #undef MAIN_QSORT_STACK_SIZE
722
723 /*---------------------------------------------*/
724 /* Pre:
725 nblock > N_OVERSHOOT
726 block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
727 ((UChar*)block32) [0 .. nblock-1] holds block
728 ptr exists for [0 .. nblock-1]
729
730 Post:
731 ((UChar*)block32) [0 .. nblock-1] holds block
732 All other areas of block32 destroyed
733 ftab [0 .. 65536 ] destroyed
734 ptr [0 .. nblock-1] holds sorted order
735 if (*budget < 0), sorting was abandoned
736 */
737
738 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
739 #define SETMASK (1 << 21)
740 #define CLEARMASK (~(SETMASK))
741
742 static
mainSort(UInt32 * ptr,UChar * block,UInt16 * quadrant,UInt32 * ftab,Int32 nblock,Int32 verb,Int32 * budget)743 void mainSort ( UInt32* ptr,
744 UChar* block,
745 UInt16* quadrant,
746 UInt32* ftab,
747 Int32 nblock,
748 Int32 verb,
749 Int32* budget )
750 {
751 Int32 i, j, k, ss, sb;
752 Int32 runningOrder[256];
753 Bool bigDone[256];
754 Int32 copyStart[256];
755 Int32 copyEnd [256];
756 UChar c1;
757 Int32 numQSorted;
758 UInt16 s;
759 if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
760
761 /*-- set up the 2-byte frequency table --*/
762 for (i = 65536; i >= 0; i--) ftab[i] = 0;
763
764 j = block[0] << 8;
765 i = nblock-1;
766 for (; i >= 3; i -= 4) {
767 quadrant[i] = 0;
768 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
769 ftab[j]++;
770 quadrant[i-1] = 0;
771 j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
772 ftab[j]++;
773 quadrant[i-2] = 0;
774 j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
775 ftab[j]++;
776 quadrant[i-3] = 0;
777 j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
778 ftab[j]++;
779 }
780 for (; i >= 0; i--) {
781 quadrant[i] = 0;
782 j = (j >> 8) | ( ((UInt16)block[i]) << 8);
783 ftab[j]++;
784 }
785
786 /*-- (emphasises close relationship of block & quadrant) --*/
787 for (i = 0; i < BZ_N_OVERSHOOT; i++) {
788 block [nblock+i] = block[i];
789 quadrant[nblock+i] = 0;
790 }
791
792 if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
793
794 /*-- Complete the initial radix sort --*/
795 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
796
797 s = block[0] << 8;
798 i = nblock-1;
799 for (; i >= 3; i -= 4) {
800 s = (s >> 8) | (block[i] << 8);
801 j = ftab[s] -1;
802 ftab[s] = j;
803 ptr[j] = i;
804 s = (s >> 8) | (block[i-1] << 8);
805 j = ftab[s] -1;
806 ftab[s] = j;
807 ptr[j] = i-1;
808 s = (s >> 8) | (block[i-2] << 8);
809 j = ftab[s] -1;
810 ftab[s] = j;
811 ptr[j] = i-2;
812 s = (s >> 8) | (block[i-3] << 8);
813 j = ftab[s] -1;
814 ftab[s] = j;
815 ptr[j] = i-3;
816 }
817 for (; i >= 0; i--) {
818 s = (s >> 8) | (block[i] << 8);
819 j = ftab[s] -1;
820 ftab[s] = j;
821 ptr[j] = i;
822 }
823
824 /*--
825 Now ftab contains the first loc of every small bucket.
826 Calculate the running order, from smallest to largest
827 big bucket.
828 --*/
829 for (i = 0; i <= 255; i++) {
830 bigDone [i] = False;
831 runningOrder[i] = i;
832 }
833
834 {
835 Int32 vv;
836 Int32 h = 1;
837 do h = 3 * h + 1; while (h <= 256);
838 do {
839 h = h / 3;
840 for (i = h; i <= 255; i++) {
841 vv = runningOrder[i];
842 j = i;
843 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
844 runningOrder[j] = runningOrder[j-h];
845 j = j - h;
846 if (j <= (h - 1)) goto zero;
847 }
848 zero:
849 runningOrder[j] = vv;
850 }
851 } while (h != 1);
852 }
853
854 /*--
855 The main sorting loop.
856 --*/
857
858 numQSorted = 0;
859
860 for (i = 0; i <= 255; i++) {
861
862 /*--
863 Process big buckets, starting with the least full.
864 Basically this is a 3-step process in which we call
865 mainQSort3 to sort the small buckets [ss, j], but
866 also make a big effort to avoid the calls if we can.
867 --*/
868 ss = runningOrder[i];
869
870 /*--
871 Step 1:
872 Complete the big bucket [ss] by quicksorting
873 any unsorted small buckets [ss, j], for j != ss.
874 Hopefully previous pointer-scanning phases have already
875 completed many of the small buckets [ss, j], so
876 we don't have to sort them at all.
877 --*/
878 for (j = 0; j <= 255; j++) {
879 if (j != ss) {
880 sb = (ss << 8) + j;
881 if ( ! (ftab[sb] & SETMASK) ) {
882 Int32 lo = ftab[sb] & CLEARMASK;
883 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
884 if (hi > lo) {
885 if (verb >= 4)
886 VPrintf4 ( " qsort [0x%x, 0x%x] "
887 "done %d this %d\n",
888 ss, j, numQSorted, hi - lo + 1 );
889 mainQSort3 (
890 ptr, block, quadrant, nblock,
891 lo, hi, BZ_N_RADIX, budget
892 );
893 numQSorted += (hi - lo + 1);
894 if (*budget < 0) return;
895 }
896 }
897 ftab[sb] |= SETMASK;
898 }
899 }
900
901 AssertH ( !bigDone[ss], 1006 );
902
903 /*--
904 Step 2:
905 Now scan this big bucket [ss] so as to synthesise the
906 sorted order for small buckets [t, ss] for all t,
907 including, magically, the bucket [ss,ss] too.
908 This will avoid doing Real Work in subsequent Step 1's.
909 --*/
910 {
911 for (j = 0; j <= 255; j++) {
912 copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
913 copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
914 }
915 for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
916 k = ptr[j]-1; if (k < 0) k += nblock;
917 c1 = block[k];
918 if (!bigDone[c1])
919 ptr[ copyStart[c1]++ ] = k;
920 }
921 for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
922 k = ptr[j]-1; if (k < 0) k += nblock;
923 c1 = block[k];
924 if (!bigDone[c1])
925 ptr[ copyEnd[c1]-- ] = k;
926 }
927 }
928
929 AssertH ( (copyStart[ss]-1 == copyEnd[ss])
930 ||
931 /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
932 Necessity for this case is demonstrated by compressing
933 a sequence of approximately 48.5 million of character
934 251; 1.0.0/1.0.1 will then die here. */
935 (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
936 1007 )
937
938 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
939
940 /*--
941 Step 3:
942 The [ss] big bucket is now done. Record this fact,
943 and update the quadrant descriptors. Remember to
944 update quadrants in the overshoot area too, if
945 necessary. The "if (i < 255)" test merely skips
946 this updating for the last bucket processed, since
947 updating for the last bucket is pointless.
948
949 The quadrant array provides a way to incrementally
950 cache sort orderings, as they appear, so as to
951 make subsequent comparisons in fullGtU() complete
952 faster. For repetitive blocks this makes a big
953 difference (but not big enough to be able to avoid
954 the fallback sorting mechanism, exponential radix sort).
955
956 The precise meaning is: at all times:
957
958 for 0 <= i < nblock and 0 <= j <= nblock
959
960 if block[i] != block[j],
961
962 then the relative values of quadrant[i] and
963 quadrant[j] are meaningless.
964
965 else {
966 if quadrant[i] < quadrant[j]
967 then the string starting at i lexicographically
968 precedes the string starting at j
969
970 else if quadrant[i] > quadrant[j]
971 then the string starting at j lexicographically
972 precedes the string starting at i
973
974 else
975 the relative ordering of the strings starting
976 at i and j has not yet been determined.
977 }
978 --*/
979 bigDone[ss] = True;
980
981 if (i < 255) {
982 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
983 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
984 Int32 shifts = 0;
985
986 while ((bbSize >> shifts) > 65534) shifts++;
987
988 for (j = bbSize-1; j >= 0; j--) {
989 Int32 a2update = ptr[bbStart + j];
990 UInt16 qVal = (UInt16)(j >> shifts);
991 quadrant[a2update] = qVal;
992 if (a2update < BZ_N_OVERSHOOT)
993 quadrant[a2update + nblock] = qVal;
994 }
995 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
996 }
997
998 }
999
1000 if (verb >= 4)
1001 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
1002 nblock, numQSorted, nblock - numQSorted );
1003 }
1004
1005 #undef BIGFREQ
1006 #undef SETMASK
1007 #undef CLEARMASK
1008
1009 /*---------------------------------------------*/
1010 /* Pre:
1011 nblock > 0
1012 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1013 ((UChar*)arr2) [0 .. nblock-1] holds block
1014 arr1 exists for [0 .. nblock-1]
1015
1016 Post:
1017 ((UChar*)arr2) [0 .. nblock-1] holds block
1018 All other areas of block destroyed
1019 ftab [ 0 .. 65536 ] destroyed
1020 arr1 [0 .. nblock-1] holds sorted order
1021 */
BZ2_blockSort(EState * s)1022 void BZ2_blockSort ( EState* s )
1023 {
1024 UInt32* ptr = s->ptr;
1025 UChar* block = s->block;
1026 UInt32* ftab = s->ftab;
1027 Int32 nblock = s->nblock;
1028 Int32 verb = s->verbosity;
1029 Int32 wfact = s->workFactor;
1030 UInt16* quadrant;
1031 Int32 budget;
1032 Int32 budgetInit;
1033 Int32 i;
1034
1035 if (nblock < 10000) {
1036 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1037 } else {
1038 /* Calculate the location for quadrant, remembering to get
1039 the alignment right. Assumes that &(block[0]) is at least
1040 2-byte aligned -- this should be ok since block is really
1041 the first section of arr2.
1042 */
1043 i = nblock+BZ_N_OVERSHOOT;
1044 if (i & 1) i++;
1045 quadrant = (UInt16*)(&(block[i]));
1046
1047 /* (wfact-1) / 3 puts the default-factor-30
1048 transition point at very roughly the same place as
1049 with v0.1 and v0.9.0.
1050 Not that it particularly matters any more, since the
1051 resulting compressed stream is now the same regardless
1052 of whether or not we use the main sort or fallback sort.
1053 */
1054 if (wfact < 1 ) wfact = 1;
1055 if (wfact > 100) wfact = 100;
1056 budgetInit = nblock * ((wfact-1) / 3);
1057 budget = budgetInit;
1058
1059 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1060 if (verb >= 3)
1061 VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
1062 budgetInit - budget,
1063 nblock,
1064 (float)(budgetInit - budget) /
1065 (float)(nblock==0 ? 1 : nblock) );
1066 if (budget < 0) {
1067 if (verb >= 2)
1068 VPrintf0 ( " too repetitive; using fallback"
1069 " sorting algorithm\n" );
1070 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1071 }
1072 }
1073
1074 s->origPtr = -1;
1075 for (i = 0; i < s->nblock; i++)
1076 if (ptr[i] == 0)
1077 { s->origPtr = i; break; };
1078
1079 AssertH( s->origPtr != -1, 1003 );
1080 }
1081
1082 /*-------------------------------------------------------------*/
1083 /*--- end blocksort.c ---*/
1084 /*-------------------------------------------------------------*/
1085