1 #pragma prototyped
2
3 /*-------------------------------------------------------------*/
4 /*--- Block sorting machinery ---*/
5 /*--- blocksort.c ---*/
6 /*-------------------------------------------------------------*/
7
8 /*--
9 This file is a part of bzip2 and/or libbzip2, a program and
10 library for lossless, block-sorting data compression.
11
12 Copyright (C) 1996-1998 Julian R Seward. All rights reserved.
13
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions
16 are met:
17
18 1. Redistributions of source code must retain the above copyright
19 notice, this list of conditions and the following disclaimer.
20
21 2. The origin of this software must not be misrepresented; you must
22 not claim that you wrote the original software. If you use this
23 software in a product, an acknowledgment in the product
24 documentation would be appreciated but is not required.
25
26 3. Altered source versions must be plainly marked as such, and must
27 not be misrepresented as being the original software.
28
29 4. The name of the author may not be used to endorse or promote
30 products derived from this software without specific prior written
31 permission.
32
33 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
34 OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
35 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36 ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
37 DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
38 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
39 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
41 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
42 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
43 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
45 Julian Seward, Guildford, Surrey, UK.
46 jseward@acm.org
47 bzip2/libbzip2 version 0.9.0c of 18 October 1998
48
49 This program is based on (at least) the work of:
50 Mike Burrows
51 David Wheeler
52 Peter Fenwick
53 Alistair Moffat
54 Radford Neal
55 Ian H. Witten
56 Robert Sedgewick
57 Jon L. Bentley
58
59 For more information on these sources, see the manual.
60 --*/
61
62
63 #include "bzhdr.h"
64
65 /*---------------------------------------------*/
66 /*--
67 Compare two strings in block. We assume (see
68 discussion above) that i1 and i2 have a max
69 offset of 10 on entry, and that the first
70 bytes of both block and quadrant have been
71 copied into the "overshoot area", ie
72 into the subscript range
73 [nblock .. nblock+NUM_OVERSHOOT_BYTES-1].
74 --*/
fullGtU(UChar * block,UInt16 * quadrant,UInt32 nblock,Int32 * workDone,Int32 i1,Int32 i2)75 static __inline__ Bool fullGtU ( UChar* block,
76 UInt16* quadrant,
77 UInt32 nblock,
78 Int32* workDone,
79 Int32 i1,
80 Int32 i2
81 )
82 {
83 Int32 k;
84 UChar c1, c2;
85 UInt16 s1, s2;
86
87 AssertD ( i1 != i2, "fullGtU(1)" );
88
89 c1 = block[i1];
90 c2 = block[i2];
91 if (c1 != c2) return (c1 > c2);
92 i1++; i2++;
93
94 c1 = block[i1];
95 c2 = block[i2];
96 if (c1 != c2) return (c1 > c2);
97 i1++; i2++;
98
99 c1 = block[i1];
100 c2 = block[i2];
101 if (c1 != c2) return (c1 > c2);
102 i1++; i2++;
103
104 c1 = block[i1];
105 c2 = block[i2];
106 if (c1 != c2) return (c1 > c2);
107 i1++; i2++;
108
109 c1 = block[i1];
110 c2 = block[i2];
111 if (c1 != c2) return (c1 > c2);
112 i1++; i2++;
113
114 c1 = block[i1];
115 c2 = block[i2];
116 if (c1 != c2) return (c1 > c2);
117 i1++; i2++;
118
119 k = nblock;
120
121 do {
122
123 c1 = block[i1];
124 c2 = block[i2];
125 if (c1 != c2) return (c1 > c2);
126 s1 = quadrant[i1];
127 s2 = quadrant[i2];
128 if (s1 != s2) return (s1 > s2);
129 i1++; i2++;
130
131 c1 = block[i1];
132 c2 = block[i2];
133 if (c1 != c2) return (c1 > c2);
134 s1 = quadrant[i1];
135 s2 = quadrant[i2];
136 if (s1 != s2) return (s1 > s2);
137 i1++; i2++;
138
139 c1 = block[i1];
140 c2 = block[i2];
141 if (c1 != c2) return (c1 > c2);
142 s1 = quadrant[i1];
143 s2 = quadrant[i2];
144 if (s1 != s2) return (s1 > s2);
145 i1++; i2++;
146
147 c1 = block[i1];
148 c2 = block[i2];
149 if (c1 != c2) return (c1 > c2);
150 s1 = quadrant[i1];
151 s2 = quadrant[i2];
152 if (s1 != s2) return (s1 > s2);
153 i1++; i2++;
154
155 if (i1 >= nblock) i1 -= nblock;
156 if (i2 >= nblock) i2 -= nblock;
157
158 k -= 4;
159 (*workDone)++;
160 }
161 while (k >= 0);
162
163 return False;
164 }
165
166 /*---------------------------------------------*/
167 /*--
168 Knuth's increments seem to work better
169 than Incerpi-Sedgewick here. Possibly
170 because the number of elems to sort is
171 usually small, typically <= 20.
172 --*/
173 static Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
174 9841, 29524, 88573, 265720,
175 797161, 2391484 };
176
simpleSort(EState * s,Int32 lo,Int32 hi,Int32 d)177 static void simpleSort ( EState* s, Int32 lo, Int32 hi, Int32 d )
178 {
179 Int32 i, j, h, bigN, hp;
180 Int32 v;
181
182 UChar* block = s->block;
183 UInt32* zptr = s->zptr;
184 UInt16* quadrant = s->quadrant;
185 Int32* workDone = &(s->workDone);
186 Int32 nblock = s->nblock;
187 Int32 workLimit = s->workLimit;
188 Bool firstAttempt = s->firstAttempt;
189
190 bigN = hi - lo + 1;
191 if (bigN < 2) return;
192
193 hp = 0;
194 while (incs[hp] < bigN) hp++;
195 hp--;
196
197 for (; hp >= 0; hp--) {
198 h = incs[hp];
199 i = lo + h;
200 while (True) {
201
202 /*-- copy 1 --*/
203 if (i > hi) break;
204 v = zptr[i];
205 j = i;
206 while ( fullGtU ( block, quadrant, nblock, workDone,
207 zptr[j-h]+d, v+d ) ) {
208 zptr[j] = zptr[j-h];
209 j = j - h;
210 if (j <= (lo + h - 1)) break;
211 }
212 zptr[j] = v;
213 i++;
214
215 /*-- copy 2 --*/
216 if (i > hi) break;
217 v = zptr[i];
218 j = i;
219 while ( fullGtU ( block, quadrant, nblock, workDone,
220 zptr[j-h]+d, v+d ) ) {
221 zptr[j] = zptr[j-h];
222 j = j - h;
223 if (j <= (lo + h - 1)) break;
224 }
225 zptr[j] = v;
226 i++;
227
228 /*-- copy 3 --*/
229 if (i > hi) break;
230 v = zptr[i];
231 j = i;
232 while ( fullGtU ( block, quadrant, nblock, workDone,
233 zptr[j-h]+d, v+d ) ) {
234 zptr[j] = zptr[j-h];
235 j = j - h;
236 if (j <= (lo + h - 1)) break;
237 }
238 zptr[j] = v;
239 i++;
240
241 if (*workDone > workLimit && firstAttempt) return;
242 }
243 }
244 }
245
246
247 /*---------------------------------------------*/
248 /*--
249 The following is an implementation of
250 an elegant 3-way quicksort for strings,
251 described in a paper "Fast Algorithms for
252 Sorting and Searching Strings", by Robert
253 Sedgewick and Jon L. Bentley.
254 --*/
255
256 #define swap(lv1, lv2) \
257 { Int32 tmp = lv1; lv1 = lv2; lv2 = tmp; }
258
vswap(UInt32 * zptr,Int32 p1,Int32 p2,Int32 n)259 static void vswap ( UInt32* zptr, Int32 p1, Int32 p2, Int32 n )
260 {
261 while (n > 0) {
262 swap(zptr[p1], zptr[p2]);
263 p1++; p2++; n--;
264 }
265 }
266
med3(UChar a,UChar b,UChar c)267 static UChar med3 ( UChar a, UChar b, UChar c )
268 {
269 UChar t;
270 if (a > b) { t = a; a = b; b = t; };
271 if (b > c) { t = b; b = c; c = t; };
272 if (a > b) b = a;
273 return b;
274 }
275
276
277 #define min(a,b) ((a) < (b)) ? (a) : (b)
278
279 typedef
280 struct { Int32 ll; Int32 hh; Int32 dd; }
281 StackElem;
282
283 #define push(lz,hz,dz) { stack[sp].ll = lz; \
284 stack[sp].hh = hz; \
285 stack[sp].dd = dz; \
286 sp++; }
287
288 #define pop(lz,hz,dz) { sp--; \
289 lz = stack[sp].ll; \
290 hz = stack[sp].hh; \
291 dz = stack[sp].dd; }
292
293 #define SMALL_THRESH 20
294 #define DEPTH_THRESH 10
295
296 /*--
297 If you are ever unlucky/improbable enough
298 to get a stack overflow whilst sorting,
299 increase the following constant and try
300 again. In practice I have never seen the
301 stack go above 27 elems, so the following
302 limit seems very generous.
303 --*/
304 #define QSORT_STACK_SIZE 1000
305
306
qSort3(EState * s,Int32 loSt,Int32 hiSt,Int32 dSt)307 static void qSort3 ( EState* s, Int32 loSt, Int32 hiSt, Int32 dSt )
308 {
309 Int32 unLo, unHi, ltLo, gtHi, med, n, m;
310 Int32 sp, lo, hi, d;
311 StackElem stack[QSORT_STACK_SIZE];
312
313 UChar* block = s->block;
314 UInt32* zptr = s->zptr;
315 Int32* workDone = &(s->workDone);
316 Int32 workLimit = s->workLimit;
317 Bool firstAttempt = s->firstAttempt;
318
319 sp = 0;
320 push ( loSt, hiSt, dSt );
321
322 while (sp > 0) {
323
324 AssertH ( sp < QSORT_STACK_SIZE, 1001 );
325
326 pop ( lo, hi, d );
327
328 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
329 simpleSort ( s, lo, hi, d );
330 if (*workDone > workLimit && firstAttempt) return;
331 continue;
332 }
333
334 med = med3 ( block[zptr[ lo ]+d],
335 block[zptr[ hi ]+d],
336 block[zptr[ (lo+hi)>>1 ]+d] );
337
338 unLo = ltLo = lo;
339 unHi = gtHi = hi;
340
341 while (True) {
342 while (True) {
343 if (unLo > unHi) break;
344 n = ((Int32)block[zptr[unLo]+d]) - med;
345 if (n == 0) { swap(zptr[unLo], zptr[ltLo]); ltLo++; unLo++; continue; };
346 if (n > 0) break;
347 unLo++;
348 }
349 while (True) {
350 if (unLo > unHi) break;
351 n = ((Int32)block[zptr[unHi]+d]) - med;
352 if (n == 0) { swap(zptr[unHi], zptr[gtHi]); gtHi--; unHi--; continue; };
353 if (n < 0) break;
354 unHi--;
355 }
356 if (unLo > unHi) break;
357 swap(zptr[unLo], zptr[unHi]); unLo++; unHi--;
358 }
359
360 AssertD ( unHi == unLo-1, "bad termination in qSort3" );
361
362 if (gtHi < ltLo) {
363 push(lo, hi, d+1 );
364 continue;
365 }
366
367 n = min(ltLo-lo, unLo-ltLo); vswap(zptr, lo, unLo-n, n);
368 m = min(hi-gtHi, gtHi-unHi); vswap(zptr, unLo, hi-m+1, m);
369
370 n = lo + unLo - ltLo - 1;
371 m = hi - (gtHi - unHi) + 1;
372
373 push ( lo, n, d );
374 push ( n+1, m-1, d+1 );
375 push ( m, hi, d );
376 }
377 }
378
379
380 /*---------------------------------------------*/
381
382 #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
383
384 #define SETMASK (1 << 21)
385 #define CLEARMASK (~(SETMASK))
386
sortMain(EState * s)387 static void sortMain ( EState* s )
388 {
389 Int32 i, j, k, ss, sb;
390 Int32 runningOrder[256];
391 Int32 copy[256];
392 Bool bigDone[256];
393 UChar c1, c2;
394 Int32 numQSorted;
395
396 UChar* block = s->block;
397 UInt32* zptr = s->zptr;
398 UInt16* quadrant = s->quadrant;
399 Int32* ftab = s->ftab;
400 Int32* workDone = &(s->workDone);
401 Int32 nblock = s->nblock;
402 Int32 workLimit = s->workLimit;
403 Bool firstAttempt = s->firstAttempt;
404
405 /*--
406 In the various block-sized structures, live data runs
407 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
408 set up the overshoot area for block.
409 --*/
410
411 if (s->verbosity >= 4)
412 VPrintf0( " sort initialise ...\n" );
413
414 for (i = 0; i < BZ_NUM_OVERSHOOT_BYTES; i++)
415 block[nblock+i] = block[i % nblock];
416 for (i = 0; i < nblock+BZ_NUM_OVERSHOOT_BYTES; i++)
417 quadrant[i] = 0;
418
419
420 if (nblock <= 4000) {
421
422 /*--
423 Use simpleSort(), since the full sorting mechanism
424 has quite a large constant overhead.
425 --*/
426 if (s->verbosity >= 4) VPrintf0( " simpleSort ...\n" );
427 for (i = 0; i < nblock; i++) zptr[i] = i;
428 firstAttempt = False;
429 *workDone = workLimit = 0;
430 simpleSort ( s, 0, nblock-1, 0 );
431 if (s->verbosity >= 4) VPrintf0( " simpleSort done.\n" );
432
433 } else {
434
435 numQSorted = 0;
436 for (i = 0; i <= 255; i++) bigDone[i] = False;
437
438 if (s->verbosity >= 4) VPrintf0( " bucket sorting ...\n" );
439
440 for (i = 0; i <= 65536; i++) ftab[i] = 0;
441
442 c1 = block[nblock-1];
443 for (i = 0; i < nblock; i++) {
444 c2 = block[i];
445 ftab[(c1 << 8) + c2]++;
446 c1 = c2;
447 }
448
449 for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
450
451 c1 = block[0];
452 for (i = 0; i < nblock-1; i++) {
453 c2 = block[i+1];
454 j = (c1 << 8) + c2;
455 c1 = c2;
456 ftab[j]--;
457 zptr[ftab[j]] = i;
458 }
459 j = (block[nblock-1] << 8) + block[0];
460 ftab[j]--;
461 zptr[ftab[j]] = nblock-1;
462
463 /*--
464 Now ftab contains the first loc of every small bucket.
465 Calculate the running order, from smallest to largest
466 big bucket.
467 --*/
468
469 for (i = 0; i <= 255; i++) runningOrder[i] = i;
470
471 {
472 Int32 vv;
473 Int32 h = 1;
474 do h = 3 * h + 1; while (h <= 256);
475 do {
476 h = h / 3;
477 for (i = h; i <= 255; i++) {
478 vv = runningOrder[i];
479 j = i;
480 while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
481 runningOrder[j] = runningOrder[j-h];
482 j = j - h;
483 if (j <= (h - 1)) goto zero;
484 }
485 zero:
486 runningOrder[j] = vv;
487 }
488 } while (h != 1);
489 }
490
491 /*--
492 The main sorting loop.
493 --*/
494
495 for (i = 0; i <= 255; i++) {
496
497 /*--
498 Process big buckets, starting with the least full.
499 Basically this is a 4-step process in which we call
500 qSort3 to sort the small buckets [ss, j], but
501 also make a big effort to avoid the calls if we can.
502 --*/
503 ss = runningOrder[i];
504
505 /*--
506 Step 1:
507 Complete the big bucket [ss] by quicksorting
508 any unsorted small buckets [ss, j], for j != ss.
509 Hopefully previous pointer-scanning phases have already
510 completed many of the small buckets [ss, j], so
511 we don't have to sort them at all.
512 --*/
513 for (j = 0; j <= 255; j++) {
514 if (j != ss) {
515 sb = (ss << 8) + j;
516 if ( ! (ftab[sb] & SETMASK) ) {
517 Int32 lo = ftab[sb] & CLEARMASK;
518 Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
519 if (hi > lo) {
520 if (s->verbosity >= 4)
521 VPrintf4( " qsort [0x%x, 0x%x] done %d this %d\n",
522 ss, j, numQSorted, hi - lo + 1 );
523 qSort3 ( s, lo, hi, 2 );
524 numQSorted += ( hi - lo + 1 );
525 if (*workDone > workLimit && firstAttempt) return;
526 }
527 }
528 ftab[sb] |= SETMASK;
529 }
530 }
531
532 /*--
533 Step 2:
534 Deal specially with case [ss, ss]. This establishes the
535 sorted order for [ss, ss] without any comparisons.
536 A clever trick, cryptically described as steps Q6b and Q6c
537 in SRC-124 (aka BW94). This makes it entirely practical to
538 not use a preliminary run-length coder, but unfortunately
539 we are now stuck with the .bz2 file format.
540 --*/
541 {
542 Int32 put0, get0, put1, get1;
543 Int32 sbn = (ss << 8) + ss;
544 Int32 lo = ftab[sbn] & CLEARMASK;
545 Int32 hi = (ftab[sbn+1] & CLEARMASK) - 1;
546 UChar ssc = (UChar)ss;
547 put0 = lo;
548 get0 = ftab[ss << 8] & CLEARMASK;
549 put1 = hi;
550 get1 = (ftab[(ss+1) << 8] & CLEARMASK) - 1;
551 while (get0 < put0) {
552 j = zptr[get0]-1; if (j < 0) j += nblock;
553 c1 = block[j];
554 if (c1 == ssc) { zptr[put0] = j; put0++; };
555 get0++;
556 }
557 while (get1 > put1) {
558 j = zptr[get1]-1; if (j < 0) j += nblock;
559 c1 = block[j];
560 if (c1 == ssc) { zptr[put1] = j; put1--; };
561 get1--;
562 }
563 ftab[sbn] |= SETMASK;
564 }
565
566 /*--
567 Step 3:
568 The [ss] big bucket is now done. Record this fact,
569 and update the quadrant descriptors. Remember to
570 update quadrants in the overshoot area too, if
571 necessary. The "if (i < 255)" test merely skips
572 this updating for the last bucket processed, since
573 updating for the last bucket is pointless.
574
575 The quadrant array provides a way to incrementally
576 cache sort orderings, as they appear, so as to
577 make subsequent comparisons in fullGtU() complete
578 faster. For repetitive blocks this makes a big
579 difference (but not big enough to be able to avoid
580 randomisation for very repetitive data.)
581
582 The precise meaning is: at all times:
583
584 for 0 <= i < nblock and 0 <= j <= nblock
585
586 if block[i] != block[j],
587
588 then the relative values of quadrant[i] and
589 quadrant[j] are meaningless.
590
591 else {
592 if quadrant[i] < quadrant[j]
593 then the string starting at i lexicographically
594 precedes the string starting at j
595
596 else if quadrant[i] > quadrant[j]
597 then the string starting at j lexicographically
598 precedes the string starting at i
599
600 else
601 the relative ordering of the strings starting
602 at i and j has not yet been determined.
603 }
604 --*/
605 bigDone[ss] = True;
606
607 if (i < 255) {
608 Int32 bbStart = ftab[ss << 8] & CLEARMASK;
609 Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
610 Int32 shifts = 0;
611
612 while ((bbSize >> shifts) > 65534) shifts++;
613
614 for (j = 0; j < bbSize; j++) {
615 Int32 a2update = zptr[bbStart + j];
616 UInt16 qVal = (UInt16)(j >> shifts);
617 quadrant[a2update] = qVal;
618 if (a2update < BZ_NUM_OVERSHOOT_BYTES)
619 quadrant[a2update + nblock] = qVal;
620 }
621
622 AssertH ( ( ((bbSize-1) >> shifts) <= 65535 ), 1002 );
623 }
624
625 /*--
626 Step 4:
627 Now scan this big bucket [ss] so as to synthesise the
628 sorted order for small buckets [t, ss] for all t != ss.
629 This will avoid doing Real Work in subsequent Step 1's.
630 --*/
631 for (j = 0; j <= 255; j++)
632 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
633
634 for (j = ftab[ss << 8] & CLEARMASK;
635 j < (ftab[(ss+1) << 8] & CLEARMASK);
636 j++) {
637 k = zptr[j]-1; if (k < 0) k += nblock;
638 c1 = block[k];
639 if ( ! bigDone[c1] ) {
640 zptr[copy[c1]] = k;
641 copy[c1] ++;
642 }
643 }
644
645 for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
646 }
647 if (s->verbosity >= 4)
648 VPrintf3( " %d pointers, %d sorted, %d scanned\n",
649 nblock, numQSorted, nblock - numQSorted );
650 }
651 }
652
653
654 /*---------------------------------------------*/
randomiseBlock(EState * s)655 static void randomiseBlock ( EState* s )
656 {
657 Int32 i;
658 BZ_RAND_INIT_MASK;
659 for (i = 0; i < 256; i++) s->inUse[i] = False;
660
661 for (i = 0; i < s->nblock; i++) {
662 BZ_RAND_UPD_MASK;
663 s->block[i] ^= BZ_RAND_MASK;
664 s->inUse[s->block[i]] = True;
665 }
666 }
667
668
669 /*---------------------------------------------*/
blockSort(EState * s)670 void blockSort ( EState* s )
671 {
672 Int32 i;
673
674 s->workLimit = s->workFactor * (s->nblock - 1);
675 s->workDone = 0;
676 s->blockRandomised = False;
677 s->firstAttempt = True;
678
679 sortMain ( s );
680
681 if (s->verbosity >= 3)
682 VPrintf3( " %d work, %d block, ratio %5.2f\n",
683 s->workDone, s->nblock-1,
684 (float)(s->workDone) / (float)(s->nblock-1) );
685
686 if (s->workDone > s->workLimit && s->firstAttempt) {
687 if (s->verbosity >= 2)
688 VPrintf0( " sorting aborted; randomising block\n" );
689 randomiseBlock ( s );
690 s->workLimit = s->workDone = 0;
691 s->blockRandomised = True;
692 s->firstAttempt = False;
693 sortMain ( s );
694 if (s->verbosity >= 3)
695 VPrintf3( " %d work, %d block, ratio %f\n",
696 s->workDone, s->nblock-1,
697 (float)(s->workDone) / (float)(s->nblock-1) );
698 }
699
700 s->origPtr = -1;
701 for (i = 0; i < s->nblock; i++)
702 if (s->zptr[i] == 0)
703 { s->origPtr = i; break; };
704
705 AssertH( s->origPtr != -1, 1003 );
706 }
707
708 /*-------------------------------------------------------------*/
709 /*--- end blocksort.c ---*/
710 /*-------------------------------------------------------------*/
711