1 /*
2 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26 // -*- C++ -*-
27 // Small program for unpacking specially compressed Java packages.
28 // John R. Rose
29
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34
35 #include "jni_util.h"
36
37 #include "defines.h"
38 #include "bytes.h"
39 #include "utils.h"
40 #include "coding.h"
41
42 #include "constants.h"
43 #include "unpack.h"
44
45 extern coding basic_codings[];
46
47 #define CODING_PRIVATE(spec) \
48 int spec_ = spec; \
49 int B = CODING_B(spec_); \
50 int H = CODING_H(spec_); \
51 int L = 256 - H; \
52 int S = CODING_S(spec_); \
53 int D = CODING_D(spec_)
54
55 #define IS_NEG_CODE(S, codeVal) \
56 ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )
57
58 #define DECODE_SIGN_S1(ux) \
59 ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
60
61 static maybe_inline
decode_sign(int S,uint ux)62 int decode_sign(int S, uint ux) { // == Coding.decodeSign32
63 assert(S > 0);
64 uint sigbits = (ux >> S);
65 if (IS_NEG_CODE(S, ux))
66 return (int)( ~sigbits);
67 else
68 return (int)(ux - sigbits);
69 // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
70 }
71
init()72 coding* coding::init() {
73 if (umax > 0) return this; // already done
74 assert(spec != 0); // sanity
75
76 // fill in derived fields
77 CODING_PRIVATE(spec);
78
79 // Return null if 'arb(BHSD)' parameter constraints are not met:
80 if (B < 1 || B > B_MAX) return null;
81 if (H < 1 || H > 256) return null;
82 if (S < 0 || S > 2) return null;
83 if (D < 0 || D > 1) return null;
84 if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size
85 if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding
86
87 // first compute the range of the coding, in 64 bits
88 jlong range = 0;
89 {
90 jlong H_i = 1;
91 for (int i = 0; i < B; i++) {
92 range += H_i;
93 H_i *= H;
94 }
95 range *= L;
96 range += H_i;
97 }
98 assert(range > 0); // no useless codings, please
99
100 int this_umax;
101
102 // now, compute min and max
103 if (range >= ((jlong)1 << 32)) {
104 this_umax = INT_MAX_VALUE;
105 this->umin = INT_MIN_VALUE;
106 this->max = INT_MAX_VALUE;
107 this->min = INT_MIN_VALUE;
108 } else {
109 this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
110 this->max = this_umax;
111 this->min = this->umin = 0;
112 if (S != 0 && range != 0) {
113 int Smask = (1<<S)-1;
114 jlong maxPosCode = range-1;
115 jlong maxNegCode = range-1;
116 while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode;
117 while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode;
118 int maxPos = decode_sign(S, (uint)maxPosCode);
119 if (maxPos < 0)
120 this->max = INT_MAX_VALUE; // 32-bit wraparound
121 else
122 this->max = maxPos;
123 if (maxNegCode < 0)
124 this->min = 0; // No negative codings at all.
125 else
126 this->min = decode_sign(S, (uint)maxNegCode);
127 }
128 }
129
130 assert(!(isFullRange | isSigned | isSubrange)); // init
131 if (min < 0)
132 this->isSigned = true;
133 if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
134 this->isSubrange = true;
135 if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
136 this->isFullRange = true;
137
138 // do this last, to reduce MT exposure (should have a membar too)
139 this->umax = this_umax;
140
141 return this;
142 }
143
findBySpec(int spec)144 coding* coding::findBySpec(int spec) {
145 for (coding* scan = &basic_codings[0]; ; scan++) {
146 if (scan->spec == spec)
147 return scan->init();
148 if (scan->spec == 0)
149 break;
150 }
151 coding* ptr = NEW(coding, 1);
152 CHECK_NULL_RETURN(ptr, 0);
153 coding* c = ptr->initFrom(spec);
154 if (c == null) {
155 mtrace('f', ptr, 0);
156 ::free(ptr);
157 } else
158 // else caller should free it...
159 c->isMalloc = true;
160 return c;
161 }
162
findBySpec(int B,int H,int S,int D)163 coding* coding::findBySpec(int B, int H, int S, int D) {
164 if (B < 1 || B > B_MAX) return null;
165 if (H < 1 || H > 256) return null;
166 if (S < 0 || S > 2) return null;
167 if (D < 0 || D > 1) return null;
168 return findBySpec(CODING_SPEC(B, H, S, D));
169 }
170
free()171 void coding::free() {
172 if (isMalloc) {
173 mtrace('f', this, 0);
174 ::free(this);
175 }
176 }
177
reset(value_stream * state)178 void coding_method::reset(value_stream* state) {
179 assert(state->rp == state->rplimit); // not in mid-stream, please
180 //assert(this == vs0.cm);
181 state[0] = vs0;
182 if (uValues != null) {
183 uValues->reset(state->helper());
184 }
185 }
186
187 maybe_inline
parse(byte * & rp,int B,int H)188 uint coding::parse(byte* &rp, int B, int H) {
189 int L = 256-H;
190 byte* ptr = rp;
191 // hand peel the i==0 part of the loop:
192 uint b_i = *ptr++ & 0xFF;
193 if (B == 1 || b_i < (uint)L)
194 { rp = ptr; return b_i; }
195 uint sum = b_i;
196 uint H_i = H;
197 assert(B <= B_MAX);
198 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
199 b_i = *ptr++ & 0xFF;
200 sum += b_i * H_i;
201 if (i == B || b_i < (uint)L)
202 { rp = ptr; return sum; }
203 H_i *= H;
204 }
205 assert(false);
206 return 0;
207 }
208
209 maybe_inline
parse_lgH(byte * & rp,int B,int H,int lgH)210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
211 assert(H == (1<<lgH));
212 int L = 256-(1<<lgH);
213 byte* ptr = rp;
214 // hand peel the i==0 part of the loop:
215 uint b_i = *ptr++ & 0xFF;
216 if (B == 1 || b_i < (uint)L)
217 { rp = ptr; return b_i; }
218 uint sum = b_i;
219 uint lg_H_i = lgH;
220 assert(B <= B_MAX);
221 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
222 b_i = *ptr++ & 0xFF;
223 sum += b_i << lg_H_i;
224 if (i == B || b_i < (uint)L)
225 { rp = ptr; return sum; }
226 lg_H_i += lgH;
227 }
228 assert(false);
229 return 0;
230 }
231
232 static const char ERB[] = "EOF reading band";
233
234 maybe_inline
parseMultiple(byte * & rp,int N,byte * limit,int B,int H)235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
236 if (N < 0) {
237 abort("bad value count");
238 return;
239 }
240 byte* ptr = rp;
241 if (B == 1 || H == 256) {
242 size_t len = (size_t)N*B;
243 if (len / B != (size_t)N || ptr+len > limit) {
244 abort(ERB);
245 return;
246 }
247 rp = ptr+len;
248 return;
249 }
250 // Note: We assume rp has enough zero-padding.
251 int L = 256-H;
252 int n = B;
253 while (N > 0) {
254 ptr += 1;
255 if (--n == 0) {
256 // end of encoding at B bytes, regardless of byte value
257 } else {
258 int b = (ptr[-1] & 0xFF);
259 if (b >= L) {
260 // keep going, unless we find a byte < L
261 continue;
262 }
263 }
264 // found the last byte
265 N -= 1;
266 n = B; // reset length counter
267 // do an error check here
268 if (ptr > limit) {
269 abort(ERB);
270 return;
271 }
272 }
273 rp = ptr;
274 return;
275 }
276
hasHelper()277 bool value_stream::hasHelper() {
278 // If my coding method is a pop-style method,
279 // then I need a second value stream to transmit
280 // unfavored values.
281 // This can be determined by examining fValues.
282 return cm->fValues != null;
283 }
284
init(byte * rp_,byte * rplimit_,coding * defc)285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
286 rp = rp_;
287 rplimit = rplimit_;
288 sum = 0;
289 cm = null; // no need in the simple case
290 setCoding(defc);
291 }
292
setCoding(coding * defc)293 void value_stream::setCoding(coding* defc) {
294 if (defc == null) {
295 unpack_abort("bad coding");
296 defc = coding::findByIndex(_meta_canon_min); // random pick for recovery
297 }
298
299 c = (*defc);
300
301 // choose cmk
302 cmk = cmk_ERROR;
303 switch (c.spec) {
304 case BYTE1_spec: cmk = cmk_BYTE1; break;
305 case CHAR3_spec: cmk = cmk_CHAR3; break;
306 case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break;
307 case DELTA5_spec: cmk = cmk_DELTA5; break;
308 case BCI5_spec: cmk = cmk_BCI5; break;
309 case BRANCH5_spec: cmk = cmk_BRANCH5; break;
310 default:
311 if (c.D() == 0) {
312 switch (c.S()) {
313 case 0: cmk = cmk_BHS0; break;
314 case 1: cmk = cmk_BHS1; break;
315 default: cmk = cmk_BHS; break;
316 }
317 } else {
318 if (c.S() == 1) {
319 if (c.isFullRange) cmk = cmk_BHS1D1full;
320 if (c.isSubrange) cmk = cmk_BHS1D1sub;
321 }
322 if (cmk == cmk_ERROR) cmk = cmk_BHSD1;
323 }
324 }
325 }
326
327 static maybe_inline
getPopValue(value_stream * self,uint uval)328 int getPopValue(value_stream* self, uint uval) {
329 if (uval > 0) {
330 // note that the initial parse performed a range check
331 assert(uval <= (uint)self->cm->fVlength);
332 return self->cm->fValues[uval-1];
333 } else {
334 // take an unfavored value
335 return self->helper()->getInt();
336 }
337 }
338
339 maybe_inline
sumInUnsignedRange(int x,int y)340 int coding::sumInUnsignedRange(int x, int y) {
341 assert(isSubrange);
342 int range = (int)(umax+1);
343 assert(range > 0);
344 x += y;
345 if (x != (int)((jlong)(x-y) + (jlong)y)) {
346 // 32-bit overflow interferes with range reduction.
347 // Back off from the overflow by adding a multiple of range:
348 if (x < 0) {
349 x -= range;
350 assert(x >= 0);
351 } else {
352 x += range;
353 assert(x < 0);
354 }
355 }
356 if (x < 0) {
357 x += range;
358 if (x >= 0) return x;
359 } else if (x >= range) {
360 x -= range;
361 if (x < range) return x;
362 } else {
363 // in range
364 return x;
365 }
366 // do it the hard way
367 x %= range;
368 if (x < 0) x += range;
369 return x;
370 }
371
372 static maybe_inline
getDeltaValue(value_stream * self,uint uval,bool isSubrange)373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
374 assert((uint)(self->c.isSubrange) == (uint)isSubrange);
375 assert(self->c.isSubrange | self->c.isFullRange);
376 if (isSubrange)
377 return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
378 else
379 return self->sum += (int) uval;
380 }
381
hasValue()382 bool value_stream::hasValue() {
383 if (rp < rplimit) return true;
384 if (cm == null) return false;
385 if (cm->next == null) return false;
386 cm->next->reset(this);
387 return hasValue();
388 }
389
getInt()390 int value_stream::getInt() {
391 if (rp >= rplimit) {
392 // Advance to next coding segment.
393 if (rp > rplimit || cm == null || cm->next == null) {
394 // Must perform this check and throw an exception on bad input.
395 unpack_abort(ERB);
396 return 0;
397 }
398 cm->next->reset(this);
399 return getInt();
400 }
401
402 CODING_PRIVATE(c.spec);
403 uint uval;
404 enum {
405 B5 = 5,
406 B3 = 3,
407 H128 = 128,
408 H64 = 64,
409 H4 = 4
410 };
411 switch (cmk) {
412 case cmk_BHS:
413 assert(D == 0);
414 uval = coding::parse(rp, B, H);
415 if (S == 0)
416 return (int) uval;
417 return decode_sign(S, uval);
418
419 case cmk_BHS0:
420 assert(S == 0 && D == 0);
421 uval = coding::parse(rp, B, H);
422 return (int) uval;
423
424 case cmk_BHS1:
425 assert(S == 1 && D == 0);
426 uval = coding::parse(rp, B, H);
427 return DECODE_SIGN_S1(uval);
428
429 case cmk_BYTE1:
430 assert(c.spec == BYTE1_spec);
431 assert(B == 1 && H == 256 && S == 0 && D == 0);
432 return *rp++ & 0xFF;
433
434 case cmk_CHAR3:
435 assert(c.spec == CHAR3_spec);
436 assert(B == B3 && H == H128 && S == 0 && D == 0);
437 return coding::parse_lgH(rp, B3, H128, 7);
438
439 case cmk_UNSIGNED5:
440 assert(c.spec == UNSIGNED5_spec);
441 assert(B == B5 && H == H64 && S == 0 && D == 0);
442 return coding::parse_lgH(rp, B5, H64, 6);
443
444 case cmk_BHSD1:
445 assert(D == 1);
446 uval = coding::parse(rp, B, H);
447 if (S != 0)
448 uval = (uint) decode_sign(S, uval);
449 return getDeltaValue(this, uval, (bool)c.isSubrange);
450
451 case cmk_BHS1D1full:
452 assert(S == 1 && D == 1 && c.isFullRange);
453 uval = coding::parse(rp, B, H);
454 uval = (uint) DECODE_SIGN_S1(uval);
455 return getDeltaValue(this, uval, false);
456
457 case cmk_BHS1D1sub:
458 assert(S == 1 && D == 1 && c.isSubrange);
459 uval = coding::parse(rp, B, H);
460 uval = (uint) DECODE_SIGN_S1(uval);
461 return getDeltaValue(this, uval, true);
462
463 case cmk_DELTA5:
464 assert(c.spec == DELTA5_spec);
465 assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
466 uval = coding::parse_lgH(rp, B5, H64, 6);
467 sum += DECODE_SIGN_S1(uval);
468 return sum;
469
470 case cmk_BCI5:
471 assert(c.spec == BCI5_spec);
472 assert(B == B5 && H == H4 && S == 0 && D == 0);
473 return coding::parse_lgH(rp, B5, H4, 2);
474
475 case cmk_BRANCH5:
476 assert(c.spec == BRANCH5_spec);
477 assert(B == B5 && H == H4 && S == 2 && D == 0);
478 uval = coding::parse_lgH(rp, B5, H4, 2);
479 return decode_sign(S, uval);
480
481 case cmk_pop:
482 uval = coding::parse(rp, B, H);
483 if (S != 0) {
484 uval = (uint) decode_sign(S, uval);
485 }
486 if (D != 0) {
487 assert(c.isSubrange | c.isFullRange);
488 if (c.isSubrange)
489 sum = c.sumInUnsignedRange(sum, (int) uval);
490 else
491 sum += (int) uval;
492 uval = (uint) sum;
493 }
494 return getPopValue(this, uval);
495
496 case cmk_pop_BHS0:
497 assert(S == 0 && D == 0);
498 uval = coding::parse(rp, B, H);
499 return getPopValue(this, uval);
500
501 case cmk_pop_BYTE1:
502 assert(c.spec == BYTE1_spec);
503 assert(B == 1 && H == 256 && S == 0 && D == 0);
504 return getPopValue(this, *rp++ & 0xFF);
505
506 default:
507 break;
508 }
509 assert(false);
510 return 0;
511 }
512
513 static maybe_inline
moreCentral(int x,int y)514 int moreCentral(int x, int y) { // used to find end of Pop.{F}
515 // Suggested implementation from the Pack200 specification:
516 uint kx = (x >> 31) ^ (x << 1);
517 uint ky = (y >> 31) ^ (y << 1);
518 return (kx < ky? x: y);
519 }
520 //static maybe_inline
521 //int moreCentral2(int x, int y, int min) {
522 // // Strict implementation of buggy 150.7 specification.
523 // // The bug is that the spec. says absolute-value ties are broken
524 // // in favor of positive numbers, but the suggested implementation
525 // // (also mentioned in the spec.) breaks ties in favor of negative numbers.
526 // if ((x + y) != 0)
527 // return min;
528 // else
529 // // return the other value, which breaks a tie in the positive direction
530 // return (x > y)? x: y;
531 //}
532
533 static const byte* no_meta[] = {null};
534 #define NO_META (*(byte**)no_meta)
535 enum { POP_FAVORED_N = -2 };
536
537 // mode bits
538 #define DISABLE_RUN 1 // used immediately inside ACodee
539 #define DISABLE_POP 2 // used recursively in all pop sub-bands
540
541 // This function knows all about meta-coding.
init(byte * & band_rp,byte * band_limit,byte * & meta_rp,int mode,coding * defc,int N,intlist * valueSink)542 void coding_method::init(byte* &band_rp, byte* band_limit,
543 byte* &meta_rp, int mode,
544 coding* defc, int N,
545 intlist* valueSink) {
546 assert(N != 0);
547
548 assert(u != null); // must be pre-initialized
549 //if (u == null) u = unpacker::current(); // expensive
550
551 int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF);
552 coding* foundc = null;
553 coding* to_free = null;
554
555 if (op == _meta_default) {
556 foundc = defc;
557 // and fall through
558
559 } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
560 foundc = coding::findByIndex(op);
561 // and fall through
562
563 } else if (op == _meta_arb) {
564 int args = (*meta_rp++ & 0xFF);
565 // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
566 int D = ((args >> 0) & 1);
567 int S = ((args >> 1) & 3);
568 int B = ((args >> 3) & -1) + 1;
569 // & (H[1..256]-1)
570 int H = (*meta_rp++ & 0xFF) + 1;
571 foundc = coding::findBySpec(B, H, S, D);
572 to_free = foundc; // findBySpec may dynamically allocate
573 if (foundc == null) {
574 abort("illegal arb. coding");
575 return;
576 }
577 // and fall through
578
579 } else if (op >= _meta_run && op < _meta_pop) {
580 int args = (op - _meta_run);
581 // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
582 int KX = ((args >> 0) & 3);
583 int KBFlag = ((args >> 2) & 1);
584 int ABDef = ((args >> 3) & -1);
585 assert(ABDef <= 2);
586 // & KB: one of [0..255] if KBFlag=1
587 int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF));
588 int K = (KB+1) << (KX * 4);
589 int N2 = (N >= 0) ? N-K : N;
590 if (N == 0 || (N2 <= 0 && N2 != N)) {
591 abort("illegal run encoding");
592 return;
593 }
594 if ((mode & DISABLE_RUN) != 0) {
595 abort("illegal nested run encoding");
596 return;
597 }
598
599 // & Enc{ ACode } if ADef=0 (ABDef != 1)
600 // No direct nesting of 'run' in ACode, but in BCode it's OK.
601 int disRun = mode | DISABLE_RUN;
602 if (ABDef == 1) {
603 this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
604 } else {
605 this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
606 }
607 CHECK;
608
609 // & Enc{ BCode } if BDef=0 (ABDef != 2)
610 coding_method* tail = U_NEW(coding_method, 1);
611 CHECK_NULL(tail);
612 tail->u = u;
613
614 // The 'run' codings may be nested indirectly via 'pop' codings.
615 // This means that this->next may already be filled in, if
616 // ACode was of type 'pop' with a 'run' token coding.
617 // No problem: Just chain the upcoming BCode onto the end.
618 for (coding_method* self = this; ; self = self->next) {
619 if (self->next == null) {
620 self->next = tail;
621 break;
622 }
623 }
624
625 if (ABDef == 2) {
626 tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
627 } else {
628 tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
629 }
630 // Note: The preceding calls to init should be tail-recursive.
631
632 return; // done; no falling through
633
634 } else if (op >= _meta_pop && op < _meta_limit) {
635 int args = (op - _meta_pop);
636 // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
637 int FDef = ((args >> 0) & 1);
638 int UDef = ((args >> 1) & 1);
639 int TDefL = ((args >> 2) & -1);
640 assert(TDefL <= 11);
641 int TDef = (TDefL > 0);
642 int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
643 int TH = (256-TL);
644 if (N <= 0) {
645 abort("illegal pop encoding");
646 return;
647 }
648 if ((mode & DISABLE_POP) != 0) {
649 abort("illegal nested pop encoding");
650 return;
651 }
652
653 // No indirect nesting of 'pop', but 'run' is OK.
654 int disPop = DISABLE_POP;
655
656 // & Enc{ FCode } if FDef=0
657 int FN = POP_FAVORED_N;
658 assert(valueSink == null);
659 intlist fValueSink; fValueSink.init();
660 coding_method fval;
661 BYTES_OF(fval).clear(); fval.u = u;
662 if (FDef != 0) {
663 fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
664 } else {
665 fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
666 }
667 bytes fvbuf;
668 fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
669 fVlength = fValueSink.length(); // i.e., the parameter K
670 fValueSink.free();
671 CHECK;
672
673 // Skip the first {F} run in all subsequent passes.
674 // The next call to this->init(...) will set vs0.rp to point after the {F}.
675
676 // & Enc{ TCode } if TDef=0 (TDefL==0)
677 if (TDef != 0) {
678 coding* tcode = coding::findBySpec(1, 256); // BYTE1
679 // find the most narrowly sufficient code:
680 for (int B = 2; B <= B_MAX; B++) {
681 if (fVlength <= tcode->umax) break; // found it
682 tcode->free();
683 tcode = coding::findBySpec(B, TH);
684 CHECK_NULL(tcode);
685 }
686 if (!(fVlength <= tcode->umax)) {
687 abort("pop.L value too small");
688 return;
689 }
690 this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
691 tcode->free();
692 } else {
693 this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null);
694 }
695 CHECK;
696
697 // Count the number of zero tokens right now.
698 // Also verify that they are in bounds.
699 int UN = 0; // one {U} for each zero in {T}
700 value_stream vs = vs0;
701 for (int i = 0; i < N; i++) {
702 uint val = vs.getInt();
703 if (val == 0) UN += 1;
704 if (!(val <= (uint)fVlength)) {
705 abort("pop token out of range");
706 return;
707 }
708 }
709 vs.done();
710
711 // & Enc{ UCode } if UDef=0
712 if (UN != 0) {
713 uValues = U_NEW(coding_method, 1);
714 CHECK_NULL(uValues);
715 uValues->u = u;
716 if (UDef != 0) {
717 uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
718 } else {
719 uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
720 }
721 } else {
722 if (UDef == 0) {
723 int uop = (*meta_rp++ & 0xFF);
724 if (uop > _meta_canon_max)
725 // %%% Spec. requires the more strict (uop != _meta_default).
726 abort("bad meta-coding for empty pop/U");
727 }
728 }
729
730 // Bug fix for 6259542
731 // Last of all, adjust vs0.cmk to the 'pop' flavor
732 for (coding_method* self = this; self != null; self = self->next) {
733 coding_method_kind cmk2 = cmk_pop;
734 switch (self->vs0.cmk) {
735 case cmk_BHS0: cmk2 = cmk_pop_BHS0; break;
736 case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break;
737 default: break;
738 }
739 self->vs0.cmk = cmk2;
740 if (self != this) {
741 assert(self->fValues == null); // no double init
742 self->fValues = this->fValues;
743 self->fVlength = this->fVlength;
744 assert(self->uValues == null); // must stay null
745 }
746 }
747
748 return; // done; no falling through
749
750 } else {
751 abort("bad meta-coding");
752 return;
753 }
754
755 // Common code here skips a series of values with one coding.
756 assert(foundc != null);
757
758 assert(vs0.cmk == cmk_ERROR); // no garbage, please
759 assert(vs0.rp == null); // no garbage, please
760 assert(vs0.rplimit == null); // no garbage, please
761 assert(vs0.sum == 0); // no garbage, please
762
763 vs0.init(band_rp, band_limit, foundc);
764
765 // Done with foundc. Free if necessary.
766 if (to_free != null) {
767 to_free->free();
768 to_free = null;
769 }
770 foundc = null;
771
772 coding& c = vs0.c;
773 CODING_PRIVATE(c.spec);
774 // assert sane N
775 assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
776
777 // Look at the values, or at least skip over them quickly.
778 if (valueSink == null) {
779 // Skip and ignore values in the first pass.
780 c.parseMultiple(band_rp, N, band_limit, B, H);
781 } else if (N >= 0) {
782 // Pop coding, {F} sequence, initial run of values...
783 assert((mode & DISABLE_POP) != 0);
784 value_stream vs = vs0;
785 for (int n = 0; n < N; n++) {
786 int val = vs.getInt();
787 valueSink->add(val);
788 }
789 band_rp = vs.rp;
790 } else {
791 // Pop coding, {F} sequence, final run of values...
792 assert((mode & DISABLE_POP) != 0);
793 assert(N == POP_FAVORED_N);
794 int min = INT_MIN_VALUE; // farthest from the center
795 // min2 is based on the buggy specification of centrality in version 150.7
796 // no known implementations transmit this value, but just in case...
797 //int min2 = INT_MIN_VALUE;
798 int last = 0;
799 // if there were initial runs, find the potential sentinels in them:
800 for (int i = 0; i < valueSink->length(); i++) {
801 last = valueSink->get(i);
802 min = moreCentral(min, last);
803 //min2 = moreCentral2(min2, last, min);
804 }
805 value_stream vs = vs0;
806 for (;;) {
807 int val = vs.getInt();
808 if (valueSink->length() > 0 &&
809 (val == last || val == min)) //|| val == min2
810 break;
811 valueSink->add(val);
812 CHECK;
813 last = val;
814 min = moreCentral(min, last);
815 //min2 = moreCentral2(min2, last, min);
816 }
817 band_rp = vs.rp;
818 }
819 CHECK;
820
821 // Get an accurate upper limit now.
822 vs0.rplimit = band_rp;
823 vs0.cm = this;
824
825 return; // success
826 }
827
828 coding basic_codings[] = {
829 // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
830 CODING_INIT(3,128,0,0),
831
832 // Fixed-length codings:
833 CODING_INIT(1,256,0,0),
834 CODING_INIT(1,256,1,0),
835 CODING_INIT(1,256,0,1),
836 CODING_INIT(1,256,1,1),
837 CODING_INIT(2,256,0,0),
838 CODING_INIT(2,256,1,0),
839 CODING_INIT(2,256,0,1),
840 CODING_INIT(2,256,1,1),
841 CODING_INIT(3,256,0,0),
842 CODING_INIT(3,256,1,0),
843 CODING_INIT(3,256,0,1),
844 CODING_INIT(3,256,1,1),
845 CODING_INIT(4,256,0,0),
846 CODING_INIT(4,256,1,0),
847 CODING_INIT(4,256,0,1),
848 CODING_INIT(4,256,1,1),
849
850 // Full-range variable-length codings:
851 CODING_INIT(5, 4,0,0),
852 CODING_INIT(5, 4,1,0),
853 CODING_INIT(5, 4,2,0),
854 CODING_INIT(5, 16,0,0),
855 CODING_INIT(5, 16,1,0),
856 CODING_INIT(5, 16,2,0),
857 CODING_INIT(5, 32,0,0),
858 CODING_INIT(5, 32,1,0),
859 CODING_INIT(5, 32,2,0),
860 CODING_INIT(5, 64,0,0),
861 CODING_INIT(5, 64,1,0),
862 CODING_INIT(5, 64,2,0),
863 CODING_INIT(5,128,0,0),
864 CODING_INIT(5,128,1,0),
865 CODING_INIT(5,128,2,0),
866
867 CODING_INIT(5, 4,0,1),
868 CODING_INIT(5, 4,1,1),
869 CODING_INIT(5, 4,2,1),
870 CODING_INIT(5, 16,0,1),
871 CODING_INIT(5, 16,1,1),
872 CODING_INIT(5, 16,2,1),
873 CODING_INIT(5, 32,0,1),
874 CODING_INIT(5, 32,1,1),
875 CODING_INIT(5, 32,2,1),
876 CODING_INIT(5, 64,0,1),
877 CODING_INIT(5, 64,1,1),
878 CODING_INIT(5, 64,2,1),
879 CODING_INIT(5,128,0,1),
880 CODING_INIT(5,128,1,1),
881 CODING_INIT(5,128,2,1),
882
883 // Variable length subrange codings:
884 CODING_INIT(2,192,0,0),
885 CODING_INIT(2,224,0,0),
886 CODING_INIT(2,240,0,0),
887 CODING_INIT(2,248,0,0),
888 CODING_INIT(2,252,0,0),
889
890 CODING_INIT(2, 8,0,1),
891 CODING_INIT(2, 8,1,1),
892 CODING_INIT(2, 16,0,1),
893 CODING_INIT(2, 16,1,1),
894 CODING_INIT(2, 32,0,1),
895 CODING_INIT(2, 32,1,1),
896 CODING_INIT(2, 64,0,1),
897 CODING_INIT(2, 64,1,1),
898 CODING_INIT(2,128,0,1),
899 CODING_INIT(2,128,1,1),
900 CODING_INIT(2,192,0,1),
901 CODING_INIT(2,192,1,1),
902 CODING_INIT(2,224,0,1),
903 CODING_INIT(2,224,1,1),
904 CODING_INIT(2,240,0,1),
905 CODING_INIT(2,240,1,1),
906 CODING_INIT(2,248,0,1),
907 CODING_INIT(2,248,1,1),
908
909 CODING_INIT(3,192,0,0),
910 CODING_INIT(3,224,0,0),
911 CODING_INIT(3,240,0,0),
912 CODING_INIT(3,248,0,0),
913 CODING_INIT(3,252,0,0),
914
915 CODING_INIT(3, 8,0,1),
916 CODING_INIT(3, 8,1,1),
917 CODING_INIT(3, 16,0,1),
918 CODING_INIT(3, 16,1,1),
919 CODING_INIT(3, 32,0,1),
920 CODING_INIT(3, 32,1,1),
921 CODING_INIT(3, 64,0,1),
922 CODING_INIT(3, 64,1,1),
923 CODING_INIT(3,128,0,1),
924 CODING_INIT(3,128,1,1),
925 CODING_INIT(3,192,0,1),
926 CODING_INIT(3,192,1,1),
927 CODING_INIT(3,224,0,1),
928 CODING_INIT(3,224,1,1),
929 CODING_INIT(3,240,0,1),
930 CODING_INIT(3,240,1,1),
931 CODING_INIT(3,248,0,1),
932 CODING_INIT(3,248,1,1),
933
934 CODING_INIT(4,192,0,0),
935 CODING_INIT(4,224,0,0),
936 CODING_INIT(4,240,0,0),
937 CODING_INIT(4,248,0,0),
938 CODING_INIT(4,252,0,0),
939
940 CODING_INIT(4, 8,0,1),
941 CODING_INIT(4, 8,1,1),
942 CODING_INIT(4, 16,0,1),
943 CODING_INIT(4, 16,1,1),
944 CODING_INIT(4, 32,0,1),
945 CODING_INIT(4, 32,1,1),
946 CODING_INIT(4, 64,0,1),
947 CODING_INIT(4, 64,1,1),
948 CODING_INIT(4,128,0,1),
949 CODING_INIT(4,128,1,1),
950 CODING_INIT(4,192,0,1),
951 CODING_INIT(4,192,1,1),
952 CODING_INIT(4,224,0,1),
953 CODING_INIT(4,224,1,1),
954 CODING_INIT(4,240,0,1),
955 CODING_INIT(4,240,1,1),
956 CODING_INIT(4,248,0,1),
957 CODING_INIT(4,248,1,1),
958 CODING_INIT(0,0,0,0)
959 };
960 #define BASIC_INDEX_LIMIT \
961 (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
962
findByIndex(int idx)963 coding* coding::findByIndex(int idx) {
964 #ifndef PRODUCT
965 /* Tricky assert here, constants and gcc complains about it without local. */
966 int index_limit = BASIC_INDEX_LIMIT;
967 assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
968 #endif
969 if (idx >= _meta_canon_min && idx <= _meta_canon_max)
970 return basic_codings[idx].init();
971 else
972 return null;
973 }
974
975 #ifndef PRODUCT
string()976 const char* coding::string() {
977 CODING_PRIVATE(spec);
978 bytes buf;
979 buf.malloc(100);
980 char maxS[20], minS[20];
981 sprintf(maxS, "%d", max);
982 sprintf(minS, "%d", min);
983 if (max == INT_MAX_VALUE) strcpy(maxS, "max");
984 if (min == INT_MIN_VALUE) strcpy(minS, "min");
985 sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
986 B,H,S,D,L,minS,maxS);
987 return (const char*) buf.ptr;
988 }
989 #endif
990