1 /*
2  * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 // -*- C++ -*-
27 // Small program for unpacking specially compressed Java packages.
28 // John R. Rose
29 
30 #include <stdio.h>
31 #include <string.h>
32 #include <stdlib.h>
33 #include <stdarg.h>
34 
35 #include "jni_util.h"
36 
37 #include "defines.h"
38 #include "bytes.h"
39 #include "utils.h"
40 #include "coding.h"
41 
42 #include "constants.h"
43 #include "unpack.h"
44 
45 extern coding basic_codings[];
46 
47 #define CODING_PRIVATE(spec) \
48   int spec_ = spec; \
49   int B = CODING_B(spec_); \
50   int H = CODING_H(spec_); \
51   int L = 256 - H; \
52   int S = CODING_S(spec_); \
53   int D = CODING_D(spec_)
54 
55 #define IS_NEG_CODE(S, codeVal) \
56   ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )
57 
58 #define DECODE_SIGN_S1(ux) \
59   ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
60 
61 static maybe_inline
decode_sign(int S,uint ux)62 int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
63   assert(S > 0);
64   uint sigbits = (ux >> S);
65   if (IS_NEG_CODE(S, ux))
66     return (int)(    ~sigbits);
67   else
68     return (int)(ux - sigbits);
69   // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
70 }
71 
init()72 coding* coding::init() {
73   if (umax > 0)  return this;  // already done
74   assert(spec != 0);  // sanity
75 
76   // fill in derived fields
77   CODING_PRIVATE(spec);
78 
79   // Return null if 'arb(BHSD)' parameter constraints are not met:
80   if (B < 1 || B > B_MAX)  return null;
81   if (H < 1 || H > 256)    return null;
82   if (S < 0 || S > 2)      return null;
83   if (D < 0 || D > 1)      return null;
84   if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
85   if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding
86 
87   // first compute the range of the coding, in 64 bits
88   jlong range = 0;
89   {
90     jlong H_i = 1;
91     for (int i = 0; i < B; i++) {
92       range += H_i;
93       H_i *= H;
94     }
95     range *= L;
96     range += H_i;
97   }
98   assert(range > 0);  // no useless codings, please
99 
100   int this_umax;
101 
102   // now, compute min and max
103   if (range >= ((jlong)1 << 32)) {
104     this_umax  = INT_MAX_VALUE;
105     this->umin = INT_MIN_VALUE;
106     this->max  = INT_MAX_VALUE;
107     this->min  = INT_MIN_VALUE;
108   } else {
109     this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
110     this->max = this_umax;
111     this->min = this->umin = 0;
112     if (S != 0 && range != 0) {
113       int Smask = (1<<S)-1;
114       jlong maxPosCode = range-1;
115       jlong maxNegCode = range-1;
116       while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
117       while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
118       int maxPos = decode_sign(S, (uint)maxPosCode);
119       if (maxPos < 0)
120         this->max = INT_MAX_VALUE;  // 32-bit wraparound
121       else
122         this->max = maxPos;
123       if (maxNegCode < 0)
124         this->min = 0;  // No negative codings at all.
125       else
126         this->min = decode_sign(S, (uint)maxNegCode);
127     }
128   }
129 
130   assert(!(isFullRange | isSigned | isSubrange)); // init
131   if (min < 0)
132     this->isSigned = true;
133   if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
134     this->isSubrange = true;
135   if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
136     this->isFullRange = true;
137 
138   // do this last, to reduce MT exposure (should have a membar too)
139   this->umax = this_umax;
140 
141   return this;
142 }
143 
findBySpec(int spec)144 coding* coding::findBySpec(int spec) {
145   for (coding* scan = &basic_codings[0]; ; scan++) {
146     if (scan->spec == spec)
147       return scan->init();
148     if (scan->spec == 0)
149       break;
150   }
151   coding* ptr = NEW(coding, 1);
152   CHECK_NULL_RETURN(ptr, 0);
153   coding* c = ptr->initFrom(spec);
154   if (c == null) {
155     mtrace('f', ptr, 0);
156     ::free(ptr);
157   } else
158     // else caller should free it...
159     c->isMalloc = true;
160   return c;
161 }
162 
findBySpec(int B,int H,int S,int D)163 coding* coding::findBySpec(int B, int H, int S, int D) {
164   if (B < 1 || B > B_MAX)  return null;
165   if (H < 1 || H > 256)    return null;
166   if (S < 0 || S > 2)      return null;
167   if (D < 0 || D > 1)      return null;
168   return findBySpec(CODING_SPEC(B, H, S, D));
169 }
170 
free()171 void coding::free() {
172   if (isMalloc) {
173     mtrace('f', this, 0);
174     ::free(this);
175   }
176 }
177 
reset(value_stream * state)178 void coding_method::reset(value_stream* state) {
179   assert(state->rp == state->rplimit);  // not in mid-stream, please
180   //assert(this == vs0.cm);
181   state[0] = vs0;
182   if (uValues != null) {
183     uValues->reset(state->helper());
184   }
185 }
186 
187 maybe_inline
parse(byte * & rp,int B,int H)188 uint coding::parse(byte* &rp, int B, int H) {
189   int L = 256-H;
190   byte* ptr = rp;
191   // hand peel the i==0 part of the loop:
192   uint b_i = *ptr++ & 0xFF;
193   if (B == 1 || b_i < (uint)L)
194     { rp = ptr; return b_i; }
195   uint sum = b_i;
196   uint H_i = H;
197   assert(B <= B_MAX);
198   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
199     b_i = *ptr++ & 0xFF;
200     sum += b_i * H_i;
201     if (i == B || b_i < (uint)L)
202       { rp = ptr; return sum; }
203     H_i *= H;
204   }
205   assert(false);
206   return 0;
207 }
208 
209 maybe_inline
parse_lgH(byte * & rp,int B,int H,int lgH)210 uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
211   assert(H == (1<<lgH));
212   int L = 256-(1<<lgH);
213   byte* ptr = rp;
214   // hand peel the i==0 part of the loop:
215   uint b_i = *ptr++ & 0xFF;
216   if (B == 1 || b_i < (uint)L)
217     { rp = ptr; return b_i; }
218   uint sum = b_i;
219   uint lg_H_i = lgH;
220   assert(B <= B_MAX);
221   for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
222     b_i = *ptr++ & 0xFF;
223     sum += b_i << lg_H_i;
224     if (i == B || b_i < (uint)L)
225       { rp = ptr; return sum; }
226     lg_H_i += lgH;
227   }
228   assert(false);
229   return 0;
230 }
231 
232 static const char ERB[] = "EOF reading band";
233 
234 maybe_inline
parseMultiple(byte * & rp,int N,byte * limit,int B,int H)235 void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
236   if (N < 0) {
237     abort("bad value count");
238     return;
239   }
240   byte* ptr = rp;
241   if (B == 1 || H == 256) {
242     size_t len = (size_t)N*B;
243     if (len / B != (size_t)N || ptr+len > limit) {
244       abort(ERB);
245       return;
246     }
247     rp = ptr+len;
248     return;
249   }
250   // Note:  We assume rp has enough zero-padding.
251   int L = 256-H;
252   int n = B;
253   while (N > 0) {
254     ptr += 1;
255     if (--n == 0) {
256       // end of encoding at B bytes, regardless of byte value
257     } else {
258       int b = (ptr[-1] & 0xFF);
259       if (b >= L) {
260         // keep going, unless we find a byte < L
261         continue;
262       }
263     }
264     // found the last byte
265     N -= 1;
266     n = B;   // reset length counter
267     // do an error check here
268     if (ptr > limit) {
269       abort(ERB);
270       return;
271     }
272   }
273   rp = ptr;
274   return;
275 }
276 
hasHelper()277 bool value_stream::hasHelper() {
278   // If my coding method is a pop-style method,
279   // then I need a second value stream to transmit
280   // unfavored values.
281   // This can be determined by examining fValues.
282   return cm->fValues != null;
283 }
284 
init(byte * rp_,byte * rplimit_,coding * defc)285 void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
286   rp = rp_;
287   rplimit = rplimit_;
288   sum = 0;
289   cm = null;  // no need in the simple case
290   setCoding(defc);
291 }
292 
setCoding(coding * defc)293 void value_stream::setCoding(coding* defc) {
294   if (defc == null) {
295     unpack_abort("bad coding");
296     defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
297   }
298 
299   c = (*defc);
300 
301   // choose cmk
302   cmk = cmk_ERROR;
303   switch (c.spec) {
304   case BYTE1_spec:      cmk = cmk_BYTE1;        break;
305   case CHAR3_spec:      cmk = cmk_CHAR3;        break;
306   case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
307   case DELTA5_spec:     cmk = cmk_DELTA5;       break;
308   case BCI5_spec:       cmk = cmk_BCI5;         break;
309   case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
310   default:
311     if (c.D() == 0) {
312       switch (c.S()) {
313       case 0:  cmk = cmk_BHS0;  break;
314       case 1:  cmk = cmk_BHS1;  break;
315       default: cmk = cmk_BHS;   break;
316       }
317     } else {
318       if (c.S() == 1) {
319         if (c.isFullRange)   cmk = cmk_BHS1D1full;
320         if (c.isSubrange)    cmk = cmk_BHS1D1sub;
321       }
322       if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
323     }
324   }
325 }
326 
327 static maybe_inline
getPopValue(value_stream * self,uint uval)328 int getPopValue(value_stream* self, uint uval) {
329   if (uval > 0) {
330     // note that the initial parse performed a range check
331     assert(uval <= (uint)self->cm->fVlength);
332     return self->cm->fValues[uval-1];
333   } else {
334     // take an unfavored value
335     return self->helper()->getInt();
336   }
337 }
338 
339 maybe_inline
sumInUnsignedRange(int x,int y)340 int coding::sumInUnsignedRange(int x, int y) {
341   assert(isSubrange);
342   int range = (int)(umax+1);
343   assert(range > 0);
344   x += y;
345   if (x != (int)((jlong)(x-y) + (jlong)y)) {
346     // 32-bit overflow interferes with range reduction.
347     // Back off from the overflow by adding a multiple of range:
348     if (x < 0) {
349       x -= range;
350       assert(x >= 0);
351     } else {
352       x += range;
353       assert(x < 0);
354     }
355   }
356   if (x < 0) {
357     x += range;
358     if (x >= 0)  return x;
359   } else if (x >= range) {
360     x -= range;
361     if (x < range)  return x;
362   } else {
363     // in range
364     return x;
365   }
366   // do it the hard way
367   x %= range;
368   if (x < 0)  x += range;
369   return x;
370 }
371 
372 static maybe_inline
getDeltaValue(value_stream * self,uint uval,bool isSubrange)373 int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
374   assert((uint)(self->c.isSubrange) == (uint)isSubrange);
375   assert(self->c.isSubrange | self->c.isFullRange);
376   if (isSubrange)
377     return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
378   else
379     return self->sum += (int) uval;
380 }
381 
hasValue()382 bool value_stream::hasValue() {
383   if (rp < rplimit)      return true;
384   if (cm == null)        return false;
385   if (cm->next == null)  return false;
386   cm->next->reset(this);
387   return hasValue();
388 }
389 
getInt()390 int value_stream::getInt() {
391   if (rp >= rplimit) {
392     // Advance to next coding segment.
393     if (rp > rplimit || cm == null || cm->next == null) {
394       // Must perform this check and throw an exception on bad input.
395       unpack_abort(ERB);
396       return 0;
397     }
398     cm->next->reset(this);
399     return getInt();
400   }
401 
402   CODING_PRIVATE(c.spec);
403   uint uval;
404   enum {
405     B5 = 5,
406     B3 = 3,
407     H128 = 128,
408     H64 = 64,
409     H4 = 4
410   };
411   switch (cmk) {
412   case cmk_BHS:
413     assert(D == 0);
414     uval = coding::parse(rp, B, H);
415     if (S == 0)
416       return (int) uval;
417     return decode_sign(S, uval);
418 
419   case cmk_BHS0:
420     assert(S == 0 && D == 0);
421     uval = coding::parse(rp, B, H);
422     return (int) uval;
423 
424   case cmk_BHS1:
425     assert(S == 1 && D == 0);
426     uval = coding::parse(rp, B, H);
427     return DECODE_SIGN_S1(uval);
428 
429   case cmk_BYTE1:
430     assert(c.spec == BYTE1_spec);
431     assert(B == 1 && H == 256 && S == 0 && D == 0);
432     return *rp++ & 0xFF;
433 
434   case cmk_CHAR3:
435     assert(c.spec == CHAR3_spec);
436     assert(B == B3 && H == H128 && S == 0 && D == 0);
437     return coding::parse_lgH(rp, B3, H128, 7);
438 
439   case cmk_UNSIGNED5:
440     assert(c.spec == UNSIGNED5_spec);
441     assert(B == B5 && H == H64 && S == 0 && D == 0);
442     return coding::parse_lgH(rp, B5, H64, 6);
443 
444   case cmk_BHSD1:
445     assert(D == 1);
446     uval = coding::parse(rp, B, H);
447     if (S != 0)
448       uval = (uint) decode_sign(S, uval);
449     return getDeltaValue(this, uval, (bool)c.isSubrange);
450 
451   case cmk_BHS1D1full:
452     assert(S == 1 && D == 1 && c.isFullRange);
453     uval = coding::parse(rp, B, H);
454     uval = (uint) DECODE_SIGN_S1(uval);
455     return getDeltaValue(this, uval, false);
456 
457   case cmk_BHS1D1sub:
458     assert(S == 1 && D == 1 && c.isSubrange);
459     uval = coding::parse(rp, B, H);
460     uval = (uint) DECODE_SIGN_S1(uval);
461     return getDeltaValue(this, uval, true);
462 
463   case cmk_DELTA5:
464     assert(c.spec == DELTA5_spec);
465     assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
466     uval = coding::parse_lgH(rp, B5, H64, 6);
467     sum += DECODE_SIGN_S1(uval);
468     return sum;
469 
470   case cmk_BCI5:
471     assert(c.spec == BCI5_spec);
472     assert(B == B5 && H == H4 && S == 0 && D == 0);
473     return coding::parse_lgH(rp, B5, H4, 2);
474 
475   case cmk_BRANCH5:
476     assert(c.spec == BRANCH5_spec);
477     assert(B == B5 && H == H4 && S == 2 && D == 0);
478     uval = coding::parse_lgH(rp, B5, H4, 2);
479     return decode_sign(S, uval);
480 
481   case cmk_pop:
482     uval = coding::parse(rp, B, H);
483     if (S != 0) {
484       uval = (uint) decode_sign(S, uval);
485     }
486     if (D != 0) {
487       assert(c.isSubrange | c.isFullRange);
488       if (c.isSubrange)
489         sum = c.sumInUnsignedRange(sum, (int) uval);
490       else
491         sum += (int) uval;
492       uval = (uint) sum;
493     }
494     return getPopValue(this, uval);
495 
496   case cmk_pop_BHS0:
497     assert(S == 0 && D == 0);
498     uval = coding::parse(rp, B, H);
499     return getPopValue(this, uval);
500 
501   case cmk_pop_BYTE1:
502     assert(c.spec == BYTE1_spec);
503     assert(B == 1 && H == 256 && S == 0 && D == 0);
504     return getPopValue(this, *rp++ & 0xFF);
505 
506   default:
507     break;
508   }
509   assert(false);
510   return 0;
511 }
512 
513 static maybe_inline
moreCentral(int x,int y)514 int moreCentral(int x, int y) {  // used to find end of Pop.{F}
515   // Suggested implementation from the Pack200 specification:
516   uint kx = (x >> 31) ^ (x << 1);
517   uint ky = (y >> 31) ^ (y << 1);
518   return (kx < ky? x: y);
519 }
520 //static maybe_inline
521 //int moreCentral2(int x, int y, int min) {
522 //  // Strict implementation of buggy 150.7 specification.
523 //  // The bug is that the spec. says absolute-value ties are broken
524 //  // in favor of positive numbers, but the suggested implementation
525 //  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
526 //  if ((x + y) != 0)
527 //    return min;
528 //  else
529 //    // return the other value, which breaks a tie in the positive direction
530 //    return (x > y)? x: y;
531 //}
532 
533 static const byte* no_meta[] = {null};
534 #define NO_META (*(byte**)no_meta)
535 enum { POP_FAVORED_N = -2 };
536 
537 // mode bits
538 #define DISABLE_RUN  1  // used immediately inside ACodee
539 #define DISABLE_POP  2  // used recursively in all pop sub-bands
540 
541 // This function knows all about meta-coding.
init(byte * & band_rp,byte * band_limit,byte * & meta_rp,int mode,coding * defc,int N,intlist * valueSink)542 void coding_method::init(byte* &band_rp, byte* band_limit,
543                          byte* &meta_rp, int mode,
544                          coding* defc, int N,
545                          intlist* valueSink) {
546   assert(N != 0);
547 
548   assert(u != null);  // must be pre-initialized
549   //if (u == null)  u = unpacker::current();  // expensive
550 
551   int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
552   coding* foundc = null;
553   coding* to_free = null;
554 
555   if (op == _meta_default) {
556     foundc = defc;
557     // and fall through
558 
559   } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
560     foundc = coding::findByIndex(op);
561     // and fall through
562 
563   } else if (op == _meta_arb) {
564     int args = (*meta_rp++ & 0xFF);
565     // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
566     int D = ((args >> 0) & 1);
567     int S = ((args >> 1) & 3);
568     int B = ((args >> 3) & -1) + 1;
569     // & (H[1..256]-1)
570     int H = (*meta_rp++ & 0xFF) + 1;
571     foundc = coding::findBySpec(B, H, S, D);
572     to_free = foundc;  // findBySpec may dynamically allocate
573     if (foundc == null) {
574       abort("illegal arb. coding");
575       return;
576     }
577     // and fall through
578 
579   } else if (op >= _meta_run && op < _meta_pop) {
580     int args = (op - _meta_run);
581     // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
582     int KX     = ((args >> 0) & 3);
583     int KBFlag = ((args >> 2) & 1);
584     int ABDef  = ((args >> 3) & -1);
585     assert(ABDef <= 2);
586     // & KB: one of [0..255] if KBFlag=1
587     int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
588     int K      = (KB+1) << (KX * 4);
589     int N2 = (N >= 0) ? N-K : N;
590     if (N == 0 || (N2 <= 0 && N2 != N)) {
591       abort("illegal run encoding");
592       return;
593     }
594     if ((mode & DISABLE_RUN) != 0) {
595       abort("illegal nested run encoding");
596       return;
597     }
598 
599     // & Enc{ ACode } if ADef=0  (ABDef != 1)
600     // No direct nesting of 'run' in ACode, but in BCode it's OK.
601     int disRun = mode | DISABLE_RUN;
602     if (ABDef == 1) {
603       this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
604     } else {
605       this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
606     }
607     CHECK;
608 
609     // & Enc{ BCode } if BDef=0  (ABDef != 2)
610     coding_method* tail = U_NEW(coding_method, 1);
611     CHECK_NULL(tail);
612     tail->u = u;
613 
614     // The 'run' codings may be nested indirectly via 'pop' codings.
615     // This means that this->next may already be filled in, if
616     // ACode was of type 'pop' with a 'run' token coding.
617     // No problem:  Just chain the upcoming BCode onto the end.
618     for (coding_method* self = this; ; self = self->next) {
619       if (self->next == null) {
620         self->next = tail;
621         break;
622       }
623     }
624 
625     if (ABDef == 2) {
626       tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
627     } else {
628       tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
629     }
630     // Note:  The preceding calls to init should be tail-recursive.
631 
632     return;  // done; no falling through
633 
634   } else if (op >= _meta_pop && op < _meta_limit) {
635     int args = (op - _meta_pop);
636     // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
637     int FDef  = ((args >> 0) & 1);
638     int UDef  = ((args >> 1) & 1);
639     int TDefL = ((args >> 2) & -1);
640     assert(TDefL <= 11);
641     int TDef  = (TDefL > 0);
642     int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
643     int TH    = (256-TL);
644     if (N <= 0) {
645       abort("illegal pop encoding");
646       return;
647     }
648     if ((mode & DISABLE_POP) != 0) {
649       abort("illegal nested pop encoding");
650       return;
651     }
652 
653     // No indirect nesting of 'pop', but 'run' is OK.
654     int disPop = DISABLE_POP;
655 
656     // & Enc{ FCode } if FDef=0
657     int FN = POP_FAVORED_N;
658     assert(valueSink == null);
659     intlist fValueSink; fValueSink.init();
660     coding_method fval;
661     BYTES_OF(fval).clear(); fval.u = u;
662     if (FDef != 0) {
663       fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
664     } else {
665       fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
666     }
667     bytes fvbuf;
668     fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
669     fVlength = fValueSink.length();  // i.e., the parameter K
670     fValueSink.free();
671     CHECK;
672 
673     // Skip the first {F} run in all subsequent passes.
674     // The next call to this->init(...) will set vs0.rp to point after the {F}.
675 
676     // & Enc{ TCode } if TDef=0  (TDefL==0)
677     if (TDef != 0) {
678       coding* tcode = coding::findBySpec(1, 256);  // BYTE1
679       // find the most narrowly sufficient code:
680       for (int B = 2; B <= B_MAX; B++) {
681         if (fVlength <= tcode->umax)  break;  // found it
682         tcode->free();
683         tcode = coding::findBySpec(B, TH);
684         CHECK_NULL(tcode);
685       }
686       if (!(fVlength <= tcode->umax)) {
687         abort("pop.L value too small");
688         return;
689       }
690       this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
691       tcode->free();
692     } else {
693       this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
694     }
695     CHECK;
696 
697     // Count the number of zero tokens right now.
698     // Also verify that they are in bounds.
699     int UN = 0;   // one {U} for each zero in {T}
700     value_stream vs = vs0;
701     for (int i = 0; i < N; i++) {
702       uint val = vs.getInt();
703       if (val == 0)  UN += 1;
704       if (!(val <= (uint)fVlength)) {
705         abort("pop token out of range");
706         return;
707       }
708     }
709     vs.done();
710 
711     // & Enc{ UCode } if UDef=0
712     if (UN != 0) {
713       uValues = U_NEW(coding_method, 1);
714       CHECK_NULL(uValues);
715       uValues->u = u;
716       if (UDef != 0) {
717         uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
718       } else {
719         uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
720       }
721     } else {
722       if (UDef == 0) {
723         int uop = (*meta_rp++ & 0xFF);
724         if (uop > _meta_canon_max)
725           // %%% Spec. requires the more strict (uop != _meta_default).
726           abort("bad meta-coding for empty pop/U");
727       }
728     }
729 
730     // Bug fix for 6259542
731     // Last of all, adjust vs0.cmk to the 'pop' flavor
732     for (coding_method* self = this; self != null; self = self->next) {
733         coding_method_kind cmk2 = cmk_pop;
734         switch (self->vs0.cmk) {
735         case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
736         case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
737         default: break;
738         }
739         self->vs0.cmk = cmk2;
740         if (self != this) {
741           assert(self->fValues == null); // no double init
742           self->fValues  = this->fValues;
743           self->fVlength = this->fVlength;
744           assert(self->uValues == null); // must stay null
745         }
746     }
747 
748     return;  // done; no falling through
749 
750   } else {
751     abort("bad meta-coding");
752     return;
753   }
754 
755   // Common code here skips a series of values with one coding.
756   assert(foundc != null);
757 
758   assert(vs0.cmk == cmk_ERROR);  // no garbage, please
759   assert(vs0.rp == null);  // no garbage, please
760   assert(vs0.rplimit == null);  // no garbage, please
761   assert(vs0.sum == 0);  // no garbage, please
762 
763   vs0.init(band_rp, band_limit, foundc);
764 
765   // Done with foundc.  Free if necessary.
766   if (to_free != null) {
767     to_free->free();
768     to_free = null;
769   }
770   foundc = null;
771 
772   coding& c = vs0.c;
773   CODING_PRIVATE(c.spec);
774   // assert sane N
775   assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
776 
777   // Look at the values, or at least skip over them quickly.
778   if (valueSink == null) {
779     // Skip and ignore values in the first pass.
780     c.parseMultiple(band_rp, N, band_limit, B, H);
781   } else if (N >= 0) {
782     // Pop coding, {F} sequence, initial run of values...
783     assert((mode & DISABLE_POP) != 0);
784     value_stream vs = vs0;
785     for (int n = 0; n < N; n++) {
786       int val = vs.getInt();
787       valueSink->add(val);
788     }
789     band_rp = vs.rp;
790   } else {
791     // Pop coding, {F} sequence, final run of values...
792     assert((mode & DISABLE_POP) != 0);
793     assert(N == POP_FAVORED_N);
794     int min = INT_MIN_VALUE;  // farthest from the center
795     // min2 is based on the buggy specification of centrality in version 150.7
796     // no known implementations transmit this value, but just in case...
797     //int min2 = INT_MIN_VALUE;
798     int last = 0;
799     // if there were initial runs, find the potential sentinels in them:
800     for (int i = 0; i < valueSink->length(); i++) {
801       last = valueSink->get(i);
802       min = moreCentral(min, last);
803       //min2 = moreCentral2(min2, last, min);
804     }
805     value_stream vs = vs0;
806     for (;;) {
807       int val = vs.getInt();
808       if (valueSink->length() > 0 &&
809           (val == last || val == min)) //|| val == min2
810         break;
811       valueSink->add(val);
812       CHECK;
813       last = val;
814       min = moreCentral(min, last);
815       //min2 = moreCentral2(min2, last, min);
816     }
817     band_rp = vs.rp;
818   }
819   CHECK;
820 
821   // Get an accurate upper limit now.
822   vs0.rplimit = band_rp;
823   vs0.cm = this;
824 
825   return; // success
826 }
827 
828 coding basic_codings[] = {
829   // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
830   CODING_INIT(3,128,0,0),
831 
832   // Fixed-length codings:
833   CODING_INIT(1,256,0,0),
834   CODING_INIT(1,256,1,0),
835   CODING_INIT(1,256,0,1),
836   CODING_INIT(1,256,1,1),
837   CODING_INIT(2,256,0,0),
838   CODING_INIT(2,256,1,0),
839   CODING_INIT(2,256,0,1),
840   CODING_INIT(2,256,1,1),
841   CODING_INIT(3,256,0,0),
842   CODING_INIT(3,256,1,0),
843   CODING_INIT(3,256,0,1),
844   CODING_INIT(3,256,1,1),
845   CODING_INIT(4,256,0,0),
846   CODING_INIT(4,256,1,0),
847   CODING_INIT(4,256,0,1),
848   CODING_INIT(4,256,1,1),
849 
850   // Full-range variable-length codings:
851   CODING_INIT(5,  4,0,0),
852   CODING_INIT(5,  4,1,0),
853   CODING_INIT(5,  4,2,0),
854   CODING_INIT(5, 16,0,0),
855   CODING_INIT(5, 16,1,0),
856   CODING_INIT(5, 16,2,0),
857   CODING_INIT(5, 32,0,0),
858   CODING_INIT(5, 32,1,0),
859   CODING_INIT(5, 32,2,0),
860   CODING_INIT(5, 64,0,0),
861   CODING_INIT(5, 64,1,0),
862   CODING_INIT(5, 64,2,0),
863   CODING_INIT(5,128,0,0),
864   CODING_INIT(5,128,1,0),
865   CODING_INIT(5,128,2,0),
866 
867   CODING_INIT(5,  4,0,1),
868   CODING_INIT(5,  4,1,1),
869   CODING_INIT(5,  4,2,1),
870   CODING_INIT(5, 16,0,1),
871   CODING_INIT(5, 16,1,1),
872   CODING_INIT(5, 16,2,1),
873   CODING_INIT(5, 32,0,1),
874   CODING_INIT(5, 32,1,1),
875   CODING_INIT(5, 32,2,1),
876   CODING_INIT(5, 64,0,1),
877   CODING_INIT(5, 64,1,1),
878   CODING_INIT(5, 64,2,1),
879   CODING_INIT(5,128,0,1),
880   CODING_INIT(5,128,1,1),
881   CODING_INIT(5,128,2,1),
882 
883   // Variable length subrange codings:
884   CODING_INIT(2,192,0,0),
885   CODING_INIT(2,224,0,0),
886   CODING_INIT(2,240,0,0),
887   CODING_INIT(2,248,0,0),
888   CODING_INIT(2,252,0,0),
889 
890   CODING_INIT(2,  8,0,1),
891   CODING_INIT(2,  8,1,1),
892   CODING_INIT(2, 16,0,1),
893   CODING_INIT(2, 16,1,1),
894   CODING_INIT(2, 32,0,1),
895   CODING_INIT(2, 32,1,1),
896   CODING_INIT(2, 64,0,1),
897   CODING_INIT(2, 64,1,1),
898   CODING_INIT(2,128,0,1),
899   CODING_INIT(2,128,1,1),
900   CODING_INIT(2,192,0,1),
901   CODING_INIT(2,192,1,1),
902   CODING_INIT(2,224,0,1),
903   CODING_INIT(2,224,1,1),
904   CODING_INIT(2,240,0,1),
905   CODING_INIT(2,240,1,1),
906   CODING_INIT(2,248,0,1),
907   CODING_INIT(2,248,1,1),
908 
909   CODING_INIT(3,192,0,0),
910   CODING_INIT(3,224,0,0),
911   CODING_INIT(3,240,0,0),
912   CODING_INIT(3,248,0,0),
913   CODING_INIT(3,252,0,0),
914 
915   CODING_INIT(3,  8,0,1),
916   CODING_INIT(3,  8,1,1),
917   CODING_INIT(3, 16,0,1),
918   CODING_INIT(3, 16,1,1),
919   CODING_INIT(3, 32,0,1),
920   CODING_INIT(3, 32,1,1),
921   CODING_INIT(3, 64,0,1),
922   CODING_INIT(3, 64,1,1),
923   CODING_INIT(3,128,0,1),
924   CODING_INIT(3,128,1,1),
925   CODING_INIT(3,192,0,1),
926   CODING_INIT(3,192,1,1),
927   CODING_INIT(3,224,0,1),
928   CODING_INIT(3,224,1,1),
929   CODING_INIT(3,240,0,1),
930   CODING_INIT(3,240,1,1),
931   CODING_INIT(3,248,0,1),
932   CODING_INIT(3,248,1,1),
933 
934   CODING_INIT(4,192,0,0),
935   CODING_INIT(4,224,0,0),
936   CODING_INIT(4,240,0,0),
937   CODING_INIT(4,248,0,0),
938   CODING_INIT(4,252,0,0),
939 
940   CODING_INIT(4,  8,0,1),
941   CODING_INIT(4,  8,1,1),
942   CODING_INIT(4, 16,0,1),
943   CODING_INIT(4, 16,1,1),
944   CODING_INIT(4, 32,0,1),
945   CODING_INIT(4, 32,1,1),
946   CODING_INIT(4, 64,0,1),
947   CODING_INIT(4, 64,1,1),
948   CODING_INIT(4,128,0,1),
949   CODING_INIT(4,128,1,1),
950   CODING_INIT(4,192,0,1),
951   CODING_INIT(4,192,1,1),
952   CODING_INIT(4,224,0,1),
953   CODING_INIT(4,224,1,1),
954   CODING_INIT(4,240,0,1),
955   CODING_INIT(4,240,1,1),
956   CODING_INIT(4,248,0,1),
957   CODING_INIT(4,248,1,1),
958   CODING_INIT(0,0,0,0)
959 };
960 #define BASIC_INDEX_LIMIT \
961         (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
962 
findByIndex(int idx)963 coding* coding::findByIndex(int idx) {
964 #ifndef PRODUCT
965   /* Tricky assert here, constants and gcc complains about it without local. */
966   int index_limit = BASIC_INDEX_LIMIT;
967   assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
968 #endif
969   if (idx >= _meta_canon_min && idx <= _meta_canon_max)
970     return basic_codings[idx].init();
971   else
972     return null;
973 }
974 
975 #ifndef PRODUCT
string()976 const char* coding::string() {
977   CODING_PRIVATE(spec);
978   bytes buf;
979   buf.malloc(100);
980   char maxS[20], minS[20];
981   sprintf(maxS, "%d", max);
982   sprintf(minS, "%d", min);
983   if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
984   if (min == INT_MIN_VALUE)  strcpy(minS, "min");
985   sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
986           B,H,S,D,L,minS,maxS);
987   return (const char*) buf.ptr;
988 }
989 #endif
990