native/common-unpack/coding.cpp

/*
 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

// -*- C++ -*-
// Small program for unpacking specially compressed Java packages.
// John R. Rose

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#include "jni_util.h"

#include "defines.h"
#include "bytes.h"
#include "utils.h"
#include "coding.h"

#include "constants.h"
#include "unpack.h"

extern coding basic_codings[];

#define CODING_PRIVATE(spec) \
  int spec_ = spec; \
  int B = CODING_B(spec_); \
  int H = CODING_H(spec_); \
  int L = 256 - H; \
  int S = CODING_S(spec_); \
  int D = CODING_D(spec_)

#define IS_NEG_CODE(S, codeVal) \
  ( (((int)(codeVal)+1) & ((1<<S)-1)) == 0 )

#define DECODE_SIGN_S1(ux) \
  ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )

static maybe_inline
int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
  assert(S > 0);
  uint sigbits = (ux >> S);
  if (IS_NEG_CODE(S, ux))
    return (int)(    ~sigbits);
  else
    return (int)(ux - sigbits);
  // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
}

coding* coding::init() {
  if (umax > 0)  return this;  // already done
  assert(spec != 0);  // sanity

  // fill in derived fields
  CODING_PRIVATE(spec);

  // Return null if 'arb(BHSD)' parameter constraints are not met:
  if (B < 1 || B > B_MAX)  return null;
  if (H < 1 || H > 256)    return null;
  if (S < 0 || S > 2)      return null;
  if (D < 0 || D > 1)      return null;
  if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
  if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding

  // first compute the range of the coding, in 64 bits
  jlong range = 0;
  {
    jlong H_i = 1;
    for (int i = 0; i < B; i++) {
      range += H_i;
      H_i *= H;
    }
    range *= L;
    range += H_i;
  }
  assert(range > 0);  // no useless codings, please

  int this_umax;

  // now, compute min and max
  if (range >= ((jlong)1 << 32)) {
    this_umax  = INT_MAX_VALUE;
    this->umin = INT_MIN_VALUE;
    this->max  = INT_MAX_VALUE;
    this->min  = INT_MIN_VALUE;
  } else {
    this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
    this->max = this_umax;
    this->min = this->umin = 0;
    if (S != 0 && range != 0) {
      int Smask = (1<<S)-1;
      jlong maxPosCode = range-1;
      jlong maxNegCode = range-1;
      while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
      while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
      int maxPos = decode_sign(S, (uint)maxPosCode);
      if (maxPos < 0)
        this->max = INT_MAX_VALUE;  // 32-bit wraparound
      else
        this->max = maxPos;
      if (maxNegCode < 0)
        this->min = 0;  // No negative codings at all.
      else
        this->min = decode_sign(S, (uint)maxNegCode);
    }
  }

  assert(!(isFullRange | isSigned | isSubrange)); // init
  if (min < 0)
    this->isSigned = true;
  if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
    this->isSubrange = true;
  if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
    this->isFullRange = true;

  // do this last, to reduce MT exposure (should have a membar too)
  this->umax = this_umax;

  return this;
}

coding* coding::findBySpec(int spec) {
  for (coding* scan = &basic_codings[0]; ; scan++) {
    if (scan->spec == spec)
      return scan->init();
    if (scan->spec == 0)
      break;
  }
  coding* ptr = NEW(coding, 1);
  CHECK_NULL_RETURN(ptr, 0);
  coding* c = ptr->initFrom(spec);
  if (c == null) {
    mtrace('f', ptr, 0);
    ::free(ptr);
  } else
    // else caller should free it...
    c->isMalloc = true;
  return c;
}

coding* coding::findBySpec(int B, int H, int S, int D) {
  if (B < 1 || B > B_MAX)  return null;
  if (H < 1 || H > 256)    return null;
  if (S < 0 || S > 2)      return null;
  if (D < 0 || D > 1)      return null;
  return findBySpec(CODING_SPEC(B, H, S, D));
}

void coding::free() {
  if (isMalloc) {
    mtrace('f', this, 0);
    ::free(this);
  }
}

void coding_method::reset(value_stream* state) {
  assert(state->rp == state->rplimit);  // not in mid-stream, please
  //assert(this == vs0.cm);
  state[0] = vs0;
  if (uValues != null) {
    uValues->reset(state->helper());
  }
}

maybe_inline
uint coding::parse(byte* &rp, int B, int H) {
  int L = 256-H;
  byte* ptr = rp;
  // hand peel the i==0 part of the loop:
  uint b_i = *ptr++ & 0xFF;
  if (B == 1 || b_i < (uint)L)
    { rp = ptr; return b_i; }
  uint sum = b_i;
  uint H_i = H;
  assert(B <= B_MAX);
  for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
    b_i = *ptr++ & 0xFF;
    sum += b_i * H_i;
    if (i == B || b_i < (uint)L)
      { rp = ptr; return sum; }
    H_i *= H;
  }
  assert(false);
  return 0;
}

maybe_inline
uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
  assert(H == (1<<lgH));
  int L = 256-(1<<lgH);
  byte* ptr = rp;
  // hand peel the i==0 part of the loop:
  uint b_i = *ptr++ & 0xFF;
  if (B == 1 || b_i < (uint)L)
    { rp = ptr; return b_i; }
  uint sum = b_i;
  uint lg_H_i = lgH;
  assert(B <= B_MAX);
  for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
    b_i = *ptr++ & 0xFF;
    sum += b_i << lg_H_i;
    if (i == B || b_i < (uint)L)
      { rp = ptr; return sum; }
    lg_H_i += lgH;
  }
  assert(false);
  return 0;
}

static const char ERB[] = "EOF reading band";

maybe_inline
void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
  if (N < 0) {
    abort("bad value count");
    return;
  }
  byte* ptr = rp;
  if (B == 1 || H == 256) {
    size_t len = (size_t)N*B;
    if (len / B != (size_t)N || ptr+len > limit) {
      abort(ERB);
      return;
    }
    rp = ptr+len;
    return;
  }
  // Note:  We assume rp has enough zero-padding.
  int L = 256-H;
  int n = B;
  while (N > 0) {
    ptr += 1;
    if (--n == 0) {
      // end of encoding at B bytes, regardless of byte value
    } else {
      int b = (ptr[-1] & 0xFF);
      if (b >= L) {
        // keep going, unless we find a byte < L
        continue;
      }
    }
    // found the last byte
    N -= 1;
    n = B;   // reset length counter
    // do an error check here
    if (ptr > limit) {
      abort(ERB);
      return;
    }
  }
  rp = ptr;
  return;
}

bool value_stream::hasHelper() {
  // If my coding method is a pop-style method,
  // then I need a second value stream to transmit
  // unfavored values.
  // This can be determined by examining fValues.
  return cm->fValues != null;
}

void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
  rp = rp_;
  rplimit = rplimit_;
  sum = 0;
  cm = null;  // no need in the simple case
  setCoding(defc);
}

void value_stream::setCoding(coding* defc) {
  if (defc == null) {
    unpack_abort("bad coding");
    defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
  }

  c = (*defc);

  // choose cmk
  cmk = cmk_ERROR;
  switch (c.spec) {
  case BYTE1_spec:      cmk = cmk_BYTE1;        break;
  case CHAR3_spec:      cmk = cmk_CHAR3;        break;
  case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
  case DELTA5_spec:     cmk = cmk_DELTA5;       break;
  case BCI5_spec:       cmk = cmk_BCI5;         break;
  case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
  default:
    if (c.D() == 0) {
      switch (c.S()) {
      case 0:  cmk = cmk_BHS0;  break;
      case 1:  cmk = cmk_BHS1;  break;
      default: cmk = cmk_BHS;   break;
      }
    } else {
      if (c.S() == 1) {
        if (c.isFullRange)   cmk = cmk_BHS1D1full;
        if (c.isSubrange)    cmk = cmk_BHS1D1sub;
      }
      if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
    }
  }
}

static maybe_inline
int getPopValue(value_stream* self, uint uval) {
  if (uval > 0) {
    // note that the initial parse performed a range check
    assert(uval <= (uint)self->cm->fVlength);
    return self->cm->fValues[uval-1];
  } else {
    // take an unfavored value
    return self->helper()->getInt();
  }
}

maybe_inline
int coding::sumInUnsignedRange(int x, int y) {
  assert(isSubrange);
  int range = (int)(umax+1);
  assert(range > 0);
  x += y;
  if (x != (int)((jlong)(x-y) + (jlong)y)) {
    // 32-bit overflow interferes with range reduction.
    // Back off from the overflow by adding a multiple of range:
    if (x < 0) {
      x -= range;
      assert(x >= 0);
    } else {
      x += range;
      assert(x < 0);
    }
  }
  if (x < 0) {
    x += range;
    if (x >= 0)  return x;
  } else if (x >= range) {
    x -= range;
    if (x < range)  return x;
  } else {
    // in range
    return x;
  }
  // do it the hard way
  x %= range;
  if (x < 0)  x += range;
  return x;
}

static maybe_inline
int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
  assert((uint)(self->c.isSubrange) == (uint)isSubrange);
  assert(self->c.isSubrange | self->c.isFullRange);
  if (isSubrange)
    return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
  else
    return self->sum += (int) uval;
}

bool value_stream::hasValue() {
  if (rp < rplimit)      return true;
  if (cm == null)        return false;
  if (cm->next == null)  return false;
  cm->next->reset(this);
  return hasValue();
}

int value_stream::getInt() {
  if (rp >= rplimit) {
    // Advance to next coding segment.
    if (rp > rplimit || cm == null || cm->next == null) {
      // Must perform this check and throw an exception on bad input.
      unpack_abort(ERB);
      return 0;
    }
    cm->next->reset(this);
    return getInt();
  }

  CODING_PRIVATE(c.spec);
  uint uval;
  enum {
    B5 = 5,
    B3 = 3,
    H128 = 128,
    H64 = 64,
    H4 = 4
  };
  switch (cmk) {
  case cmk_BHS:
    assert(D == 0);
    uval = coding::parse(rp, B, H);
    if (S == 0)
      return (int) uval;
    return decode_sign(S, uval);

  case cmk_BHS0:
    assert(S == 0 && D == 0);
    uval = coding::parse(rp, B, H);
    return (int) uval;

  case cmk_BHS1:
    assert(S == 1 && D == 0);
    uval = coding::parse(rp, B, H);
    return DECODE_SIGN_S1(uval);

  case cmk_BYTE1:
    assert(c.spec == BYTE1_spec);
    assert(B == 1 && H == 256 && S == 0 && D == 0);
    return *rp++ & 0xFF;

  case cmk_CHAR3:
    assert(c.spec == CHAR3_spec);
    assert(B == B3 && H == H128 && S == 0 && D == 0);
    return coding::parse_lgH(rp, B3, H128, 7);

  case cmk_UNSIGNED5:
    assert(c.spec == UNSIGNED5_spec);
    assert(B == B5 && H == H64 && S == 0 && D == 0);
    return coding::parse_lgH(rp, B5, H64, 6);

  case cmk_BHSD1:
    assert(D == 1);
    uval = coding::parse(rp, B, H);
    if (S != 0)
      uval = (uint) decode_sign(S, uval);
    return getDeltaValue(this, uval, (bool)c.isSubrange);

  case cmk_BHS1D1full:
    assert(S == 1 && D == 1 && c.isFullRange);
    uval = coding::parse(rp, B, H);
    uval = (uint) DECODE_SIGN_S1(uval);
    return getDeltaValue(this, uval, false);

  case cmk_BHS1D1sub:
    assert(S == 1 && D == 1 && c.isSubrange);
    uval = coding::parse(rp, B, H);
    uval = (uint) DECODE_SIGN_S1(uval);
    return getDeltaValue(this, uval, true);

  case cmk_DELTA5:
    assert(c.spec == DELTA5_spec);
    assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
    uval = coding::parse_lgH(rp, B5, H64, 6);
    sum += DECODE_SIGN_S1(uval);
    return sum;

  case cmk_BCI5:
    assert(c.spec == BCI5_spec);
    assert(B == B5 && H == H4 && S == 0 && D == 0);
    return coding::parse_lgH(rp, B5, H4, 2);

  case cmk_BRANCH5:
    assert(c.spec == BRANCH5_spec);
    assert(B == B5 && H == H4 && S == 2 && D == 0);
    uval = coding::parse_lgH(rp, B5, H4, 2);
    return decode_sign(S, uval);

  case cmk_pop:
    uval = coding::parse(rp, B, H);
    if (S != 0) {
      uval = (uint) decode_sign(S, uval);
    }
    if (D != 0) {
      assert(c.isSubrange | c.isFullRange);
      if (c.isSubrange)
        sum = c.sumInUnsignedRange(sum, (int) uval);
      else
        sum += (int) uval;
      uval = (uint) sum;
    }
    return getPopValue(this, uval);

  case cmk_pop_BHS0:
    assert(S == 0 && D == 0);
    uval = coding::parse(rp, B, H);
    return getPopValue(this, uval);

  case cmk_pop_BYTE1:
    assert(c.spec == BYTE1_spec);
    assert(B == 1 && H == 256 && S == 0 && D == 0);
    return getPopValue(this, *rp++ & 0xFF);

  default:
    break;
  }
  assert(false);
  return 0;
}

static maybe_inline
int moreCentral(int x, int y) {  // used to find end of Pop.{F}
  // Suggested implementation from the Pack200 specification:
  uint kx = (x >> 31) ^ (x << 1);
  uint ky = (y >> 31) ^ (y << 1);
  return (kx < ky? x: y);
}
//static maybe_inline
//int moreCentral2(int x, int y, int min) {
//  // Strict implementation of buggy 150.7 specification.
//  // The bug is that the spec. says absolute-value ties are broken
//  // in favor of positive numbers, but the suggested implementation
//  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
//  if ((x + y) != 0)
//    return min;
//  else
//    // return the other value, which breaks a tie in the positive direction
//    return (x > y)? x: y;
//}

static const byte* no_meta[] = {null};
#define NO_META (*(byte**)no_meta)
enum { POP_FAVORED_N = -2 };

// mode bits
#define DISABLE_RUN  1  // used immediately inside ACodee
#define DISABLE_POP  2  // used recursively in all pop sub-bands

// This function knows all about meta-coding.
void coding_method::init(byte* &band_rp, byte* band_limit,
                         byte* &meta_rp, int mode,
                         coding* defc, int N,
                         intlist* valueSink) {
  assert(N != 0);

  assert(u != null);  // must be pre-initialized
  //if (u == null)  u = unpacker::current();  // expensive

  int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
  coding* foundc = null;
  coding* to_free = null;

  if (op == _meta_default) {
    foundc = defc;
    // and fall through

  } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
    foundc = coding::findByIndex(op);
    // and fall through

  } else if (op == _meta_arb) {
    int args = (*meta_rp++ & 0xFF);
    // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
    int D = ((args >> 0) & 1);
    int S = ((args >> 1) & 3);
    int B = ((args >> 3) & -1) + 1;
    // & (H[1..256]-1)
    int H = (*meta_rp++ & 0xFF) + 1;
    foundc = coding::findBySpec(B, H, S, D);
    to_free = foundc;  // findBySpec may dynamically allocate
    if (foundc == null) {
      abort("illegal arb. coding");
      return;
    }
    // and fall through

  } else if (op >= _meta_run && op < _meta_pop) {
    int args = (op - _meta_run);
    // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
    int KX     = ((args >> 0) & 3);
    int KBFlag = ((args >> 2) & 1);
    int ABDef  = ((args >> 3) & -1);
    assert(ABDef <= 2);
    // & KB: one of [0..255] if KBFlag=1
    int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
    int K      = (KB+1) << (KX * 4);
    int N2 = (N >= 0) ? N-K : N;
    if (N == 0 || (N2 <= 0 && N2 != N)) {
      abort("illegal run encoding");
      return;
    }
    if ((mode & DISABLE_RUN) != 0) {
      abort("illegal nested run encoding");
      return;
    }

    // & Enc{ ACode } if ADef=0  (ABDef != 1)
    // No direct nesting of 'run' in ACode, but in BCode it's OK.
    int disRun = mode | DISABLE_RUN;
    if (ABDef == 1) {
      this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
    } else {
      this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
    }
    CHECK;

    // & Enc{ BCode } if BDef=0  (ABDef != 2)
    coding_method* tail = U_NEW(coding_method, 1);
    CHECK_NULL(tail);
    tail->u = u;

    // The 'run' codings may be nested indirectly via 'pop' codings.
    // This means that this->next may already be filled in, if
    // ACode was of type 'pop' with a 'run' token coding.
    // No problem:  Just chain the upcoming BCode onto the end.
    for (coding_method* self = this; ; self = self->next) {
      if (self->next == null) {
        self->next = tail;
        break;
      }
    }

    if (ABDef == 2) {
      tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
    } else {
      tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
    }
    // Note:  The preceding calls to init should be tail-recursive.

    return;  // done; no falling through

  } else if (op >= _meta_pop && op < _meta_limit) {
    int args = (op - _meta_pop);
    // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
    int FDef  = ((args >> 0) & 1);
    int UDef  = ((args >> 1) & 1);
    int TDefL = ((args >> 2) & -1);
    assert(TDefL <= 11);
    int TDef  = (TDefL > 0);
    int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
    int TH    = (256-TL);
    if (N <= 0) {
      abort("illegal pop encoding");
      return;
    }
    if ((mode & DISABLE_POP) != 0) {
      abort("illegal nested pop encoding");
      return;
    }

    // No indirect nesting of 'pop', but 'run' is OK.
    int disPop = DISABLE_POP;

    // & Enc{ FCode } if FDef=0
    int FN = POP_FAVORED_N;
    assert(valueSink == null);
    intlist fValueSink; fValueSink.init();
    coding_method fval;
    BYTES_OF(fval).clear(); fval.u = u;
    if (FDef != 0) {
      fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
    } else {
      fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
    }
    bytes fvbuf;
    fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
    fVlength = fValueSink.length();  // i.e., the parameter K
    fValueSink.free();
    CHECK;

    // Skip the first {F} run in all subsequent passes.
    // The next call to this->init(...) will set vs0.rp to point after the {F}.

    // & Enc{ TCode } if TDef=0  (TDefL==0)
    if (TDef != 0) {
      coding* tcode = coding::findBySpec(1, 256);  // BYTE1
      // find the most narrowly sufficient code:
      for (int B = 2; B <= B_MAX; B++) {
        if (fVlength <= tcode->umax)  break;  // found it
        tcode->free();
        tcode = coding::findBySpec(B, TH);
        CHECK_NULL(tcode);
      }
      if (!(fVlength <= tcode->umax)) {
        abort("pop.L value too small");
        return;
      }
      this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
      tcode->free();
    } else {
      this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
    }
    CHECK;

    // Count the number of zero tokens right now.
    // Also verify that they are in bounds.
    int UN = 0;   // one {U} for each zero in {T}
    value_stream vs = vs0;
    for (int i = 0; i < N; i++) {
      uint val = vs.getInt();
      if (val == 0)  UN += 1;
      if (!(val <= (uint)fVlength)) {
        abort("pop token out of range");
        return;
      }
    }
    vs.done();

    // & Enc{ UCode } if UDef=0
    if (UN != 0) {
      uValues = U_NEW(coding_method, 1);
      CHECK_NULL(uValues);
      uValues->u = u;
      if (UDef != 0) {
        uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
      } else {
        uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
      }
    } else {
      if (UDef == 0) {
        int uop = (*meta_rp++ & 0xFF);
        if (uop > _meta_canon_max)
          // %%% Spec. requires the more strict (uop != _meta_default).
          abort("bad meta-coding for empty pop/U");
      }
    }

    // Bug fix for 6259542
    // Last of all, adjust vs0.cmk to the 'pop' flavor
    for (coding_method* self = this; self != null; self = self->next) {
        coding_method_kind cmk2 = cmk_pop;
        switch (self->vs0.cmk) {
        case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
        case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
        default: break;
        }
        self->vs0.cmk = cmk2;
        if (self != this) {
          assert(self->fValues == null); // no double init
          self->fValues  = this->fValues;
          self->fVlength = this->fVlength;
          assert(self->uValues == null); // must stay null
        }
    }

    return;  // done; no falling through

  } else {
    abort("bad meta-coding");
    return;
  }

  // Common code here skips a series of values with one coding.
  assert(foundc != null);

  assert(vs0.cmk == cmk_ERROR);  // no garbage, please
  assert(vs0.rp == null);  // no garbage, please
  assert(vs0.rplimit == null);  // no garbage, please
  assert(vs0.sum == 0);  // no garbage, please

  vs0.init(band_rp, band_limit, foundc);

  // Done with foundc.  Free if necessary.
  if (to_free != null) {
    to_free->free();
    to_free = null;
  }
  foundc = null;

  coding& c = vs0.c;
  CODING_PRIVATE(c.spec);
  // assert sane N
  assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);

  // Look at the values, or at least skip over them quickly.
  if (valueSink == null) {
    // Skip and ignore values in the first pass.
    c.parseMultiple(band_rp, N, band_limit, B, H);
  } else if (N >= 0) {
    // Pop coding, {F} sequence, initial run of values...
    assert((mode & DISABLE_POP) != 0);
    value_stream vs = vs0;
    for (int n = 0; n < N; n++) {
      int val = vs.getInt();
      valueSink->add(val);
    }
    band_rp = vs.rp;
  } else {
    // Pop coding, {F} sequence, final run of values...
    assert((mode & DISABLE_POP) != 0);
    assert(N == POP_FAVORED_N);
    int min = INT_MIN_VALUE;  // farthest from the center
    // min2 is based on the buggy specification of centrality in version 150.7
    // no known implementations transmit this value, but just in case...
    //int min2 = INT_MIN_VALUE;
    int last = 0;
    // if there were initial runs, find the potential sentinels in them:
    for (int i = 0; i < valueSink->length(); i++) {
      last = valueSink->get(i);
      min = moreCentral(min, last);
      //min2 = moreCentral2(min2, last, min);
    }
    value_stream vs = vs0;
    for (;;) {
      int val = vs.getInt();
      if (valueSink->length() > 0 &&
          (val == last || val == min)) //|| val == min2
        break;
      valueSink->add(val);
      CHECK;
      last = val;
      min = moreCentral(min, last);
      //min2 = moreCentral2(min2, last, min);
    }
    band_rp = vs.rp;
  }
  CHECK;

  // Get an accurate upper limit now.
  vs0.rplimit = band_rp;
  vs0.cm = this;

  return; // success
}

coding basic_codings[] = {
  // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
  CODING_INIT(3,128,0,0),

  // Fixed-length codings:
  CODING_INIT(1,256,0,0),
  CODING_INIT(1,256,1,0),
  CODING_INIT(1,256,0,1),
  CODING_INIT(1,256,1,1),
  CODING_INIT(2,256,0,0),
  CODING_INIT(2,256,1,0),
  CODING_INIT(2,256,0,1),
  CODING_INIT(2,256,1,1),
  CODING_INIT(3,256,0,0),
  CODING_INIT(3,256,1,0),
  CODING_INIT(3,256,0,1),
  CODING_INIT(3,256,1,1),
  CODING_INIT(4,256,0,0),
  CODING_INIT(4,256,1,0),
  CODING_INIT(4,256,0,1),
  CODING_INIT(4,256,1,1),

  // Full-range variable-length codings:
  CODING_INIT(5,  4,0,0),
  CODING_INIT(5,  4,1,0),
  CODING_INIT(5,  4,2,0),
  CODING_INIT(5, 16,0,0),
  CODING_INIT(5, 16,1,0),
  CODING_INIT(5, 16,2,0),
  CODING_INIT(5, 32,0,0),
  CODING_INIT(5, 32,1,0),
  CODING_INIT(5, 32,2,0),
  CODING_INIT(5, 64,0,0),
  CODING_INIT(5, 64,1,0),
  CODING_INIT(5, 64,2,0),
  CODING_INIT(5,128,0,0),
  CODING_INIT(5,128,1,0),
  CODING_INIT(5,128,2,0),

  CODING_INIT(5,  4,0,1),
  CODING_INIT(5,  4,1,1),
  CODING_INIT(5,  4,2,1),
  CODING_INIT(5, 16,0,1),
  CODING_INIT(5, 16,1,1),
  CODING_INIT(5, 16,2,1),
  CODING_INIT(5, 32,0,1),
  CODING_INIT(5, 32,1,1),
  CODING_INIT(5, 32,2,1),
  CODING_INIT(5, 64,0,1),
  CODING_INIT(5, 64,1,1),
  CODING_INIT(5, 64,2,1),
  CODING_INIT(5,128,0,1),
  CODING_INIT(5,128,1,1),
  CODING_INIT(5,128,2,1),

  // Variable length subrange codings:
  CODING_INIT(2,192,0,0),
  CODING_INIT(2,224,0,0),
  CODING_INIT(2,240,0,0),
  CODING_INIT(2,248,0,0),
  CODING_INIT(2,252,0,0),

  CODING_INIT(2,  8,0,1),
  CODING_INIT(2,  8,1,1),
  CODING_INIT(2, 16,0,1),
  CODING_INIT(2, 16,1,1),
  CODING_INIT(2, 32,0,1),
  CODING_INIT(2, 32,1,1),
  CODING_INIT(2, 64,0,1),
  CODING_INIT(2, 64,1,1),
  CODING_INIT(2,128,0,1),
  CODING_INIT(2,128,1,1),
  CODING_INIT(2,192,0,1),
  CODING_INIT(2,192,1,1),
  CODING_INIT(2,224,0,1),
  CODING_INIT(2,224,1,1),
  CODING_INIT(2,240,0,1),
  CODING_INIT(2,240,1,1),
  CODING_INIT(2,248,0,1),
  CODING_INIT(2,248,1,1),

  CODING_INIT(3,192,0,0),
  CODING_INIT(3,224,0,0),
  CODING_INIT(3,240,0,0),
  CODING_INIT(3,248,0,0),
  CODING_INIT(3,252,0,0),

  CODING_INIT(3,  8,0,1),
  CODING_INIT(3,  8,1,1),
  CODING_INIT(3, 16,0,1),
  CODING_INIT(3, 16,1,1),
  CODING_INIT(3, 32,0,1),
  CODING_INIT(3, 32,1,1),
  CODING_INIT(3, 64,0,1),
  CODING_INIT(3, 64,1,1),
  CODING_INIT(3,128,0,1),
  CODING_INIT(3,128,1,1),
  CODING_INIT(3,192,0,1),
  CODING_INIT(3,192,1,1),
  CODING_INIT(3,224,0,1),
  CODING_INIT(3,224,1,1),
  CODING_INIT(3,240,0,1),
  CODING_INIT(3,240,1,1),
  CODING_INIT(3,248,0,1),
  CODING_INIT(3,248,1,1),

  CODING_INIT(4,192,0,0),
  CODING_INIT(4,224,0,0),
  CODING_INIT(4,240,0,0),
  CODING_INIT(4,248,0,0),
  CODING_INIT(4,252,0,0),

  CODING_INIT(4,  8,0,1),
  CODING_INIT(4,  8,1,1),
  CODING_INIT(4, 16,0,1),
  CODING_INIT(4, 16,1,1),
  CODING_INIT(4, 32,0,1),
  CODING_INIT(4, 32,1,1),
  CODING_INIT(4, 64,0,1),
  CODING_INIT(4, 64,1,1),
  CODING_INIT(4,128,0,1),
  CODING_INIT(4,128,1,1),
  CODING_INIT(4,192,0,1),
  CODING_INIT(4,192,1,1),
  CODING_INIT(4,224,0,1),
  CODING_INIT(4,224,1,1),
  CODING_INIT(4,240,0,1),
  CODING_INIT(4,240,1,1),
  CODING_INIT(4,248,0,1),
  CODING_INIT(4,248,1,1),
  CODING_INIT(0,0,0,0)
};
#define BASIC_INDEX_LIMIT \
        (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)

coding* coding::findByIndex(int idx) {
#ifndef PRODUCT
  /* Tricky assert here, constants and gcc complains about it without local. */
  int index_limit = BASIC_INDEX_LIMIT;
  assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
#endif
  if (idx >= _meta_canon_min && idx <= _meta_canon_max)
    return basic_codings[idx].init();
  else
    return null;
}

#ifndef PRODUCT
const char* coding::string() {
  CODING_PRIVATE(spec);
  bytes buf;
  buf.malloc(100);
  char maxS[20], minS[20];
  sprintf(maxS, "%d", max);
  sprintf(minS, "%d", min);
  if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
  if (min == INT_MIN_VALUE)  strcpy(minS, "min");
  sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
          B,H,S,D,L,minS,maxS);
  return (const char*) buf.ptr;
}
#endif