1 /*
2  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package com.sun.java.util.jar.pack;
27 
28 import java.io.ByteArrayOutputStream;
29 import java.io.IOException;
30 import java.io.InputStream;
31 import java.io.OutputStream;
32 import java.util.Arrays;
33 import java.util.HashSet;
34 import java.util.Set;
35 import static com.sun.java.util.jar.pack.Constants.*;
36 
37 /**
38  * Population-based coding.
39  * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
40  * @author John Rose
41  */
42 // This tactic alone reduces the final zipped rt.jar by about a percent.
43 class PopulationCoding implements CodingMethod {
44     Histogram vHist;   // histogram of all values
45     int[]     fValues; // list of favored values
46     int       fVlen;   // inclusive max index
47     long[]    symtab;  // int map of favored value -> token [1..#fValues]
48 
49     CodingMethod favoredCoding;
50     CodingMethod tokenCoding;
51     CodingMethod unfavoredCoding;
52 
53     int L = -1;  //preferred L value for tokenCoding
54 
setFavoredValues(int[] fValues, int fVlen)55     public void setFavoredValues(int[] fValues, int fVlen) {
56         // Note:  {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
57         // This is because zero is an exceptional favored value index.
58         assert(fValues[0] == 0);  // must be empty
59         assert(this.fValues == null);  // do not do this twice
60         this.fValues = fValues;
61         this.fVlen   = fVlen;
62         if (L >= 0) {
63             setL(L);  // reassert
64         }
65     }
setFavoredValues(int[] fValues)66     public void setFavoredValues(int[] fValues) {
67         int lfVlen = fValues.length-1;
68         setFavoredValues(fValues, lfVlen);
69     }
setHistogram(Histogram vHist)70     public void setHistogram(Histogram vHist) {
71         this.vHist = vHist;
72     }
setL(int L)73     public void setL(int L) {
74         this.L = L;
75         if (L >= 0 && fValues != null && tokenCoding == null) {
76             tokenCoding = fitTokenCoding(fVlen, L);
77             assert(tokenCoding != null);
78         }
79     }
80 
fitTokenCoding(int fVlen, int L)81     public static Coding fitTokenCoding(int fVlen, int L) {
82         // Find the smallest B s.t. (B,H,0) covers fVlen.
83         if (fVlen < 256)
84             // H/L do not matter when B==1
85             return BandStructure.BYTE1;
86         Coding longest = BandStructure.UNSIGNED5.setL(L);
87         if (!longest.canRepresentUnsigned(fVlen))
88             return null;  // failure; L is too sharp and fVlen too large
89         Coding tc = longest;
90         for (Coding shorter = longest; ; ) {
91             shorter = shorter.setB(shorter.B()-1);
92             if (shorter.umax() < fVlen)
93                 break;
94             tc = shorter;  // shorten it by reducing B
95         }
96         return tc;
97     }
98 
setFavoredCoding(CodingMethod favoredCoding)99     public void setFavoredCoding(CodingMethod favoredCoding) {
100         this.favoredCoding = favoredCoding;
101     }
setTokenCoding(CodingMethod tokenCoding)102     public void setTokenCoding(CodingMethod tokenCoding) {
103         this.tokenCoding = tokenCoding;
104         this.L = -1;
105         if (tokenCoding instanceof Coding && fValues != null) {
106             Coding tc = (Coding) tokenCoding;
107             if (tc == fitTokenCoding(fVlen, tc.L()))
108                 this.L = tc.L();
109             // Otherwise, it's a non-default coding.
110         }
111     }
setUnfavoredCoding(CodingMethod unfavoredCoding)112     public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
113         this.unfavoredCoding = unfavoredCoding;
114     }
115 
favoredValueMaxLength()116     public int favoredValueMaxLength() {
117         if (L == 0)
118             return Integer.MAX_VALUE;
119         else
120             return BandStructure.UNSIGNED5.setL(L).umax();
121     }
122 
resortFavoredValues()123     public void resortFavoredValues() {
124         Coding tc = (Coding) tokenCoding;
125         // Make a local copy before reordering.
126         fValues = BandStructure.realloc(fValues, 1+fVlen);
127         // Resort favoredValues within each byte-size cadre.
128         int fillp = 1;  // skip initial zero
129         for (int n = 1; n <= tc.B(); n++) {
130             int nmax = tc.byteMax(n);
131             if (nmax > fVlen)
132                 nmax = fVlen;
133             if (nmax < tc.byteMin(n))
134                 break;
135             int low = fillp;
136             int high = nmax+1;
137             if (high == low)  continue;
138             assert(high > low)
139                 : high+"!>"+low;
140             assert(tc.getLength(low) == n)
141                 : n+" != len("+(low)+") == "+
142                   tc.getLength(low);
143             assert(tc.getLength(high-1) == n)
144                 : n+" != len("+(high-1)+") == "+
145                   tc.getLength(high-1);
146             int midTarget = low + (high-low)/2;
147             int mid = low;
148             // Divide the values into cadres, and sort within each.
149             int prevCount = -1;
150             int prevLimit = low;
151             for (int i = low; i < high; i++) {
152                 int val = fValues[i];
153                 int count = vHist.getFrequency(val);
154                 if (prevCount != count) {
155                     if (n == 1) {
156                         // For the single-byte encoding, keep strict order
157                         // among frequency groups.
158                         Arrays.sort(fValues, prevLimit, i);
159                     } else if (Math.abs(mid - midTarget) >
160                                Math.abs(i   - midTarget)) {
161                         // Find a single inflection point
162                         // close to the middle of the byte-size cadre.
163                         mid = i;
164                     }
165                     prevCount = count;
166                     prevLimit = i;
167                 }
168             }
169             if (n == 1) {
170                 Arrays.sort(fValues, prevLimit, high);
171             } else {
172                 // Sort up to the midpoint, if any.
173                 Arrays.sort(fValues, low, mid);
174                 Arrays.sort(fValues, mid, high);
175             }
176             assert(tc.getLength(low) == tc.getLength(mid));
177             assert(tc.getLength(low) == tc.getLength(high-1));
178             fillp = nmax+1;
179         }
180         assert(fillp == fValues.length);
181 
182         // Reset symtab.
183         symtab = null;
184     }
185 
getToken(int value)186     public int getToken(int value) {
187         if (symtab == null)
188             symtab = makeSymtab();
189         int pos = Arrays.binarySearch(symtab, (long)value << 32);
190         if (pos < 0)  pos = -pos-1;
191         if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
192             return (int)symtab[pos];
193         else
194             return 0;
195     }
196 
encodeValues(int[] values, int start, int end)197     public int[][] encodeValues(int[] values, int start, int end) {
198         // Compute token sequence.
199         int[] tokens = new int[end-start];
200         int nuv = 0;
201         for (int i = 0; i < tokens.length; i++) {
202             int val = values[start+i];
203             int tok = getToken(val);
204             if (tok != 0)
205                 tokens[i] = tok;
206             else
207                 nuv += 1;
208         }
209         // Compute unfavored value sequence.
210         int[] unfavoredValues = new int[nuv];
211         nuv = 0;  // reset
212         for (int i = 0; i < tokens.length; i++) {
213             if (tokens[i] != 0)  continue;  // already covered
214             int val = values[start+i];
215             unfavoredValues[nuv++] = val;
216         }
217         assert(nuv == unfavoredValues.length);
218         return new int[][]{ tokens, unfavoredValues };
219     }
220 
makeSymtab()221     private long[] makeSymtab() {
222         long[] lsymtab = new long[fVlen];
223         for (int token = 1; token <= fVlen; token++) {
224             lsymtab[token-1] = ((long)fValues[token] << 32) | token;
225         }
226         // Index by value:
227         Arrays.sort(lsymtab);
228         return lsymtab;
229     }
230 
getTailCoding(CodingMethod c)231     private Coding getTailCoding(CodingMethod c) {
232         while (c instanceof AdaptiveCoding)
233             c = ((AdaptiveCoding)c).tailCoding;
234         return (Coding) c;
235     }
236 
237     // CodingMethod methods.
writeArrayTo(OutputStream out, int[] a, int start, int end)238     public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
239         int[][] vals = encodeValues(a, start, end);
240         writeSequencesTo(out, vals[0], vals[1]);
241     }
writeSequencesTo(OutputStream out, int[] tokens, int[] uValues)242     void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
243         favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
244         getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
245         tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
246         if (uValues.length > 0)
247             unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
248     }
249 
computeSentinelValue()250    int computeSentinelValue() {
251         Coding fc = getTailCoding(favoredCoding);
252         if (fc.isDelta()) {
253             // repeat the last favored value, using delta=0
254             return 0;
255         } else {
256             // else repeat the shorter of the min or last value
257             int min = fValues[1];
258             int last = min;
259             // (remember that fVlen is an inclusive limit in fValues)
260             for (int i = 2; i <= fVlen; i++) {
261                 last = fValues[i];
262                 min = moreCentral(min, last);
263             }
264             int endVal;
265             if (fc.getLength(min) <= fc.getLength(last))
266                 return min;
267             else
268                 return last;
269         }
270    }
271 
readArrayFrom(InputStream in, int[] a, int start, int end)272     public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
273         // Parameters are fCode, L, uCode.
274         setFavoredValues(readFavoredValuesFrom(in, end-start));
275         // Read the tokens.  Read them into the final array, for the moment.
276         tokenCoding.readArrayFrom(in, a, start, end);
277         // Decode the favored tokens.
278         int headp = 0, tailp = -1;
279         int uVlen = 0;
280         for (int i = start; i < end; i++) {
281             int tok = a[i];
282             if (tok == 0) {
283                 // Make a linked list, and decode in a second pass.
284                 if (tailp < 0) {
285                     headp = i;
286                 } else {
287                     a[tailp] = i;
288                 }
289                 tailp = i;
290                 uVlen += 1;
291             } else {
292                 a[i] = fValues[tok];
293             }
294         }
295         // Walk the linked list of "zero" locations, decoding unfavored vals.
296         int[] uValues = new int[uVlen];
297         if (uVlen > 0)
298             unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
299         for (int i = 0; i < uVlen; i++) {
300             int nextp = a[headp];
301             a[headp] = uValues[i];
302             headp = nextp;
303         }
304     }
305 
readFavoredValuesFrom(InputStream in, int maxForDebug)306     int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
307         int[] lfValues = new int[1000];  // realloc as needed
308         // The set uniqueValuesForDebug records all favored values.
309         // As each new value is added, we assert that the value
310         // was not already in the set.
311         Set<Integer> uniqueValuesForDebug = null;
312         assert((uniqueValuesForDebug = new HashSet<>()) != null);
313         int fillp = 1;
314         maxForDebug += fillp;
315         int min = Integer.MIN_VALUE;  // farthest from the center
316         //int min2 = Integer.MIN_VALUE;  // emulate buggy 150.7 spec.
317         int last = 0;
318         CodingMethod fcm = favoredCoding;
319         while (fcm instanceof AdaptiveCoding) {
320             AdaptiveCoding ac = (AdaptiveCoding) fcm;
321             int len = ac.headLength;
322             while (fillp + len > lfValues.length) {
323                 lfValues = BandStructure.realloc(lfValues);
324             }
325             int newFillp = fillp + len;
326             ac.headCoding.readArrayFrom(in, lfValues, fillp, newFillp);
327             while (fillp < newFillp) {
328                 int val = lfValues[fillp++];
329                 assert(uniqueValuesForDebug.add(val));
330                 assert(fillp <= maxForDebug);
331                 last = val;
332                 min = moreCentral(min, val);
333                 //min2 = moreCentral2(min2, val, min);
334             }
335             fcm = ac.tailCoding;
336         }
337         Coding fc = (Coding) fcm;
338         if (fc.isDelta()) {
339             for (long state = 0;;) {
340                 // Read a new value:
341                 state += fc.readFrom(in);
342                 int val;
343                 if (fc.isSubrange())
344                     val = fc.reduceToUnsignedRange(state);
345                 else
346                     val = (int)state;
347                 state = val;
348                 if (fillp > 1 && (val == last || val == min)) //|| val == min2
349                     break;
350                 if (fillp == lfValues.length)
351                     lfValues = BandStructure.realloc(lfValues);
352                 lfValues[fillp++] = val;
353                 assert(uniqueValuesForDebug.add(val));
354                 assert(fillp <= maxForDebug);
355                 last = val;
356                 min = moreCentral(min, val);
357                 //min2 = moreCentral(min2, val);
358             }
359         } else {
360             for (;;) {
361                 int val = fc.readFrom(in);
362                 if (fillp > 1 && (val == last || val == min)) //|| val == min2
363                     break;
364                 if (fillp == lfValues.length)
365                     lfValues = BandStructure.realloc(lfValues);
366                 lfValues[fillp++] = val;
367                 assert(uniqueValuesForDebug.add(val));
368                 assert(fillp <= maxForDebug);
369                 last = val;
370                 min = moreCentral(min, val);
371                 //min2 = moreCentral2(min2, val, min);
372             }
373         }
374         return BandStructure.realloc(lfValues, fillp);
375     }
376 
moreCentral(int x, int y)377     private static int moreCentral(int x, int y) {
378         int kx = (x >> 31) ^ (x << 1);
379         int ky = (y >> 31) ^ (y << 1);
380         // bias kx/ky to get an unsigned comparison:
381         kx -= Integer.MIN_VALUE;
382         ky -= Integer.MIN_VALUE;
383         int xy = (kx < ky? x: y);
384         // assert that this ALU-ish version is the same:
385         assert(xy == moreCentralSlow(x, y));
386         return xy;
387     }
388 //  private static int moreCentral2(int x, int y, int min) {
389 //      // Strict implementation of buggy 150.7 specification.
390 //      // The bug is that the spec. says absolute-value ties are broken
391 //      // in favor of positive numbers, but the suggested implementation
392 //      // (also mentioned in the spec.) breaks ties in favor of negatives.
393 //      if (x + y == 0)  return (x > y? x : y);
394 //      return min;
395 //  }
moreCentralSlow(int x, int y)396     private static int moreCentralSlow(int x, int y) {
397         int ax = x;
398         if (ax < 0)  ax = -ax;
399         if (ax < 0)  return y;  //x is MIN_VALUE
400         int ay = y;
401         if (ay < 0)  ay = -ay;
402         if (ay < 0)  return x;  //y is MIN_VALUE
403         if (ax < ay)  return x;
404         if (ax > ay)  return y;
405         // At this point the absolute values agree, and the negative wins.
406         return x < y ? x : y;
407     }
408 
409     static final int[] LValuesCoded
410         = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
411 
getMetaCoding(Coding dflt)412     public byte[] getMetaCoding(Coding dflt) {
413         int K = fVlen;
414         int LCoded = 0;
415         if (tokenCoding instanceof Coding) {
416             Coding tc = (Coding) tokenCoding;
417             if (tc.B() == 1) {
418                 LCoded = 1;
419             } else if (L >= 0) {
420                 assert(L == tc.L());
421                 for (int i = 1; i < LValuesCoded.length; i++) {
422                     if (LValuesCoded[i] == L) { LCoded = i; break; }
423                 }
424             }
425         }
426         CodingMethod tokenDflt = null;
427         if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
428             // A simple L value is enough to recover the tokenCoding.
429             tokenDflt = tokenCoding;
430         }
431         int FDef = (favoredCoding == dflt)?1:0;
432         int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
433         int TDef = (tokenCoding == tokenDflt)?1:0;
434         int TDefL = (TDef == 1) ? LCoded : 0;
435         assert(TDef == ((TDefL>0)?1:0));
436         ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
437         bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
438         try {
439             if (FDef == 0)  bytes.write(favoredCoding.getMetaCoding(dflt));
440             if (TDef == 0)  bytes.write(tokenCoding.getMetaCoding(dflt));
441             if (UDef == 0)  bytes.write(unfavoredCoding.getMetaCoding(dflt));
442         } catch (IOException ee) {
443             throw new RuntimeException(ee);
444         }
445         return bytes.toByteArray();
446     }
parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[])447     public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
448         int op = bytes[pos++] & 0xFF;
449         if (op < _meta_pop || op >= _meta_limit)  return pos-1; // backup
450         op -= _meta_pop;
451         int FDef = op % 2;
452         int UDef = (op / 2) % 2;
453         int TDefL = (op / 4);
454         int TDef = (TDefL > 0)?1:0;
455         int L = LValuesCoded[TDefL];
456         CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
457         if (FDef == 0)
458             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
459         if (TDef == 0)
460             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
461         if (UDef == 0)
462             pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
463         PopulationCoding pop = new PopulationCoding();
464         pop.L = L;  // might be -1
465         pop.favoredCoding   = FCode[0];
466         pop.tokenCoding     = TCode[0];  // might be null!
467         pop.unfavoredCoding = UCode[0];
468         res[0] = pop;
469         return pos;
470     }
471 
keyString(CodingMethod m)472     private String keyString(CodingMethod m) {
473         if (m instanceof Coding)
474             return ((Coding)m).keyString();
475         if (m == null)
476             return "none";
477         return m.toString();
478     }
toString()479     public String toString() {
480         PropMap p200 = Utils.currentPropMap();
481         boolean verbose
482             = (p200 != null &&
483                p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
484         StringBuilder res = new StringBuilder(100);
485         res.append("pop(").append("fVlen=").append(fVlen);
486         if (verbose && fValues != null) {
487             res.append(" fV=[");
488             for (int i = 1; i <= fVlen; i++) {
489                 res.append(i==1?"":",").append(fValues[i]);
490             }
491             res.append(";").append(computeSentinelValue());
492             res.append("]");
493         }
494         res.append(" fc=").append(keyString(favoredCoding));
495         res.append(" tc=").append(keyString(tokenCoding));
496         res.append(" uc=").append(keyString(unfavoredCoding));
497         res.append(")");
498         return res.toString();
499     }
500 }
501