1 /*
2  * Copyright (c) 2009, 2012, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /*
25  * @test
26  * @bug 6843578
27  * @summary Test old and new implementation of db charsets
28  * @build IBM930_OLD IBM933_OLD IBM935_OLD IBM937_OLD IBM939_OLD IBM942_OLD IBM943_OLD IBM948_OLD IBM949_OLD IBM950_OLD IBM970_OLD IBM942C_OLD IBM943C_OLD IBM949C_OLD IBM1381_OLD IBM1383_OLD EUC_CN_OLD EUC_KR_OLD GBK_OLD Johab_OLD MS932_OLD MS936_OLD MS949_OLD MS950_OLD SJIS_OLD PCK_OLD EUC_JP_OLD EUC_JP_LINUX_OLD EUC_JP_Open_OLD
29  * @modules java.base/sun.nio.cs jdk.charsets/sun.nio.cs.ext
30  * @run main TestIBMDB
31  */
32 
33 import java.nio.charset.*;
34 import java.nio.*;
35 import java.util.*;
36 
/**
 * Compares the "old" (*_OLD classes) and current implementations of a set of
 * double-byte charsets: encodability, byte/char mappings, rough timing and
 * malformed-input detection.
 *
 * Mapping differences are only printed; the test fails (by exception) when an
 * encode/decode of the shared character set does not end in UNDERFLOW or when
 * a malformed-input check reports an unexpected result.
 */
public class TestIBMDB {
    /** Mutable holder used as an out-parameter for a measured time (microseconds per iteration). */
    static class Time {
        long t;
    }

    /** Number of times every timed encode/decode operation is repeated. */
    static int iteration = 200;

    /** Returns a buffer positioned at 0 whose content is {@code bb}; direct or heap-wrapped. */
    private static ByteBuffer inputBytes(byte[] bb, boolean direct) {
        if (!direct) {
            return ByteBuffer.wrap(bb);
        }
        ByteBuffer bbf = ByteBuffer.allocateDirect(bb.length);
        bbf.put(bb).flip();
        return bbf;
    }

    /** Returns a buffer positioned at 0 whose content is {@code cc}; direct or heap-wrapped. */
    private static CharBuffer inputChars(char[] cc, boolean direct) {
        if (!direct) {
            return CharBuffer.wrap(cc);
        }
        CharBuffer cbf = ByteBuffer.allocateDirect(cc.length * 2).asCharBuffer();
        cbf.put(cc).flip();
        return cbf;
    }

    /** Returns an empty char buffer with room for {@code chars} chars; direct or heap. */
    private static CharBuffer outputChars(int chars, boolean direct) {
        return direct ? ByteBuffer.allocateDirect(chars * 2).asCharBuffer()
                      : CharBuffer.allocate(chars);
    }

    /** Returns an empty byte buffer with room for {@code bytes} bytes; direct or heap. */
    private static ByteBuffer outputBytes(int bytes, boolean direct) {
        return direct ? ByteBuffer.allocateDirect(bytes)
                      : ByteBuffer.allocate(bytes);
    }

    /** Decodes {@code in} into {@code out} {@link #iteration} times; returns the last result. */
    private static CoderResult decodeRepeatedly(CharsetDecoder dec, ByteBuffer in, CharBuffer out) {
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            in.rewind();
            out.clear();
            dec.reset();
            cr = dec.decode(in, out, true);
        }
        return cr;
    }

    /** Encodes {@code in} into {@code out} {@link #iteration} times; returns the last result. */
    private static CoderResult encodeRepeatedly(CharsetEncoder enc, CharBuffer in, ByteBuffer out) {
        CoderResult cr = null;
        for (int i = 0; i < iteration; i++) {
            in.rewind();
            out.clear();
            enc.reset();
            cr = enc.encode(in, out, true);
        }
        return cr;
    }

    /**
     * Decodes {@code bb} with {@code cs}, repeating the operation
     * {@link #iteration} times and storing the average time in {@code t}.
     *
     * @param bb         bytes to decode
     * @param cs         charset whose decoder is used
     * @param testDirect use direct buffers instead of heap buffers
     * @param t          receives the average microseconds per iteration
     * @return the decoded chars
     * @throws RuntimeException if the final decode result is not UNDERFLOW
     */
    static char[] decode(byte[] bb, Charset cs, boolean testDirect, Time t)
        throws Exception {
        String csn = cs.name();
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf = inputBytes(bb, testDirect);
        CharBuffer cbf = outputChars(bb.length, testDirect);

        long t1 = System.nanoTime() / 1000;
        CoderResult cr = decodeRepeatedly(dec, bbf, cbf);
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;

        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("DEC-----------------");
            int pos = bbf.position();
            // Dump up to four bytes starting at the failure position. The
            // failure may be closer than four bytes to the end of the input,
            // so never read past the array: that would replace the real
            // error with an ArrayIndexOutOfBoundsException.
            StringBuilder hex = new StringBuilder();
            int end = Math.min(pos + 4, bb.length);
            for (int i = pos; i < end; i++) {
                if (i > pos) {
                    hex.append(',');
                }
                hex.append(Integer.toHexString(bb[i] & 0xff));
            }
            System.out.printf("  cr=%s, bbf.pos=%d, bb[pos]=%s%n", cr, pos, hex);
            throw new RuntimeException("Decoding err: " + csn);
        }
        char[] cc = new char[cbf.position()];
        cbf.flip();
        cbf.get(cc);
        return cc;
    }

    /**
     * Decodes {@code bb} with {@code cs} {@link #iteration} times and returns
     * the result of the last decode without interpreting it.
     */
    static CoderResult decodeCR(byte[] bb, Charset cs, boolean testDirect)
        throws Exception {
        CharsetDecoder dec = cs.newDecoder();
        ByteBuffer bbf = inputBytes(bb, testDirect);
        CharBuffer cbf = outputChars(bb.length, testDirect);
        return decodeRepeatedly(dec, bbf, cbf);
    }

    /**
     * Encodes {@code cc} with {@code cs}, repeating the operation
     * {@link #iteration} times and storing the average time in {@code t}.
     *
     * @param cc         chars to encode
     * @param cs         charset whose encoder is used
     * @param testDirect use direct buffers instead of heap buffers
     * @param t          receives the average microseconds per iteration
     * @return the encoded bytes
     * @throws RuntimeException if the final encode result is not UNDERFLOW
     */
    static byte[] encode(char[] cc, Charset cs, boolean testDirect, Time t)
        throws Exception {
        CharsetEncoder enc = cs.newEncoder();
        String csn = cs.name();
        // Output is sized at four bytes per input char.
        ByteBuffer bbf = outputBytes(cc.length * 4, testDirect);
        CharBuffer cbf = inputChars(cc, testDirect);

        long t1 = System.nanoTime() / 1000;
        CoderResult cr = encodeRepeatedly(enc, cbf, bbf);
        long t2 = System.nanoTime() / 1000;
        t.t = (t2 - t1) / iteration;

        if (cr != CoderResult.UNDERFLOW) {
            System.out.println("ENC-----------------");
            int pos = cbf.position();
            System.out.printf("  cr=%s, cbf.pos=%d, cc[pos]=%x%n",
                              cr.toString(), pos, cc[pos] & 0xffff);
            throw new RuntimeException("Encoding err: " + csn);
        }
        byte[] bb = new byte[bbf.position()];
        bbf.flip();
        bbf.get(bb);
        return bb;
    }

    /**
     * Encodes {@code cc} with {@code cs} {@link #iteration} times and returns
     * the result of the last encode without interpreting it.
     */
    static CoderResult encodeCR(char[] cc, Charset cs, boolean testDirect)
        throws Exception {
        CharsetEncoder enc = cs.newEncoder();
        ByteBuffer bbf = outputBytes(cc.length * 4, testDirect);
        CharBuffer cbf = inputChars(cc, testDirect);
        return encodeRepeatedly(enc, cbf, bbf);
    }

    /**
     * Prints one mapping line: the hex bytes {@code c} encodes to in
     * {@code cs}, {@code c} itself, and the first char obtained by decoding
     * those bytes again.
     */
    static void printEntry(char c, Charset cs) {
        byte[] bb = new String(new char[] {c}).getBytes(cs);
        for (byte b : bb) {
            System.out.printf("%x", b & 0xff);
        }
        System.out.printf("    %x", c & 0xffff);
        String s2 = new String(bb, cs);
        System.out.printf("    %x%n", s2.charAt(0) & 0xffff);
    }

    /**
     * Compares canEncode() and the char-to-byte (and round-trip) mapping of
     * both charsets for every char in U+0000..U+FFFE, printing differences.
     *
     * @return the chars both charsets report as encodable, in ascending order
     */
    static char[] checkEncoding(Charset oldCS, Charset newCS)
        throws Exception {
        System.out.printf("Encoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        CharsetEncoder encOLD = oldCS.newEncoder();
        CharsetEncoder encNew = newCS.newEncoder();
        char[] cc = new char[0x10000];
        int pos = 0;
        boolean is970 = "x-IBM970-Old".equals(oldCS.name());

        // The bound is exclusive of 0xffff: a char counter can never exceed
        // 0xffff, so "c <= 0xffff" would loop forever.
        for (char c = 0; c < 0xffff; c++) {
            boolean canOld = encOLD.canEncode(c);
            boolean canNew = encNew.canEncode(c);

            if (is970 && c == 0x2299) {
                continue;
            }

            if (canOld != canNew) {
                if (canNew) {
                    System.out.printf("      NEW(only): ");
                    printEntry(c, newCS);
                } else {
                    if (is970) {
                        byte[] bb = new String(new char[] {c}).getBytes(oldCS);
                        if (bb.length == 2 && bb[0] == (byte)0xa2 && bb[1] == (byte)0xc1) {
                            // we know 970 has bogus nnnn -> a2c1 -> 2299
                            continue;
                        }
                    }
                    System.out.printf("      OLD(only): ");
                    printEntry(c, oldCS);
                }
            } else if (canNew) {
                byte[] bbNew = new String(new char[] {c}).getBytes(newCS);
                byte[] bbOld = new String(new char[] {c}).getBytes(oldCS);
                if (!Arrays.equals(bbNew, bbOld)) {
                    System.out.printf("      c->b NEW: ");
                    printEntry(c, newCS);
                    System.out.printf("      c->b OLD: ");
                    printEntry(c, oldCS);
                } else {
                    String sNew = new String(bbNew, newCS);
                    String sOld = new String(bbOld, oldCS);
                    if (!sNew.equals(sOld)) {
                        System.out.printf("      b2c NEW (c=%x):", c & 0xffff);
                        printEntry(sNew.charAt(0), newCS);
                        System.out.printf("      b2c OLD:");
                        printEntry(sOld.charAt(0), oldCS);
                    }
                }
            }
            if (canNew & canOld) {  // added only both for now
                cc[pos++] = c;
            }
        }
        return Arrays.copyOf(cc, pos);
    }

    /**
     * Compares the byte-to-char mapping of both charsets, first for all
     * single bytes and then for all two-byte combinations with both bytes in
     * 0x40..0xfe, printing differences. For charsets whose old name starts
     * with "x-IBM93" the double-byte pair is wrapped in 0x0e ... 0x0f.
     */
    static void checkDecoding(Charset oldCS, Charset newCS)
        throws Exception
    {
        System.out.printf("Decoding <%s> <%s>...%n", oldCS.name(), newCS.name());
        boolean isEBCDIC = oldCS.name().startsWith("x-IBM93");

        // Try single bytes first.
        byte[] bb = new byte[1];
        System.out.printf("       trying SB...%n");
        for (int b = 0; b < 0x100; b++) {
            bb[0] = (byte)b;
            String sOld = new String(bb, oldCS);
            String sNew = new String(bb, newCS);
            if (!sOld.equals(sNew)) {
                System.out.printf("        b=%x:  %x/%d(old)  %x/%d(new)%n",
                                  b & 0xff,
                                  sOld.charAt(0) & 0xffff, sOld.length(),
                                  sNew.charAt(0) & 0xffff, sNew.length());
            }
        }

        System.out.printf("       trying DB...%n");
        bb = new byte[isEBCDIC ? 4 : 2];
        for (int b1 = 0x40; b1 < 0xff; b1++) {
            if (!isEBCDIC) {
                // Lead-byte probe. NOTE(review): bb is two bytes long here and
                // bb[1] still holds whatever the previous inner loop left in
                // it (0 on the first pass), so this is not a pure single-byte
                // decode despite the original "decodable singlebyte b1" note.
                bb[0] = (byte)b1;
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                if (!sOld.equals(sNew)) {
                    if (sOld.length() != 2 && sOld.charAt(0) != 0) {
                        // Only report unexpected differences; an old result of
                        // length 2 or starting with U+0000 is a known issue.
                        System.out.printf("        b1=%x:  %x/%d(old)  %x/%d(new)%n",
                                          b1 & 0xff,
                                          sOld.charAt(0) & 0xffff, sOld.length(),
                                          sNew.charAt(0) & 0xffff, sNew.length());
                        continue;
                    }
                }
            }
            for (int b2 = 0x40; b2 < 0xff; b2++) {
                if (isEBCDIC) {
                    bb[0] = 0x0e;
                    bb[1] = (byte)b1;
                    bb[2] = (byte)b2;
                    bb[3] = 0x0f;
                } else {
                    bb[0] = (byte)b1;
                    bb[1] = (byte)b2;
                }
                String sOld = new String(bb, oldCS);
                String sNew = new String(bb, newCS);
                // Only the first decoded char is compared, not the whole string.
                if (sOld.charAt(0) != sNew.charAt(0)) {
                    if (sOld.charAt(0) == 0 && sNew.charAt(0) == 0xfffd) {
                        continue; // known issue in the old implementation
                    }
                    System.out.printf("        bb=<%x,%x>  c(old)=%x,  c(new)=%x%n",
                        b1, b2, sOld.charAt(0) & 0xffff, sNew.charAt(0) & 0xffff);
                }
            }
        }
    }

    /**
     * Prints how long (in microseconds) it takes to look up the charset
     * {@code csn} and to create its first decoder and encoder.
     */
    static void checkInit(String csn) throws Exception {
        System.out.printf("Check init <%s>...%n", csn);
        Charset.forName("Big5");    // load in the ExtendedCharsets
        long t1 = System.nanoTime() / 1000;
        Charset cs = Charset.forName(csn);
        long t2 = System.nanoTime() / 1000;
        System.out.printf("    charset     :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newDecoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf("    new Decoder :%d%n", t2 - t1);

        t1 = System.nanoTime() / 1000;
        cs.newEncoder();
        t2 = System.nanoTime() / 1000;
        System.out.printf("    new Encoder :%d%n", t2 - t1);
    }

    /**
     * Encodes {@code cc} and decodes the result with both charsets, with heap
     * and with direct buffers, printing the time ratio cs2/cs1 and a
     * "failed" line whenever the two charsets produce different output.
     * Differences are reported but do not fail the test.
     */
    static void compare(Charset cs1, Charset cs2, char[] cc) throws Exception {
        System.gc();    // enqueue finalizable objects
        Thread.sleep(1000);
        System.gc();    // enqueue finalizable objects

        String csn1 = cs1.name();
        String csn2 = cs2.name();
        System.out.printf("Diff     <%s> <%s>...%n", csn1, csn2);

        Time t1 = new Time();
        Time t2 = new Time();

        byte[] bb1 = encode(cc, cs1, false, t1);
        byte[] bb2 = encode(cc, cs2, false, t2);

        System.out.printf("    Encoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf("        encoding failed%n");
        }

        char[] cc2 = decode(bb1, cs2, false, t2);
        char[] cc1 = decode(bb1, cs1, false, t1);
        System.out.printf("    Decoding TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf("        decoding failed%n");
        }

        bb1 = encode(cc, cs1, true, t1);
        bb2 = encode(cc, cs2, true, t2);

        System.out.printf("    Encoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(bb1, bb2)) {
            System.out.printf("        encoding (direct) failed%n");
        }

        cc1 = decode(bb1, cs1, true, t1);
        cc2 = decode(bb1, cs2, true, t2);
        System.out.printf("    Decoding(dir) TimeRatio %s/%s: %d,%d :%f%n",
                          csn2, csn1,
                          t2.t, t1.t,
                          (double)(t2.t) / (t1.t));
        if (!Arrays.equals(cc1, cc2)) {
            System.out.printf("        decoding (direct) failed%n");
        }
    }

    /* Layout of a "malformed" entry: the first byte is the expected length of
       the malformed sequence, the remaining bytes are the input, e.g.
        byte[][] malformed = {
            {5, (byte)0xF8, (byte)0x80, (byte)0x80, (byte)0x9F, (byte)0x80, (byte)0xC0 },
        };
    */

    /**
     * Decodes every entry of {@code malformed} (heap and direct buffers) and
     * verifies that the decoder reports a malformed-input result of the
     * expected length.
     *
     * @param cs        charset under test
     * @param malformed entries of the form {expectedLength, input bytes...}
     * @throws RuntimeException if any entry is not reported as expected
     */
    static void checkMalformed(Charset cs, byte[][] malformed)
        throws Exception
    {
        boolean failed = false;
        String csn = cs.name();
        System.out.printf("Check malformed <%s>...%n", csn);
        for (boolean direct : new boolean[] {false, true}) {
            for (byte[] bins : malformed) {
                int mlen = bins[0];
                byte[] bin = Arrays.copyOfRange(bins, 1, bins.length);
                CoderResult cr = decodeCR(bin, cs, direct);
                StringBuilder ashex = new StringBuilder();
                for (int i = 0; i < bin.length; i++) {
                    if (i > 0) {
                        ashex.append(' ');
                    }
                    ashex.append(Integer.toString(bin[i] & 0xff, 16));
                }
                if (!cr.isMalformed()) {
                    System.out.printf("        FAIL(direct=%b): [%s] not malformed. -->cr=%s%n",
                                      direct, ashex, cr.toString());
                    failed = true;
                } else if (cr.length() != mlen) {
                    System.out.printf("        FAIL(direct=%b): [%s] malformed[len=%d].%n",
                                      direct, ashex, cr.length());
                    failed = true;
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check malformed failed " + csn);
        }
    }

    /**
     * Runs one decode over a window of {@code bytes} and verifies the result
     * and the final buffer positions.
     *
     * @param flow {inPos, inLen, outPos, outLen, expectedInPos,
     *             expectedOutPos, 0 for UNDERFLOW / non-zero for OVERFLOW}
     * @return true if result and positions match the expectation; otherwise
     *         prints the flow and the actual values and returns false
     */
    static boolean check(CharsetDecoder dec, byte[] bytes, boolean direct, int[] flow) {
        int inPos = flow[0];
        int inLen = flow[1];
        int outPos = flow[2];
        int outLen = flow[3];
        int expedInPos = flow[4];
        int expedOutPos = flow[5];
        CoderResult expedCR = (flow[6] == 0) ? CoderResult.UNDERFLOW
                                             : CoderResult.OVERFLOW;
        ByteBuffer bbf;
        CharBuffer cbf;
        if (direct) {
            bbf = ByteBuffer.allocateDirect(inPos + bytes.length);
            cbf = ByteBuffer.allocateDirect((outPos + outLen) * 2).asCharBuffer();
        } else {
            bbf = ByteBuffer.allocate(inPos + bytes.length);
            cbf = CharBuffer.allocate(outPos + outLen);
        }
        // Place the input at offset inPos and expose only inLen bytes of it.
        bbf.position(inPos);
        bbf.put(bytes).flip().position(inPos).limit(inPos + inLen);
        cbf.position(outPos);
        dec.reset();
        CoderResult cr = dec.decode(bbf, cbf, false);
        if (cr != expedCR ||
            bbf.position() != expedInPos ||
            cbf.position() != expedOutPos) {
            System.out.printf("Expected(direct=%5b): [", direct);
            for (int i : flow) {
                System.out.print(" " + i);
            }
            System.out.println("]  CR=" + cr +
                               ", inPos=" + bbf.position() +
                               ", outPos=" + cbf.position());
            return false;
        }
        return true;
    }

    /**
     * Checks under/overflow handling of {@code cs}'s decoder at a range of
     * input and output buffer offsets.
     *
     * NOTE(review): the input is produced with the EUC_TW encoder and the
     * expected offsets assume its 1/2/4-byte layout; this method is not
     * called from main() — confirm the intended charset before wiring it in.
     *
     * @throws RuntimeException if any flow does not match
     */
    static void checkUnderOverflow(Charset cs) throws Exception {
        String csn = cs.name();
        System.out.printf("Check under/overflow <%s>...%n", csn);
        CharsetDecoder dec = cs.newDecoder();
        boolean failed = false;

        //7f, a1a1, 8ea2a1a1, 8ea3a1a1, 8ea7a1a1
        //0   1 2   3         7         11
        byte[] bytes = "\u007f\u3000\u4e42\u4e28\ud840\udc55".getBytes("EUC_TW");
        int    inlen = bytes.length;

        int MAXOFF = 20;
        for (int inoff = 0; inoff < MAXOFF; inoff++) {
            for (int outoff = 0; outoff < MAXOFF; outoff++) {
                int[][] Flows = {
                    //inpos, inLen, outPos,  outLen, inPosEP,    outposEP,   under(0)/over(1)
                    //overflow
                    {inoff,  inlen, outoff,  1,      inoff + 1,  outoff + 1, 1},
                    {inoff,  inlen, outoff,  2,      inoff + 3,  outoff + 2, 1},
                    {inoff,  inlen, outoff,  3,      inoff + 7,  outoff + 3, 1},
                    {inoff,  inlen, outoff,  4,      inoff + 11, outoff + 4, 1},
                    {inoff,  inlen, outoff,  5,      inoff + 11, outoff + 4, 1},
                    {inoff,  inlen, outoff,  6,      inoff + 15, outoff + 6, 0},
                    //underflow
                    {inoff,  1,     outoff,  6,      inoff + 1,  outoff + 1, 0},
                    {inoff,  2,     outoff,  6,      inoff + 1,  outoff + 1, 0},
                    {inoff,  3,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  4,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  5,     outoff,  6,      inoff + 3,  outoff + 2, 0},
                    {inoff,  8,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff,  9,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff, 10,     outoff,  6,      inoff + 7,  outoff + 3, 0},
                    {inoff, 11,     outoff,  6,      inoff +11,  outoff + 4, 0},
                    {inoff, 12,     outoff,  6,      inoff +11,  outoff + 4, 0},
                    {inoff, 15,     outoff,  6,      inoff +15,  outoff + 6, 0},
                    // 2-byte under/overflow
                    {inoff,  2,     outoff,  1,      inoff + 1,  outoff + 1, 0},
                    {inoff,  3,     outoff,  1,      inoff + 1,  outoff + 1, 1},
                    {inoff,  3,     outoff,  2,      inoff + 3,  outoff + 2, 0},
                };
                for (boolean direct : new boolean[] {false, true}) {
                    for (int[] flow : Flows) {
                        if (!check(dec, bytes, direct, flow)) {
                            failed = true;
                        }
                    }
                }
            }
        }
        if (failed) {
            throw new RuntimeException("Check under/overflow failed " + csn);
        }
    }

    /**
     * Charsets under test. Each name is looked up via Charset.forName() and
     * compared against the class named {@code <name>_OLD}.
     */
    static String[] csnames = new String[] {

        "IBM930",
        "IBM933",
        "IBM935",
        "IBM937",
        "IBM939",
        "IBM942",
        "IBM943",
        "IBM948",
        "IBM949",
        "IBM950",
        "IBM970",
        "IBM942C",
        "IBM943C",
        "IBM949C",
        "IBM1381",
        "IBM1383",

        "EUC_CN",
        "EUC_KR",
        "GBK",
        "Johab",
        "MS932",
        "MS936",
        "MS949",
        "MS950",

        "EUC_JP",
        "EUC_JP_LINUX",
        "EUC_JP_Open",
        "SJIS",
        "PCK",
    };

    /**
     * Runs the init, encoding, decoding and comparison checks for every
     * entry of {@link #csnames}, plus malformed-input checks for the
     * charset families that have them.
     */
    public static void main(String[] args) throws Exception {
        for (String csname : csnames) {
            System.out.printf("-----------------------------------%n");
            String oldname = csname + "_OLD";
            if ("EUC_JP_Open".equals(csname)) {
                csname = "eucjp-open";
            }
            checkInit(csname);
            Charset csOld = (Charset) Class.forName(oldname)
                                           .getDeclaredConstructor()
                                           .newInstance();
            Charset csNew = Charset.forName(csname);
            char[] cc = checkEncoding(csOld, csNew);
            checkDecoding(csOld, csNew);
            compare(csNew, csOld, cc);

            // The family is selected by the charset's canonical name
            // ("x-IBM930", ...). csname holds the lookup alias from csnames
            // ("IBM930", ...), which never starts with "x-", so testing it
            // would silently skip every malformed-input check below.
            String canonical = csNew.name();
            if (canonical.startsWith("x-IBM93")) {
                // EBCDIC with shift-out (0x0e) / shift-in (0x0f) switching
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, 0x0f, 0x27},         // SI (0x0f) while in single-byte mode
                    {1, 0x0e, 0x41, 0x41, 0xe},    // SO (0x0e) while in double-byte mode
                    {2, 0x0e, 0x40, 0x41, 0xe},    // illegal DB
                });
            } else if (canonical.equals("x-IBM970") ||
                       canonical.equals("x-IBM1383")) {
                // euc_simple
                checkMalformed(csNew, new byte[][] {
                    {1, 0x26, (byte)0x8f, 0x27},                   // 0x8f (SS3)
                    {1, (byte)0xa1, (byte)0xa1, (byte)0x8e, 0x51}, // 0x8e (SS2)
                });
            }
        }
    }
}
559