1 /*
2  * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* @test
25  * @bug 8186801 8186751
26  * @summary Test the charset mappings
27  * @modules jdk.charsets
28  */
29 
30 import java.io.*;
31 import java.nio.*;
32 import java.nio.file.*;
33 import java.nio.charset.*;
34 import java.util.*;
35 import java.util.function.*;
36 import java.util.regex.*;
37 import java.util.stream.*;
38 
39 public class TestCharsetMapping {
40 
41     private static final int BUFSIZ = 8192;     // Initial buffer size
42     private static final int MAXERRS = 10;      // Errors reported per test
43 
44     private static final PrintStream log = System.out;
45 
46     // Set by -v on the command line
47     private static boolean verbose = false;
48 
49     // Test modes
50     private static final int ENCODE = 1;
51     private static final int DECODE = 2;
52 
53     // Utilities
54     private static ByteBuffer expand(ByteBuffer bb) {
55         ByteBuffer nbb = ByteBuffer.allocate(bb.capacity() * 2);
56         bb.flip();
57         nbb.put(bb);
58         return nbb;
59     }
60 
61     private static CharBuffer expand(CharBuffer cb) {
62         CharBuffer ncb = CharBuffer.allocate(cb.capacity() * 2);
63         cb.flip();
64         ncb.put(cb);
65         return ncb;
66     }
67 
68     private static byte[] parseBytes(String s) {
69         int nb = s.length() / 2;
70         byte[] bs = new byte[nb];
71         for (int i = 0; i < nb; i++) {
72             int j = i * 2;
73             if (j + 2 > s.length())
74                 throw new RuntimeException("Malformed byte string: " + s);
75             bs[i] = (byte)Integer.parseInt(s.substring(j, j + 2), 16);
76         }
77         return bs;
78     }
79 
80     private static String printBytes(byte[] bs) {
81         StringBuffer sb = new StringBuffer();
82         for (int i = 0; i < bs.length; i++) {
83             sb.append(Integer.toHexString((bs[i] >> 4) & 0xf));
84             sb.append(Integer.toHexString((bs[i] >> 0) & 0xf));
85         }
86         return sb.toString();
87     }
88 
89     private static String printCodePoint(int cp) {
90         StringBuffer sb = new StringBuffer();
91         sb.append("U+");
92         if (cp > 0xffff)
93             sb.append(Integer.toHexString((cp >> 16) & 0xf));
94         sb.append(Integer.toHexString((cp >> 12) & 0xf));
95         sb.append(Integer.toHexString((cp >> 8) & 0xf));
96         sb.append(Integer.toHexString((cp >> 4) & 0xf));
97         sb.append(Integer.toHexString((cp >> 0) & 0xf));
98         return sb.toString();
99     }
100 
101     private static int getCodePoint(CharBuffer cb) {
102         char c = cb.get();
103         if (Character.isHighSurrogate(c))
104             return Character.toCodePoint(c, cb.get());
105         else
106             return c;
107     }
108 
109     private static String plural(int n) {
110         return (n == 1 ? "" : "s");
111     }
112 
113     // TestCharsetMapping
114     private CharsetInfo csinfo;
115     private CharsetDecoder decoder = null;
116     private CharsetEncoder encoder = null;
117 
118     // Stateful dbcs encoding has leading shift byte '0x0e'
119     // and trailing shift byte '0x0f'.
120     // The flag variable shiftHackDBCS is 'true' for stateful
121     // EBCDIC encodings, which indicates the need of adding/
122     // removing the shift bytes.
123     private boolean shiftHackDBCS = false;
124 
125     private TestCharsetMapping(CharsetInfo csinfo) throws Exception {
126         this.csinfo = csinfo;
127         this.encoder = csinfo.cs.newEncoder()
128             .onUnmappableCharacter(CodingErrorAction.REPLACE)
129             .onMalformedInput(CodingErrorAction.REPLACE);
130         this.decoder = csinfo.cs.newDecoder()
131             .onUnmappableCharacter(CodingErrorAction.REPLACE)
132             .onMalformedInput(CodingErrorAction.REPLACE);
133     }
134 
135     private class Test {
136         // An instance of this class tests all mappings for
137         // a particular bytesPerChar value
138         private int bytesPerChar;
139 
140         // Reference data from .map/nr/c2b files
141         private ByteBuffer refBytes = ByteBuffer.allocate(BUFSIZ);
142         private CharBuffer refChars = CharBuffer.allocate(BUFSIZ);
143 
144         private ByteBuffer dRefBytes = ByteBuffer.allocateDirect(BUFSIZ);
145         private CharBuffer dRefChars = ByteBuffer.allocateDirect(BUFSIZ*2).asCharBuffer();
146 
147         private Test(int bpc) {
148             bytesPerChar = bpc;
149         }
150 
151         // shiftHackDBCS can add the leading/trailing shift bytesa
152         private void put(byte[] bs) {
153             if (refBytes.remaining() < bytesPerChar)
154                 refBytes = expand(refBytes);
155             refBytes.put(bs);
156         }
157 
158         private void put(byte[] bs, char[] cc) {
159             if (bs.length != bytesPerChar)
160                 throw new IllegalArgumentException(bs.length
161                                                    + " != "
162                                                    + bytesPerChar);
163             if (refBytes.remaining() < bytesPerChar)
164                 refBytes = expand(refBytes);
165             refBytes.put(bs);
166             if (refChars.remaining() < cc.length)
image_downsize_gd_fixed_point(image * im)167                 refChars = expand(refChars);
168             refChars.put(cc);
169         }
170 
171         private boolean decode(ByteBuffer refBytes, CharBuffer refChars)
172             throws Exception {
173             log.println("    decode" + (refBytes.isDirect()?" (direct)":""));
174             CharBuffer out = decoder.decode(refBytes);
175 
176             refBytes.rewind();
177             byte[] bs = new byte[bytesPerChar];
178             int e = 0;
179 
180             if (shiftHackDBCS && bytesPerChar == 2 && refBytes.get() != (byte)0x0e) {
181                 log.println("Missing leading byte");
182             }
183 
184             while (refChars.hasRemaining()) {
185                 refBytes.get(bs);
186                 int rcp = getCodePoint(refChars);
187                 int ocp = getCodePoint(out);
188                 if (rcp != ocp) {
189                     log.println("      Error: "
190                                 + printBytes(bs)
191                                 + " --> "
192                                 + printCodePoint(ocp)
193                                 + ", expected "
194                                 + printCodePoint(rcp));
195                     if (++e >= MAXERRS) {
196                         log.println("      Too many errors, giving up");
197                         break;
198                     }
199                 }
200                 if (verbose) {
201                     log.println("      "
202                                 + printBytes(bs)
203                                 + " --> "
204                                 + printCodePoint(rcp));
205                 }
206             }
207 
208             if (shiftHackDBCS && bytesPerChar == 2 && refBytes.get() != (byte)0x0f) {
209                 log.println("Missing trailing byte");
210             }
211 
212             if (e == 0 && (refChars.hasRemaining() || out.hasRemaining())) {
213                 // Paranoia: Didn't consume everything
214                 throw new IllegalStateException();
215             }
216             refBytes.rewind();
217             refChars.rewind();
218             return (e == 0);
219         }
220 
221         private boolean encode(ByteBuffer refBytes, CharBuffer refChars)
222             throws Exception {
223             log.println("    encode" + (refBytes.isDirect()?" (direct)":""));
224             ByteBuffer out = encoder.encode(refChars);
225             refChars.rewind();
226 
227             if (shiftHackDBCS && bytesPerChar == 2 && out.get() != refBytes.get()) {
228                 log.println("Missing leading byte");
229                 return false;
230             }
231 
232             byte[] rbs = new byte[bytesPerChar];
233             byte[] obs = new byte[bytesPerChar];
234             int e = 0;
235             while (refChars.hasRemaining()) {
236                 int cp = getCodePoint(refChars);
237                 refBytes.get(rbs);
238                 out.get(obs);
239                 boolean eq = true;
240                 for (int i = 0; i < bytesPerChar; i++)
241                     eq &= rbs[i] == obs[i];
242                 if (!eq) {
243                     log.println("      Error: "
244                                 + printCodePoint(cp)
245                                 + " --> "
246                                 + printBytes(obs)
247                                 + ", expected "
248                                 + printBytes(rbs));
249                     if (++e >= MAXERRS) {
250                         log.println("      Too many errors, giving up");
251                         break;
252                     }
253                 }
254                 if (verbose) {
255                     log.println("      "
256                                 + printCodePoint(cp)
257                                 + " --> "
258                                 + printBytes(rbs));
259                 }
260             }
261 
262             if (shiftHackDBCS && bytesPerChar == 2 && out.get() != refBytes.get()) {
263                 log.println("Missing trailing byte");
264                 return false;
265             }
266 
267             if (e == 0 && (refBytes.hasRemaining() || out.hasRemaining())) {
268                 // Paranoia: Didn't consume everything
269                 throw new IllegalStateException();
270             }
271 
272             refBytes.rewind();
273             refChars.rewind();
274             return (e == 0);
275         }
276 
277         private boolean run(int mode) throws Exception {
278             log.println("  " + bytesPerChar
279                         + " byte" + plural(bytesPerChar) + "/char");
280 
281             if (dRefBytes.capacity() < refBytes.capacity()) {
282                 dRefBytes = ByteBuffer.allocateDirect(refBytes.capacity());
283             }
284             if (dRefChars.capacity() < refChars.capacity()) {
285                 dRefChars = ByteBuffer.allocateDirect(refChars.capacity()*2)
286                                       .asCharBuffer();
287             }
288             refBytes.flip();
289             refChars.flip();
290             dRefBytes.clear();
291             dRefChars.clear();
292 
293             dRefBytes.put(refBytes).flip();
294             dRefChars.put(refChars).flip();
295             refBytes.flip();
296             refChars.flip();
297 
298             boolean rv = true;
299             if (mode != ENCODE) {
300                 rv &= decode(refBytes, refChars);
301                 rv &= decode(dRefBytes, dRefChars);
302             }
303             if (mode != DECODE) {
304                 rv &= encode(refBytes, refChars);
305                 rv &= encode(dRefBytes, dRefChars);
306             }
307             return rv;
308         }
309     }
310 
311     // Maximum bytes/char being tested
312     private int maxBytesPerChar = 0;
313 
314     // Tests, indexed by bytesPerChar - 1
315     private Test[] tests;
316 
317     private void clearTests() {
318         maxBytesPerChar = 0;
319         tests = new Test[0];
320     }
321 
322     // Find the test for the given bytes/char value,
323     // expanding the test array if needed
324     //
325     private Test testFor(int bpc) {
326         if (bpc > maxBytesPerChar) {
327             Test[] ts = new Test[bpc];
328             System.arraycopy(tests, 0, ts, 0, maxBytesPerChar);
329             for (int i = maxBytesPerChar; i < bpc; i++)
330                 ts[i] = new Test(i + 1);
331             tests = ts;
332             maxBytesPerChar = bpc;
333         }
334         return tests[bpc - 1];
335     }
336 
337     private boolean testStringConv() throws Exception {
338         if (shiftHackDBCS) {
339             log.println("  string de/encoding   skipped for ebcdic");
340             return true;
341         }
342         boolean rv = true;
343         log.println("  string de/encoding");
344         // for new String()
345         ByteArrayOutputStream baosDec = new ByteArrayOutputStream();
346         StringBuilder sbDec = new StringBuilder();
347         // for String.getBytes()
348         ByteArrayOutputStream baosEnc = new ByteArrayOutputStream();
349         StringBuilder sbEnc = new StringBuilder();
350 
351         for (Entry e : csinfo.mappings) {
352             baosDec.write(e.bs);
353             sbDec.append(Character.toChars(e.cp));
354             if (e.cp2 != 0)
355                 sbDec.append(e.cp2);
356 
357             // non-roundtrip b2c, and c2b
358             if (csinfo.nr != null && csinfo.nr.containsKey(e.bb) ||
359                 csinfo.c2b != null && !csinfo.c2b.containsKey(e.cp))
360                 continue;
361             baosEnc.write(e.bs);
362             sbEnc.append(Character.toChars(e.cp));
363             if (e.cp2 != 0)
364                 sbEnc.append(e.cp2);
365         }
366         log.println("    new String()");
367         if (!new String(baosDec.toByteArray(), csinfo.csName).equals(sbDec.toString())) {
368             log.println("      Error: new String() failed");
369             rv = false;
370         }
371         log.println("    String.getBytes()");
372         if (!Arrays.equals(baosEnc.toByteArray(), sbEnc.toString().getBytes(csinfo.csName))) {
373             log.println("      Error: String().getBytes() failed");
374             rv = false;
375         }
376         return rv;
377     }
378 
379     private boolean run() throws Exception {
380         boolean rv = true;
381         shiftHackDBCS = csinfo.type.equals("ebcdic");    // isStateful;
382 
383         // (1) new String()/String.getBytes()
384         rv &= testStringConv();
385 
386         // (2) DECODE:
387         clearTests();
388         if (shiftHackDBCS) {
389             testFor(2).put(new byte[] { 0x0e });
390         }
391         csinfo.mappings.forEach(e -> {
392                 if (e.cp2 != 0)
393                     return;          // skip composite (base+cc) for now
394                 byte[] bs = e.bs;
395                 char[] cc = Character.toChars(e.cp);
396                 testFor(bs.length).put(bs, cc);
397             });
398         if (shiftHackDBCS) {
399             testFor(2).put(new byte[] { 0x0f });
400         }
401         for (int i = 0; i < maxBytesPerChar; i++) {
402             rv &= tests[i].run(DECODE);
403         }
404 
405         // (3) ENCODE:
406         clearTests();
407         if (shiftHackDBCS) {
408             testFor(2).put(new byte[] { 0x0e });
409         }
410         csinfo.mappings.forEach(e -> {
411                 if (e.cp2 != 0)
412                     return;          // skip composite (base+cc) for now
413                 if (csinfo.nr != null && csinfo.nr.containsKey(e.bb))
414                     return;          // non-roundtrip b2c
415                 if (csinfo.c2b != null && csinfo.c2b.containsKey(e.cp))
416                     return;          // c2b only mapping
417                 byte[] bs = e.bs;
418                 char[] cc = Character.toChars(e.cp);
419                 testFor(bs.length).put(bs, cc);
420             });
421         if (csinfo.c2b != null)
422             csinfo.c2b.values().forEach(e -> {
423                     byte[] bs = e.bs;
424                     char[] cc = Character.toChars(e.cp);
425                     testFor(bs.length).put(bs, cc);
426                 });
427         if (shiftHackDBCS) {
428             testFor(2).put(new byte[] { 0x0f });
429         }
430         for (int i = 0; i < maxBytesPerChar; i++) {
431             rv &= tests[i].run(ENCODE);
432         }
433         return rv;
434     }
435 
436     private static class Entry {
437         byte[] bs;   // byte sequence reps
438         int cp;      // Unicode codepoint
439         int cp2;     // CC of composite
440         long bb;     // bs in "long" form for nr lookup;
441     }
442 
443     private final static int  UNMAPPABLE = 0xFFFD;
444     private static final Pattern ptn = Pattern.compile("(?:0x)?(\\p{XDigit}++)\\s++(?:U\\+|0x)?(\\p{XDigit}++)(?:\\s++#.*)?");
445     private static final int G_BS  = 1;
446     private static final int G_CP  = 2;
447     private static final int G_CP2 = 3;
448 
449     private static class CharsetInfo {
450         Charset  cs;
451         String   pkgName;
452         String   clzName;
453         String   csName;
454         String   hisName;
455         String   type;
456         boolean  isInternal;
457         Set<String> aliases = new HashSet<>();
458 
459         // mapping entries
460         List<Entry> mappings;
461         Map<Long, Entry> nr;       // bytes -> entry
462         Map<Integer, Entry> c2b;   // cp -> entry
463 
464         CharsetInfo(String csName, String clzName) {
465             this.csName = csName;
466             this.clzName = clzName;
467         }
468 
469         private Entry parse(Matcher m) {
470             Entry e = new Entry();
471             e.bb = Long.parseLong(m.group(G_BS), 16);
472             if (e.bb < 0x100)
473                 e.bs = new byte[] { (byte)e.bb };
474             else
475                 e.bs = parseBytes(m.group(G_BS));
476             e.cp = Integer.parseInt(m.group(G_CP), 16);
477             if (G_CP2 <= m.groupCount() && m.group(G_CP2) != null)
478                e.cp2 = Integer.parseInt(m.group(G_CP2), 16);
479             else
480                e.cp2 = 0;
481             return e;
482         }
483 
484         boolean loadMappings(Path dir) throws IOException {
485             // xxx.map
486             Path path = dir.resolve(clzName + ".map");
487             if (!Files.exists(path)) {
488                 return false;
489             }
490             Matcher m = ptn.matcher("");
491             mappings = Files.lines(path)
492                 .filter(ln -> !ln.startsWith("#") && m.reset(ln).lookingAt())
493                 .map(ln -> parse(m))
494                 .filter(e -> e.cp != UNMAPPABLE)  // non-mapping
495                 .collect(Collectors.toList());
496             // xxx.nr
497             path = dir.resolve(clzName + ".nr");
498             if (Files.exists(path)) {
499                 nr = Files.lines(path)
500                     .filter(ln -> !ln.startsWith("#") && m.reset(ln).lookingAt())
501                     .map(ln -> parse(m))
502                     .collect(Collectors.toMap(e -> e.bb, Function.identity()));
503             }
504             // xxx.c2b
505             path = dir.resolve(clzName + ".c2b");
506             if (Files.exists(path)) {
507                 c2b = Files.lines(path)
508                     .filter(ln -> !ln.startsWith("#") && m.reset(ln).lookingAt())
509                     .map(ln -> parse(m))
510                     .collect(Collectors.toMap(e -> e.cp, Function.identity()));
511             }
512             return true;
513         }
514     }
515 
516     private static Set<CharsetInfo> charsets(Path cslist) throws IOException {
517         Set<CharsetInfo> charsets = new LinkedHashSet<>();
518         Iterator<String> itr = Files.readAllLines(cslist).iterator();
519         CharsetInfo cs = null;
520 
521         while (itr.hasNext()) {
522             String line = itr.next();
523             if (line.startsWith("#") || line.length() == 0) {
524                 continue;
525             }
526             String[] tokens = line.split("\\s+");
527             if (tokens.length < 2) {
528                 continue;
529             }
530             if ("charset".equals(tokens[0])) {
531                 if (cs != null) {
532                     charsets.add(cs);
533                     cs = null;
534                 }
535                 if (tokens.length < 3) {
536                     throw new RuntimeException("Error: incorrect charset line [" + line + "]");
537                 }
538                 cs = new CharsetInfo(tokens[1], tokens[2]);
539             } else {
540                 String key = tokens[1];              // leading empty str
541                 switch (key) {
542                     case "alias":
543                         if (tokens.length < 3) {
544                             throw new RuntimeException("Error: incorrect alias line [" + line + "]");
545                         }
546                         cs.aliases.add(tokens[2]);   // ALIAS_NAME
547                         break;
548                     case "package":
549                         cs.pkgName = tokens[2];
550                         break;
551                     case "type":
552                         cs.type = tokens[2];
553                         break;
554                     case "hisname":
555                         cs.hisName = tokens[2];
556                         break;
557                     case "internal":
558                         cs.isInternal = Boolean.parseBoolean(tokens[2]);
559                         break;
560                     default:  // ignore
561                 }
562             }
563         }
564         if (cs != null) {
565             charsets.add(cs);
566         }
567         return charsets;
568     }
569 
570     public static void main(String args[]) throws Exception {
571         Path dir = Paths.get(System.getProperty("test.src", ".") +
572                              "/../../../../make/data/charsetmapping");
573         if (!Files.exists(dir)) {
574             // not inside jdk repo, no mappings, exit silently
575             log.println("Nothing done, not in a jdk repo: ");
576             return;
577         }
578         if (args.length > 0 && "-v".equals(args[0])) {
579             // For debugging: java CoderTest [-v]
580             verbose = true;
581         }
582 
583         int errors = 0;
584         int tested = 0;
585         int skipped = 0;
586         int known = 0;
587 
588         for (CharsetInfo csinfo : charsets(dir.resolve("charsets"))) {
589             String csname = csinfo.csName;
590 
591             if (csinfo.isInternal) {
592                 continue;
593             }
594 
595             log.printf("%ntesting: %-16s", csname);
596 
597             if (!Charset.isSupported(csname)) {
598                 errors++;
599                 log.println("    [error: charset is not supported]");
600                 continue;
601             }
602 
603             Charset cs = csinfo.cs = Charset.forName(csinfo.csName);
604             // test name()
605             if (!cs.name().equals(csinfo.csName)) {
606                 errors++;
607                 log.printf("    [error: wrong csname: " + csinfo.csName
608                            + " vs " + cs.name() + "]");
609             }
610             // test aliases()
611             if (!cs.aliases().equals(csinfo.aliases)) {
612                 errors++;
613                 log.printf("    [error wrong aliases]");
614                 if (verbose) {
615                     log.println();
616                     log.println("    expected: " + csinfo.aliases);
617                     log.println("         got: " + cs.aliases());
618                 }
619             }
620 
621             if (csinfo.type.equals("source")) {
622                 log.println("    [skipped: source based]");
623                 skipped++;
624                 continue;
625             }
626 
627             if (!csinfo.loadMappings(dir)) {
628                 log.println("    [error loading mappings failed]");
629                 errors++;
630                 continue;
631             }
632 
633             tested++;
634             log.println();
635             if (!new TestCharsetMapping(csinfo).run()) {
636 
637                 /////////////// known nr/c2b issues ////////////////
638                 if (csinfo.csName.equals("x-IBM948") ||
639                     csinfo.csName.equals("x-IBM950") ||
640                     csinfo.csName.equals("x-IBM937") ||
641                     csinfo.csName.equals("x-IBM1383"))
642                 {
643                     log.println("    [**** skipped, KNOWN nr/c2b mapping issue]");
644                     known++;
645                     continue;
646                 }
647 
648                 errors++;
649             }
650         }
651 
652         log.println();
653         log.println(tested + " charset" + plural(tested) + " tested, "
654                     + skipped + " skipped, " + known + " known issue(s)");
655         log.println();
656         if (errors > 0)
657             throw new Exception("Errors detected in "
658                                 + errors + " charset" + plural(errors));
659     }
660 }
661