1 /*
2  * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* @test
25  * @bug 4779029 4924625 6392664 6730652
26  * @summary Test decoding of various permutations of valid ISO-2022-CN byte sequences
27  * @modules jdk.charsets
28  */
29 
/*
 * Regression test for the NIO ISO-2022-CN decoder. Passes various valid
 * ISO-2022-CN byte sequences to the decoder through the java.io
 * InputStreamReader API, the String(byte[], String) constructor and a
 * CharsetDecoder operating on direct buffers.
 */
35 
36 import java.io.*;
37 import java.nio.*;
38 import java.nio.charset.*;
39 
public class TestISO2022CNDecoder {

    /** Charset name used by every decode path exercised in this test. */
    private static final String ENCODING_NAME = "ISO2022CN";

    //
    // Positive tests -- test input processing against
    // various "known good" data
    //

    /**
     * Decodes {@code encoded} three ways -- through an InputStreamReader,
     * through the String(byte[], String) constructor and through a
     * CharsetDecoder working on direct buffers -- and compares each result
     * with {@code decoded}.
     *
     * @param encoded ISO-2022-CN byte sequence to decode
     * @param decoded the chars the sequence is expected to decode to
     * @param label   prefix for the diagnostics written to System.err
     * @return {@code true} only if all three paths produce exactly
     *         {@code decoded}; any mismatch, length difference or
     *         exception yields {@code false}
     */
    private static boolean decodeTest(byte[] encoded, char[] decoded, String label) {
        boolean retval = true;
        int i = 0;

        try {
            //
            // Ensure that reading decodes correctly
            //
            InputStreamReader reader = new InputStreamReader(
                    new ByteArrayInputStream(encoded), ENCODING_NAME);

            for (i = 0; i < decoded.length; i++) {
                int c = reader.read();

                if (c != decoded[i]) {
                    System.err.print(label + ": read failed, char " + i);
                    System.err.print(" ... expected 0x"
                            + Integer.toHexString(decoded[i]));
                    if (c == -1) {
                        System.err.println(", got EOF");
                    } else {
                        System.err.println(", got 0x"
                                + Integer.toHexString(c));
                    }
                    retval = false;
                    // Nothing more can be read after EOF.
                    if (c == -1) {
                        return retval;
                    }
                }
            }

            // The reader must be exhausted once all expected chars are read.
            int testChar = reader.read();
            if (testChar != -1) {
                System.err.println(label + ": read failed, no EOF");
                System.err.println("testChar is "
                        + Integer.toHexString(testChar));
                return false;
            }

            //
            // Ensure that the String constructor decodes correctly
            //
            String decodedString = new String(encoded, ENCODING_NAME);
            if (decodedString.length() != decoded.length) {
                System.err.println(label + ": String decode failed, expected "
                        + decoded.length + " chars, got "
                        + decodedString.length());
                retval = false;
            }
            // Compare only the common prefix so a length mismatch is
            // reported above rather than as an index exception.
            int common = Math.min(decodedString.length(), decoded.length);
            for (i = 0; i < common; i++) {
                if (decodedString.charAt(i) != decoded[i]) {
                    System.err.println(label + ": String decode failed, char " + i);
                    retval = false;
                }
            }

            //
            // Ensure that decoding between direct buffers works correctly
            //
            CharsetDecoder dec = Charset.forName(ENCODING_NAME)
                .newDecoder()
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .onMalformedInput(CodingErrorAction.REPLACE);
            ByteBuffer bb = ByteBuffer.allocateDirect(encoded.length).put(encoded);
            bb.flip();
            // Two bytes of backing storage per char of output.
            CharBuffer cb = ByteBuffer
                .allocateDirect(2 * encoded.length * (int) dec.maxCharsPerByte())
                .asCharBuffer();
            // Array-backed buffers would not exercise the buffer code path.
            if (bb.hasArray() || cb.hasArray()) {
                System.err.println(label + ": directBuffer failed, ");
                return false;
            }
            if (!dec.decode(bb, cb, true).isUnderflow()) {
                System.err.println(label + ": decoder's decode() failed!");
                return false;
            }
            cb.flip();
            if (cb.limit() != decoded.length) {
                System.err.println(label + ": decoder failed, expected "
                        + decoded.length + " chars, got " + cb.limit());
                retval = false;
            }
            common = Math.min(cb.limit(), decoded.length);
            for (i = 0; i < common; i++) {
                if (cb.get() != decoded[i]) {
                    System.err.println(label + ": decoder failed, char " + i);
                    retval = false;
                }
            }

        } catch (Exception e) {
            System.err.println(label + ": failed "
                + "(i = " + i + "), "
                + e.getClass().getName()
                + ", " + e.getMessage());
            e.printStackTrace();
            return false;
        }
        return retval;
    }

    /**
     * Compares two coder results by kind and, for error results, by length.
     *
     * @param a first result
     * @param b second result
     * @return {@code true} if both are the same non-error result, or both
     *         are errors of the same kind and length
     */
    private static boolean equal(CoderResult a, CoderResult b) {
        // UNDERFLOW and OVERFLOW are unique constants, and length() throws
        // UnsupportedOperationException for them, so non-error results are
        // compared by identity only.
        if (!a.isError() || !b.isError()) {
            return a == b;
        }
        return a.isMalformed() == b.isMalformed()
            && a.isUnmappable() == b.isUnmappable()
            && a.length() == b.length();
    }

    /**
     * Decodes {@code encoded} with a default-configured decoder, first with
     * heap buffers and then with direct buffers, and checks that each
     * decode() call returns {@code expected}.
     *
     * @param encoded  byte sequence to decode
     * @param expected the coder result both decode() calls must return
     * @param label    prefix for the diagnostics written to System.err
     * @return {@code true} if both decode() calls return a result equal to
     *         {@code expected}
     */
    private static boolean decodeResultTest(byte[] encoded,
                                            CoderResult expected,
                                            String label) {
        CharsetDecoder dec = Charset.forName(ENCODING_NAME).newDecoder();

        // Heap (array-backed) buffers.
        ByteBuffer bb = ByteBuffer.wrap(encoded);
        CharBuffer cb = CharBuffer.allocate(encoded.length * (int) dec.maxCharsPerByte());
        CoderResult result = dec.decode(bb, cb, true);
        if (!equal(result, expected)) {
            System.err.println(label + ": decoder's decode() failed!");
            return false;
        }

        // Direct buffers.
        bb = ByteBuffer.allocateDirect(encoded.length).put(encoded);
        bb.flip();
        cb = ByteBuffer
            .allocateDirect(2 * encoded.length * (int) dec.maxCharsPerByte())
            .asCharBuffer();
        if (bb.hasArray() || cb.hasArray()) {
            System.err.println(label + ": directBuffer failed, ");
            return false;
        }
        result = dec.reset().decode(bb, cb, true);
        if (!equal(result, expected)) {
            System.err.println(label + ": decoder's decode() - direct failed!");
            return false;
        }
        return true;
    }

    //
    // Negative tests -- only for input processing, make sure that
    // invalid or corrupt characters are rejected.
    //

    /**
     * Reads one char from {@code encoded} through an InputStreamReader and
     * expects the read to throw CharConversionException.
     * Not currently invoked by {@link #main}.
     *
     * @param encoded byte sequence that is expected to be rejected
     * @param label   prefix for the diagnostics written to System.err
     * @return {@code true} only if CharConversionException was thrown
     */
    private static boolean negative(byte[] encoded, String label) {
        try {
            InputStreamReader reader = new InputStreamReader(
                    new ByteArrayInputStream(encoded), ENCODING_NAME);

            int c = reader.read();

            // Reaching this point means the bad input was not rejected.
            System.err.print(label + ": read failed, ");
            if (c == -1) {
                System.err.println("reported EOF");
            } else {
                System.err.println("returned char 0x"
                    + Integer.toHexString(c)
                    + ", expected exception");
            }
            return false;

        } catch (CharConversionException e) {
            return true;

        } catch (Throwable t) {
            System.err.println(label + ": failed, threw "
                + t.getClass().getName()
                + ", " + t.getMessage());
        }
        return false;
    }

    /**
     * Regression check for bug 6392664: decoding SO followed by two bytes
     * with a fresh ISO-2022-CN-GB decoder must not throw.
     *
     * @return {@code true} if decoding completed without an exception
     */
    private static boolean decodeTest6392664() {
        try {
            CharsetDecoder dec = Charset.forName("ISO-2022-CN-GB").newDecoder();
            dec.decode(ByteBuffer.wrap(new byte[] {(byte)0x0e, (byte)0x42, (byte)0x43 }));
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    //
    // TEST #0: 7-bit unshifted values, then SO (0x0E) followed by
    // a valid decodable GB2312-80 character, then SO followed by
    // an unmappable two-byte sequence.
    // This is a positive test.
    //
    private static final byte[] test0_bytes = {
        (byte)0x00,
        (byte)0x01, (byte)0x02, (byte)0x03,
        (byte)0x0E, (byte)0x21, (byte)0x2f,
        (byte)0x0E, (byte)0xDD, (byte)0x9f
    };

    private static final char[] test0_chars = {
        0x0000,
        0x0001, 0x0002, 0x0003,
        0x2019,
        0xFFFD
    };

    // SO designator without SO: the following bytes stay unshifted.
    private static final byte[] test1_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, (byte)0x21,
        (byte)0x2f };

    private static final char[] test1_chars = {
        0x21, 0x2f
    };

    // SO before the SO designator.
    private static final byte[] test2_bytes = {
        (byte)0x0e,
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x21, (byte)0x2f };

    private static final char[] test2_chars = {
        0x2019
    };

    // SO designator (ESC $ ) A), then SO.
    private static final byte[] test3_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e,
        (byte)0x21, (byte)0x2f };

    // Same as test3 but with the three-byte ESC $ A sequence.
    private static final byte[] test3a_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x41,
        (byte)0x0e,
        (byte)0x21, (byte)0x2f };

    private static final char[] test3_chars = {
        0x2019
    };

    // SO designator followed by SI: bytes stay unshifted.
    private static final byte[] test4_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0f,
        (byte)0x21, (byte)0x2f };

    private static final char[] test4_chars = {
        0x21, 0x2f
    };

    // One shifted-out char, then SI and two unshifted bytes.
    private static final byte[] test5_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x0f, (byte)0x21, (byte)0x2f };

    private static final char[] test5_chars = {
        0x2018, 0x21, 0x2f
    };

    // Two consecutive shifted-out chars.
    private static final byte[] test6_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41,
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x21, (byte)0x2f };

    private static final char[] test6_chars = {
        0x2018, 0x2019
    };

    // As test6, with the ESC $ ) G designator.
    private static final byte[] test7_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)'G',
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x21, (byte)0x2f };

    private static final char[] test7_chars = {
        0xFE50, 0xFE51
    };

    // As test5, with the ESC $ ) G designator.
    private static final byte[] test8_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)'G',
        (byte)0x0e, (byte)0x21, (byte)0x2e,
        (byte)0x0f, (byte)0x21, (byte)0x2f };

    private static final char[] test8_chars = {
        0xFE50, 0x21, 0x2f
    };

    // SS2 designator (ESC $ * H), then SS2 (ESC N) and one char.
    private static final byte[] test9_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x2a, (byte)'H',
        (byte)0x1b, (byte)0x4e,
        (byte)0x21, (byte)0x2f };

    private static final char[] test9_chars = {
        0x4e0e
    };

    /*
     * Plane 3 support provided for compatibility with
     * sun.io ISO2022_CN decoder. Officially ISO-2022-CN
     * just handles planes 1/2 of CNS-11643 (1986)
     * Test case data below verifies this compatibility
     *
     */

    // SS3 designator (ESC $ + I), then SS3 (ESC O) and one char.
    private static final byte[] test10_bytes = {
        (byte)0x1b, (byte)0x24, (byte)'+', (byte)'I',
        (byte)0x1b, (byte)0x4f,
        (byte)0x21, (byte)0x2f };

    private static final char[] test10_chars = {
        0x51e2
    };

    private static final byte[] test11_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, //SO Designator
        (byte)0x0e,                                     //SO
        (byte)0x21, (byte)0x2e,                         //GB2312 char
        (byte)0x1b, (byte)0x24, (byte)0x2a, (byte)'H',  //SS2 Designator
        (byte)0x1b, (byte)0x4e,                         //SS2
        (byte)0x21, (byte)0x2f,                         //CNS-P2 char
        (byte)0x21, (byte)0x2f                          //GB2312 char
    };

    private static final char[] test11_chars = {
        0x2018,
        0x4e0e,
        0x2019
    };

    private static final byte[] test12_bytes = {
        (byte)0x1b, (byte)0x24, (byte)0x29, (byte)0x41, //SO Designator
        (byte)0x0e,                                     //SO
        (byte)0x21, (byte)0x2e,                         //GB2312 char
        (byte)0x1b, (byte)0x24, (byte)'+', (byte)'I',   //SS3 Designator
        (byte)0x1b, (byte)0x4f,                         //SS3
        (byte)0x21, (byte)0x2f,                         //CNS-P3 char
        (byte)0x21, (byte)0x2f                          //GB2312 char
    };

    private static final char[] test12_chars = {
        0x2018,
        0x51e2,
        0x2019
    };

    private static final byte[] test13_bytes = {
        (byte)0x0f0,   // byte with MSB
    };

    private static final char[] test13_chars = {
        0x00f0,
    };

    // A mappable char followed by an unmappable two-byte sequence; with the
    // decoder's default error actions decode() is expected to report it.
    private static final byte[] test14_bytes = {
        (byte)0x0E, (byte)0x21, (byte)0x2f,
        (byte)0x0E, (byte)0xDD, (byte)0x9f
    };
    private static final CoderResult test14_result = CoderResult.unmappableForLength(2);

    // Current ISO2022CN treats the "out of range" code points as "unmappable"
    private static final byte[] test15_bytes = {
        (byte)0x1b, (byte)0x4f,      // SS3
        (byte)0x20, (byte)0x2f,      // "out of range" CNS-P3 char
    };
    private static final CoderResult test15_result = CoderResult.unmappableForLength(4);

    /**
     * Regression check for bug 6730652: sample CNS plane 3 code points must
     * survive an encode/decode round trip through x-ISO-2022-CN-CNS.
     *
     * @return {@code true} if the round-tripped string equals the original
     * @throws Exception if the charset is not supported
     */
    private static boolean encodeTest6730652() throws Exception {
        //sample p3 codepoints
        String strCNSP3 = "\u4e28\u4e36\u4e3f\u4e85\u4e05\u4e04\u5369\u53b6\u4e2a\u4e87\u4e49\u51e2\u56b8\u56b9\u56c4\u8053\u92b0";
        byte[] encoded = strCNSP3.getBytes("x-ISO-2022-CN-CNS");
        return strCNSP3.equals(new String(encoded, "x-ISO-2022-CN-CNS"));
    }

    /**
     * Main program to test ISO2022CN conformance
     *
     * @param argv ignored
     * @throws Exception if any check fails
     */
    public static void main(String[] argv) throws Exception {
        boolean pass = true;

        System.out.println("");
        System.out.println("------ checking ISO2022CN decoder -----");

        // This regtest must be the first one.
        pass &= decodeTest6392664();

        // Probe that the charset name is accepted by java.io; the reader
        // is never read from, so System.in is left untouched.
        try {
            new InputStreamReader(System.in, ENCODING_NAME);
        } catch (UnsupportedEncodingException e) {
            System.out.println("... requires nonstandard encoding name "
                    + ENCODING_NAME);
            pass = false;
        }

        //
        // Positive tests -- good data is dealt with correctly
        //
        pass &= decodeTest(test0_bytes, test0_chars, "first batch");
        pass &= decodeTest(test1_bytes, test1_chars, "escapes1");
        pass &= decodeTest(test2_bytes, test2_chars, "escapes2");
        pass &= decodeTest(test3_bytes, test3_chars, "escapes3");
        pass &= decodeTest(test3a_bytes, test3_chars, "escapes3a");
        pass &= decodeTest(test4_bytes, test4_chars, "escapes4");
        pass &= decodeTest(test5_bytes, test5_chars, "escapes5");
        pass &= decodeTest(test6_bytes, test6_chars, "escapes6");
        pass &= decodeTest(test7_bytes, test7_chars, "escapes7");
        pass &= decodeTest(test8_bytes, test8_chars, "escapes8");
        pass &= decodeTest(test9_bytes, test9_chars, "escapes9");
        pass &= decodeTest(test10_bytes, test10_chars, "escapes10");
        pass &= decodeTest(test11_bytes, test11_chars, "escapes11");
        pass &= decodeTest(test12_bytes, test12_chars, "escapes12");
        pass &= decodeTest(test13_bytes, test13_chars, "escapes13");
        pass &= decodeResultTest(test14_bytes, test14_result, "escapes14");
        pass &= decodeResultTest(test15_bytes, test15_result, "escapes15");

        pass &= encodeTest6730652();

        // PASS/FAIL status is what the whole thing is about.
        //
        if (!pass) {
            throw new Exception("FAIL -- incorrect ISO-2022-CN");
        }
    }
}
451