1 /*
2  * Copyright (c) 2008, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /*
25  * @test
26  * @bug 4831163 5053096 5056440 8022224
27  * @summary NIO charset basic verification of JISAutodetect decoder
28  * @modules jdk.charsets
29  * @author Martin Buchholz
30  */
31 
32 import java.io.*;
33 import java.nio.ByteBuffer;
34 import java.nio.CharBuffer;
35 import java.nio.charset.Charset;
36 import java.nio.charset.CharsetDecoder;
37 import java.nio.charset.CoderResult;
38 import static java.lang.System.*;
39 
/**
 * Basic verification of the auto-detecting Japanese charset decoder, which
 * this test looks up under the names {@code JISAutoDetect},
 * {@code JISAutodetect} and {@code x-JISAutoDetect}.
 *
 * <p>Individual checks do not abort the run: each failed check prints a
 * message, dumps a stack trace and bumps {@link #failures}. {@code main}
 * throws at the very end if any check failed.
 */
public class NIOJISAutoDetectTest {
    /** Number of failed checks recorded so far. */
    private static int failures = 0;

    /**
     * Records one failure: prints the message to standard output and
     * increments the failure counter.
     *
     * @param failureMsg text describing what went wrong
     */
    private static void fail(String failureMsg) {
        System.out.println(failureMsg);
        failures++;
    }

    /**
     * Records a failure (with a stack trace identifying the call site) when
     * the given condition does not hold.
     *
     * @param cond condition that is expected to be true
     * @param msg  label appended to {@code "test failed: "} on failure
     */
    private static void check(boolean cond, String msg) {
        if (!cond) {
            fail("test failed: " + msg);
            // Several call sites share a label; the trace tells them apart.
            new Exception().printStackTrace();
        }
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports
     * for the bytes {@code bb dd cf b2}.
     */
    private static String SJISName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xbb, (byte)0xdd,
                                            (byte)0xcf, (byte)0xb2});
    }

    /**
     * Returns the name of the charset the auto-detecting decoder reports
     * for the bytes {@code a4 d2 a4 e9}.
     */
    private static String EUCJName() throws Exception {
        return detectingCharset(new byte[] {(byte)0xa4, (byte)0xd2,
                                            (byte)0xa4, (byte)0xe9});
    }

    /**
     * Decodes {@code bytes} with a fresh {@code JISAutodetect} decoder and
     * returns the name of the charset it detected, exercising the
     * detection-related public methods of {@link CharsetDecoder} on the way.
     *
     * @param bytes input that is expected to trigger detection
     * @return the name of the detected charset
     * @throws RuntimeException if the decoder reports a {@code null}
     *         detected charset, since nothing further can be verified
     */
    private static String detectingCharset(byte[] bytes) throws Exception {
        //----------------------------------------------------------------
        // Test special public methods of CharsetDecoder while we're here
        //----------------------------------------------------------------
        CharsetDecoder cd = Charset.forName("JISAutodetect").newDecoder();
        check(cd.isAutoDetecting(), "isAutoDetecting()");
        check(! cd.isCharsetDetected(), "isCharsetDetected");

        // A lone 'A' is expected to leave the charset undetected.
        cd.decode(ByteBuffer.wrap(new byte[] {(byte)'A'}));
        check(! cd.isCharsetDetected(), "isCharsetDetected");
        try {
            cd.detectedCharset();
            fail("no IllegalStateException");
        } catch (IllegalStateException expected) {
            // Expected: no charset has been detected yet.
        }

        cd.decode(ByteBuffer.wrap(bytes));
        check(cd.isCharsetDetected(), "isCharsetDetected");
        Charset cs = cd.detectedCharset();
        check(cs != null, "cs != null");
        if (cs == null) {
            // Abort with a clear diagnosis rather than an anonymous
            // NullPointerException on the dereference below.
            throw new RuntimeException(
                "detectedCharset() returned null after detection");
        }
        check(! cs.newDecoder().isAutoDetecting(), "isAutodetecting()");
        return cs.name();
    }

    /**
     * Runs every check in sequence and throws if any of them failed.
     *
     * @param argv ignored
     * @throws RuntimeException if at least one check recorded a failure
     */
    public static void main(String[] argv) throws Exception {
        //----------------------------------------------------------------
        // Used to throw BufferOverflowException
        //----------------------------------------------------------------
        System.out.println(new String(new byte[] {0x61}, "JISAutoDetect"));

        checkReaderTerminates();
        checkAllChars();
        checkByteSequences();
        checkTruncatedFirstChar();
        checkBug8022224();

        if (failures > 0) {
            throw new RuntimeException(failures + " tests failed");
        }
    }

    /**
     * InputStreamReader(...JISAutoDetect) used to infloop: the second and
     * subsequent reads kept returning the first line.
     */
    private static void checkReaderTerminates() throws Exception {
        // Encode explicitly so the input bytes do not depend on the
        // platform default charset.
        byte[] bytes = "ABCD\n".getBytes("US-ASCII");
        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        InputStreamReader isr = new InputStreamReader(bais, "JISAutoDetect");
        BufferedReader reader = new BufferedReader(isr);
        // Constant-first comparison: a null first read is recorded as a
        // failure instead of raising NullPointerException.
        check("ABCD".equals(reader.readLine()), "first read gets text");
        // used to return "ABCD" on second and subsequent reads
        check(reader.readLine() == null, "second read gets null");
    }

    /**
     * Checks all Japanese chars for sanity: every char that round-trips
     * through the detected EUC charset is also pushed through the
     * auto-detecting decoder from its EUC, ISO-2022-JP and SJIS encodings.
     * Prints the per-encoding counts when done.
     */
    private static void checkAllChars() throws Exception {
        String sjisName = SJISName();
        String eucjName = EUCJName();
        System.out.printf("SJIS charset is %s%n", sjisName);
        System.out.printf("EUCJ charset is %s%n", eucjName);

        int cnt2022 = 0;
        int cnteucj = 0;
        int cntsjis = 0;
        int cntBAD  = 0;
        // The bound is '<', so U+FFFF itself is not visited; with a char
        // loop variable '<=' would never terminate.
        for (char c = '\u0000'; c < '\uffff'; c++) {
            if (c == '\u001b' || // ESC
                c == '\u2014') { // Em-Dash?
                continue;
            }
            String s = new String(new char[] {c});

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that EUC-JP can,
            // unless there is an ambiguity with SJIS.
            //------------------------------------------------------------
            byte[] beucj = s.getBytes(eucjName);
            String seucj = new String(beucj, eucjName);
            if (! seucj.equals(s)) {
                continue; // Optimization: not EUC-encodable, skip the rest
            }
            cnteucj++;
            String sauto = new String(beucj, "JISAutoDetect");
            if (! sauto.equals(seucj)) {
                cntBAD++;
                String ssjis = new String(beucj, sjisName);
                if (! sauto.equals(ssjis)) {
                    fail("Autodetection agrees with neither EUC nor SJIS");
                }
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle all chars that ISO-2022-JP can.
            //------------------------------------------------------------
            byte[] b2022 = s.getBytes("ISO-2022-JP");
            if (new String(b2022, "ISO-2022-JP").equals(s)) {
                cnt2022++;
                check(new String(b2022, "JISAutoDetect").equals(s),
                      "ISO2022 autodetection");
            }

            //------------------------------------------------------------
            // JISAutoDetect can handle almost all chars that SJIS can.
            //------------------------------------------------------------
            byte[] bsjis = s.getBytes(sjisName);
            if (new String(bsjis, sjisName).equals(s)) {
                cntsjis++;
                check(new String(bsjis, "JISAutoDetect").equals(s),
                      "SJIS autodetection");
            }
        }
        System.out.printf("There are %d ISO-2022-JP-encodable characters.%n", cnt2022);
        System.out.printf("There are %d SJIS-encodable characters.%n",        cntsjis);
        System.out.printf("There are %d EUC-JP-encodable characters.%n",      cnteucj);
        System.out.printf("There are %d characters that are " +
                          "misdetected as SJIS after being EUC-encoded.%n", cntBAD);
    }

    /**
     * Tests for specific byte sequences: each one must decode under
     * auto-detection exactly as it does under the named charset.
     */
    private static void checkByteSequences() throws Exception {
        test("ISO-2022-JP", new byte[] {'A', 'B', 'C'});
        test("EUC-JP",      new byte[] {'A', 'B', 'C'});
        test("SJIS",        new byte[] {'A', 'B', 'C'});

        test("SJIS",
             new byte[] { 'C', 'o', 'p',  'y',  'r', 'i', 'g',  'h', 't',
                          ' ', (byte)0xa9, ' ', '1', '9', '9',  '8' });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0x82, (byte)0xc5, (byte)0x82, (byte)0xb7 });

        test("EUC-JP",
             new byte[] { (byte)0xa4, (byte)0xd2, (byte)0xa4, (byte)0xe9,
                          (byte)0xa4, (byte)0xac, (byte)0xa4, (byte)0xca });

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde});

        test("SJIS",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd });

        test("SJIS",
             new byte[] { (byte)0x8f, (byte)0xa1, (byte)0xaa });

        test("EUC-JP",
             new byte[] { (byte)0x8f, (byte)0xc5, (byte)0xe0, (byte)0x20});

        test("EUC-JP",
             new byte[] { (byte)0xbb, (byte)0xdd, (byte)0xcf, (byte)0xb2,
                          (byte)0xb8, (byte)0xdb, (byte)0xbc, (byte)0xbd,
                          (byte)0xc3, (byte)0xd1, (byte)0xbd, (byte)0xde,
                          (byte)0xa4, (byte)0xc7, (byte)0xa4, (byte)0xb9 });

        test("ISO-2022-JP",
             new byte[] { 0x1b, '$', 'B', '#', '4', '$', '5', 0x1b, '(', 'B' });
    }

    /**
     * Checks handling of ambiguous end-of-input in the middle of the first
     * non-ASCII char: the decoder must consume only the leading 'A' while
     * the trailing 0x8f is still ambiguous, and finish once more input and
     * end-of-input are supplied.
     */
    private static void checkTruncatedFirstChar() throws Exception {
        CharsetDecoder dc = Charset.forName("x-JISAutoDetect").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(128);
        CharBuffer cb = CharBuffer.allocate(128);
        bb.put((byte)'A').put((byte)0x8f);
        bb.flip();

        CoderResult res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Decoding again without new input must not make progress.
        res = dc.decode(bb, cb, false);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 1, "bb.position()");
        check(cb.position() == 1, "cb.position()");

        // Keep the pending 0x8f, append one more byte and signal end of input.
        bb.compact();
        bb.put((byte)0xa1);
        bb.flip();
        res = dc.decode(bb, cb, true);
        check(res.isUnderflow(), "isUnderflow");
        check(bb.position() == 2, "bb.position()");
        check(cb.position() == 2, "cb.position()");
    }

    /**
     * test #8022224: decoding the same input into a 10-char output buffer
     * and into one limited to a single char must yield the same
     * {@link CoderResult}.
     */
    private static void checkBug8022224() throws Exception {
        Charset cs = Charset.forName("x-JISAutoDetect");
        ByteBuffer bb = ByteBuffer.wrap(new byte[] { 'a', 0x1b, 0x24, 0x40 });
        CharBuffer cb = CharBuffer.wrap(new char[10]);
        CoderResult cr = cs.newDecoder().decode(bb, cb, false);
        bb.rewind();
        cb.clear().limit(1);
        check(cr == cs.newDecoder().decode(bb, cb, false), "#8022224");
    }

    /**
     * Records a failure unless {@code result} is an underflow.
     *
     * @param result outcome of a decode call
     */
    static void checkCoderResult(CoderResult result) {
        check(result.isUnderflow(),
              "Unexpected coder result: " + result);
    }

    /**
     * Decodes {@code input} once with the auto-detecting decoder and once
     * with a decoder for {@code expectedCharset}, and records a failure
     * if the two results differ or either decode does not end in underflow.
     *
     * @param expectedCharset name of the charset auto-detection should match
     * @param input           bytes to decode; not modified
     */
    static void test(String expectedCharset, byte[] input) throws Exception {
        CharsetDecoder autoDetect =
            Charset.forName("x-JISAutoDetect").newDecoder();
        CharsetDecoder decoder =
            Charset.forName(expectedCharset).newDecoder();

        ByteBuffer bb = ByteBuffer.wrap(input);
        CharBuffer charOutput = CharBuffer.allocate(128);
        CharBuffer charExpected = CharBuffer.allocate(128);

        checkCoderResult(autoDetect.decode(bb, charOutput, true));
        charOutput.flip();
        String actual = charOutput.toString();

        // Feed the very same bytes to the reference decoder.
        bb.rewind();

        checkCoderResult(decoder.decode(bb, charExpected, true));
        charExpected.flip();
        String expected = charExpected.toString();

        // Name the expected charset: several call sites share input bytes.
        check(actual.equals(expected),
              String.format("%s: actual=%s expected=%s",
                            expectedCharset, actual, expected));
    }
}
292