1 /*
2  * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* @test
25  * @bug 6371437 6371422 6371416 6371619 5058184 6371431 6639450 6569191 6577466
26  *      8212794 8220281 8235834
27  * @summary Check if the problems reported in above bugs have been fixed
28  * @modules jdk.charsets
29  */
30 
31 import java.io.*;
32 import java.nio.*;
33 import java.nio.charset.*;
34 import java.util.Arrays;
35 import java.util.Locale;
36 import java.util.HashSet;
37 
public class TestIBMBugs {

    /**
     * Creates an encoder for the named charset that reports, rather than
     * replaces, malformed input and unmappable characters.
     *
     * @param csName charset name or alias accepted by {@link Charset#forName}
     * @return a new encoder with both error actions set to REPORT
     */
    private static CharsetEncoder reportingEncoder(String csName) {
        return Charset.forName(csName).newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Creates a decoder for the named charset that reports, rather than
     * replaces, malformed input and unmappable characters.
     *
     * @param csName charset name or alias accepted by {@link Charset#forName}
     * @return a new decoder with both error actions set to REPORT
     */
    private static CharsetDecoder reportingDecoder(String csName) {
        return Charset.forName(csName).newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Copies the remaining bytes of the buffer into a new array.
     *
     * @param bb buffer to drain; its position is advanced to its limit
     * @return the bytes between the buffer's position and limit
     */
    private static byte[] toBytes(ByteBuffer bb) {
        byte[] ba = new byte[bb.remaining()];
        bb.get(ba);
        return ba;
    }

    /**
     * Encodes U+1100 with a reporting Cp933 encoder. Either a normal return
     * or a CharacterCodingException is accepted; any other exception
     * propagates and fails the test.
     */
    private static void bug6371437() throws Exception {
        CharsetEncoder converter = reportingEncoder("Cp933");
        CharBuffer in = CharBuffer.wrap(new char[] { (char)4352 });
        try {
            converter.encode(in);
        } catch (CharacterCodingException expected) {
            // a reported coding error is an acceptable outcome
        }
    }

    /**
     * For Cp949 and Cp949C, encodes every code point in [1, 0x1ffff) except
     * U+1100..U+11F9 and fails if any encode returns normally with either no
     * output or a single 0x00 byte. A CharacterCodingException is accepted.
     *
     * @throws Exception if at least one code point produced such output
     */
    private static void bug6371422() throws Exception {
        for (String charset : new String[] { "Cp949", "Cp949C" }) {
            CharsetEncoder converter = reportingEncoder(charset);
            int errors = 0;
            for (int i = 1; i < 0x1ffff; i++) {
                if (i >= 0x1100 && i <= 0x11f9) {
                    continue;  // Don't try leading consonant, vowel and trailing
                               // consonant as a single char
                }
                // one char for BMP values (lone surrogates included),
                // a surrogate pair for supplementary code points
                char[] in = Character.toChars(i);
                try {
                    ByteBuffer out = converter.encode(CharBuffer.wrap(in));
                    if (out.remaining() == 0 ||
                        (out.remaining() == 1 && out.get(0) == 0x00)) {
                        errors++;
                    }
                } catch (CharacterCodingException expected) {
                    // a reported coding error is an acceptable outcome
                }
            }
            if (errors > 0) {
                throw new Exception("Charset "+charset+": "+errors+" errors");
            }
        }
    }

    /**
     * For Cp933, Cp949, Cp949C and Cp970, encodes every lone surrogate
     * (U+D800..U+DFFF) and fails if any encode returns normally with empty
     * output. A CharacterCodingException is accepted.
     *
     * @throws Exception if at least one surrogate produced empty output
     */
    private static void bug6371416() throws Exception {
        for (String charset : new String[] { "Cp933", "Cp949", "Cp949C", "Cp970" }) {
            CharsetEncoder converter = reportingEncoder(charset);
            int errors = 0;
            for (int i = 0xd800; i < 0xe000; i++) {
                char[] in = new char[] { (char)i };
                try {
                    ByteBuffer out = converter.encode(CharBuffer.wrap(in));
                    if (out.remaining() == 0) {
                        errors++;
                    }
                } catch (CharacterCodingException expected) {
                    // a reported coding error is an acceptable outcome
                }
            }
            if (errors > 0) {
                throw new Exception("Charset "+charset+": "+errors+" errors");
            }
        }
    }

    /**
     * Decodes selected single bytes with a reporting Cp964 decoder and fails
     * if any decode returns normally with empty output. The bytes tried are
     * 0x8f and 0xa0..0xff; 0x80..0x8e and 0x90..0x9f are skipped.
     *
     * @throws Exception if at least one byte decoded to an empty result
     */
    private static void bug6371619() throws Exception {
        CharsetDecoder converter = reportingDecoder("Cp964");
        int errors = 0;
        for (int b = 0x80; b < 0x100; b++) {
            if (b == 0x8e ||  // 0x8e is a SS2
                (b >= 0x80 && b <= 0x8d) || (b >= 0x90 && b <= 0x9f)) {
                continue;
            }
            ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
            try {
                CharBuffer out = converter.decode(in);
                if (out.length() == 0) {
                    errors++;
                }
            } catch (CharacterCodingException expected) {
                // a reported coding error is an acceptable outcome
            }
        }
        if (errors > 0) {
            throw new Exception("Charset "+converter.charset()+": "+errors+" errors");
        }
    }


    /**
     * Decodes every single byte 0xa0..0xff with a reporting Cp33722 decoder
     * and fails if any decode returns normally with empty output.
     *
     * @throws Exception if at least one byte decoded to an empty result
     */
    private static void bug6371431() throws Exception {
        CharsetDecoder converter = reportingDecoder("Cp33722");
        int errors = 0;
        for (int b = 0xa0; b < 0x100; b++) {
            ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
            try {
                CharBuffer out = converter.decode(in);
                if (out.length() == 0) {
                    errors++;
                }
            } catch (CharacterCodingException expected) {
                // a reported coding error is an acceptable outcome
            }
        }
        if (errors > 0) {
            throw new Exception("Charset "+converter.charset()+": "+errors+" errors");
        }
    }

    /**
     * Encodes a backslash with IBM949 before and after an IBM949C encode and
     * requires both IBM949 results to be the single byte 0x82.
     *
     * @throws Exception if either IBM949 result differs from {0x82}
     */
    private static void bug6639450 () throws Exception {
        byte[] bytes1 = "\\".getBytes("IBM949");
        // result intentionally discarded: only the IBM949C use in between matters
        "\\".getBytes("IBM949C");
        byte[] bytes2 = "\\".getBytes("IBM949");
        if (bytes1.length != 1 || bytes2.length != 1 ||
            bytes1[0] != (byte)0x82 ||
            bytes2[0] != (byte)0x82) {
            throw new Exception("IBM949/IBM949C failed");
        }
    }

    /**
     * Decodes a fixed Cp943 byte sequence with {@code new String(byte[], String)}
     * and compares the result with the expected string.
     *
     * @throws Exception if the decoded string differs from the expected one
     */
    private static void bug6569191 () throws Exception {
        byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
                                 (byte)0x81, (byte)0xae,  // fffd ff6e
                                 (byte)0x81, (byte)0xaf,  // fffd ff6f
                                 (byte)0x81, (byte)0xb0,  // fffd ff70
                                 (byte)0x85, (byte)0x81,  // fffd ->
                                 (byte)0x85, (byte)0x87,  // 2266 ->
                                 (byte)0x85, (byte)0xe0,  // 32a4 ->
                                 (byte)0x85, (byte)0xf0 };// 7165 fffd
        String s = new String(bs, "Cp943");
        // see DoubleByte for how the unmappables are handled
        if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
            .equals(s)) {
            throw new Exception("Cp943 failed");
        }
    }


    /**
     * Encodes every defined char with x-IBM970. The encoded bytes are not
     * inspected; the check is only that no exception is thrown.
     */
    private static void bug6577466 () throws Exception {
        for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++) {
            if (!Character.isDefined((char)c)) {
                continue;
            }
            String.valueOf((char)c).getBytes("x-IBM970");
        }
    }

    /**
     * Checks that x-IBM970 maps bytes A2 C1 to U+25C9 and back, and that
     * three further characters each encode to the single byte 0x3f.
     *
     * @throws Exception if any of the three comparisons fails
     */
    private static void bug8213618 () throws Exception {
        String cs = "x-IBM970";
        byte[] ba = new byte[]{(byte)0xA2,(byte)0xC1};
        String s = "\u25C9";
        if (!(new String(ba, cs)).equals(s)) {
            throw new Exception("Cp970 failed");
        }
        if (!Arrays.equals(ba, s.getBytes(cs))) {
            throw new Exception("Cp970 failed");
        }
        ba = new byte[]{0x3f,0x3f,0x3f};
        if (!Arrays.equals(ba, "\u6950\u84f1\ucf7f".getBytes(cs))) {
            throw new Exception("Cp970 failed");
        }
    }

    /**
     * Encodes {@code input} with the named charset and compares the bytes
     * with {@code expectedBytes}, then decodes {@code expectedBytes} and
     * compares the text with {@code expectedDecoded}.
     *
     * @param csName          charset name, also used as the failure-message prefix
     * @param input           string to encode
     * @param expectedBytes   bytes the encode step must produce
     * @param expectedDecoded string that decoding {@code expectedBytes} must produce
     * @throws Exception if either comparison fails
     */
    private static void checkEncodeDecode(String csName, String input,
                                          byte[] expectedBytes,
                                          String expectedDecoded) throws Exception {
        Charset charset = Charset.forName(csName);
        byte[] ba = toBytes(charset.encode(input));
        if (!Arrays.equals(ba, expectedBytes)) {
            throw new Exception(csName+" failed to encode");
        }
        CharBuffer cb = charset.decode(ByteBuffer.wrap(expectedBytes));
        if (!cb.toString().equals(expectedDecoded)) {
            throw new Exception(csName+" failed to decode");
        }
    }

    /**
     * Checks how IBM943 and IBM943C encode and decode the four characters
     * backslash, tilde, yen sign and overline.
     *
     * @throws Exception if any encode or decode result differs from the expectation
     */
    private static void bug8202329() throws Exception {
        String original = "\\\u007E\u00A5\u203E"; // [backslash][tilde][yen][overscore]

        // Test IBM943, where backslash and tilde are encoded to unmappable
        // i.e., 0x3f and [yen] and [overscore] are encoded to 0x5c and 0x7e
        checkEncodeDecode("IBM943", original,
                          new byte[] {0x3f, 0x3f, 0x5c, 0x7e},
                          "??\u00A5\u203E");

        // Test IBM943C, where backslash and tilde are encoded to 0x5c and 0x7e
        // and [yen] and [overscore] are encoded to 0x5c and 0x7e
        checkEncodeDecode("IBM943C", original,
                          new byte[] {0x5c, 0x7e, 0x5c, 0x7e},
                          "\\~\\~");
    }

    /**
     * Checks that x-IBM964 decodes bytes 5C 90 A1 A1 to backslash, U+0090,
     * U+3000 and encodes those three chars back to the same bytes.
     *
     * @throws Exception if the decoded chars or the encoded bytes differ
     */
    private static void bug8212794 () throws Exception {
        Charset cs = Charset.forName("x-IBM964");
        byte[] ba = new byte[] {(byte)0x5c, (byte)0x90, (byte)0xa1, (byte)0xa1};
        char[] ca = new char[] {'\\', '\u0090', '\u3000'};
        CharBuffer cb = cs.decode(ByteBuffer.wrap(ba));
        // read through the buffer API so no assumption is made about the
        // backing array's offset or spare capacity
        char[] decoded = new char[cb.remaining()];
        cb.get(decoded);
        if (!Arrays.equals(ca, decoded)) {
            throw new Exception("IBM964 failed to decode");
        }
        byte[] encoded = toBytes(cs.encode(CharBuffer.wrap(ca)));
        if (!Arrays.equals(ba, encoded)) {
            throw new Exception("IBM964 failed to encode");
        }
    }

    /**
     * Runs the AIX codeset check (a no-op on other platforms) followed by
     * the IBM charset alias check.
     *
     * @throws Exception if either check fails
     */
    private static void bug8220281 () throws Exception {
        checkAixCodesets();
        checkIbmAliases();
    }

    /**
     * On AIX only: each listed codeset name must resolve to a charset whose
     * class is in package sun.nio.cs and which equals the charset found
     * under the Java name at the same index.
     *
     * @throws Exception on the first codeset that violates either condition
     */
    private static void checkAixCodesets() throws Exception {
        if (!System.getProperty("os.name").contains("AIX")) {
            return;
        }
        /* Following AIX codesets are used for Java default charset. */
        /* They should be in sun.nio.cs package on AIX platform.     */
        String[] codesets = new String[] {
            "IBM-950", "BIG5-HKSCS", "GB18030", "IBM-1046",
            "IBM-1124", "IBM-1129", "IBM-1252", "IBM-856",
            "IBM-858", "IBM-921", "IBM-922", "IBM-932", "IBM-943C",
            "IBM-eucCN", "IBM-eucJP", "IBM-eucKR", "IBM-eucTW",
            "ISO8859-1", "ISO8859-15", "ISO8859-2", "ISO8859-4",
            "ISO8859-5", "ISO8859-6", "ISO8859-7", "ISO8859-8",
            "ISO8859-9", "TIS-620", "UTF-8", };
        String[] charsets = new String[] {
            "x-IBM950", "Big5-HKSCS", "GB18030", "x-IBM1046",
            "x-IBM1124", "x-IBM1129", "windows-1252", "x-IBM856",
            "IBM00858", "x-IBM921", "x-IBM922", "x-IBM942C",
            "x-IBM943C", "x-IBM1383", "x-IBM29626C", "x-IBM970",
            "x-IBM964", "ISO-8859-1", "ISO-8859-15", "ISO-8859-2",
            "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
            "ISO-8859-8", "ISO-8859-9", "TIS-620", "UTF-8", };
        for (int i = 0; i < codesets.length; i++) {
            Charset cs0 = Charset.forName(codesets[i]);
            if (!"sun.nio.cs".equals(cs0.getClass().getPackage().getName())) {
                throw new Exception(cs0.getClass().getCanonicalName()+" failed");
            }
            Charset cs1 = Charset.forName(charsets[i]);
            if (!cs0.equals(cs1)) {
                throw new Exception(codesets[i]+"("+cs0.name()+") failed");
            }
        }
    }

    /**
     * For every available charset whose lower-cased name starts with
     * "x-ibm-", "x-ibm", "ibm-" or "ibm" (plus "ibm-thai", treated as 838),
     * derives the numeric suffix with leading zeros stripped and then
     * left-padded to three characters, and requires the name-plus-aliases
     * set to contain "cp"+suffix, "ibm"+suffix, "ibm-"+suffix and the bare
     * suffix.
     *
     * @throws Exception naming the charset and the first missing alias
     */
    private static void checkIbmAliases() throws Exception {
        for (Charset cs : Charset.availableCharsets().values()) {
            String csName = cs.name().toLowerCase(Locale.ROOT);
            HashSet<String> aliases = new HashSet<>();
            for (String s : cs.aliases()) {
                aliases.add(s.toLowerCase(Locale.ROOT));
            }
            aliases.add(csName);

            // longer prefixes are tested first so "x-ibm-" wins over "x-ibm"
            String suffix = null;
            if (csName.startsWith("x-ibm-")) {
                suffix = csName.replaceAll("x-ibm-0*", "");
            } else if (csName.startsWith("x-ibm")) {
                suffix = csName.replaceAll("x-ibm0*", "");
            } else if (csName.startsWith("ibm-")) {
                suffix = csName.replaceAll("ibm-0*", "");
            } else if (csName.startsWith("ibm")) {
                suffix = csName.replaceAll("ibm0*", "");
            }
            if ("ibm-thai".equals(csName)) {
                suffix = "838";
            }
            if (suffix == null) {
                continue;
            }
            while (suffix.length() < 3) {
                suffix = "0"+suffix;
            }
            if (!aliases.contains("cp"+suffix)) {
                throw new Exception(cs.name()+"\t"+"cp"+suffix);
            }
            if (!aliases.contains("ibm"+suffix)) {
                throw new Exception(cs.name()+"\t"+"ibm"+suffix);
            }
            if (!aliases.contains("ibm-"+suffix)) {
                throw new Exception(cs.name()+"\t"+"ibm-"+suffix);
            }
            if (!aliases.contains(suffix)) {
                throw new Exception(cs.name()+"\t"+suffix);
            }
        }
    }

    // Following test data is for 8235834
    private static final byte[] byteIBM943c2b = new byte[] {
        (byte)0x81, (byte)0x5C, (byte)0x81, (byte)0x60,
        (byte)0x81, (byte)0x61, (byte)0x81, (byte)0x7C,
        (byte)0x88, (byte)0xA0, (byte)0x89, (byte)0x8B,
        (byte)0x89, (byte)0xA8, (byte)0x8A, (byte)0x9A,
        (byte)0x8B, (byte)0xA0, (byte)0x8B, (byte)0xEB,
        (byte)0x8C, (byte)0x71, (byte)0x8C, (byte)0x74,
        (byte)0x8C, (byte)0xB2, (byte)0x8D, (byte)0x8D,
        (byte)0x8D, (byte)0xF2, (byte)0x8E, (byte)0xC6,
        (byte)0x8F, (byte)0x4A, (byte)0x8F, (byte)0xD3,
        (byte)0x8F, (byte)0xDD, (byte)0x90, (byte)0xE4,
        (byte)0x91, (byte)0x7E, (byte)0x91, (byte)0x89,
        (byte)0x91, (byte)0xCB, (byte)0x92, (byte)0x5C,
        (byte)0x92, (byte)0xCD, (byte)0x93, (byte)0x55,
        (byte)0x93, (byte)0x5E, (byte)0x93, (byte)0x98,
        (byte)0x93, (byte)0xC0, (byte)0x94, (byte)0x58,
        (byte)0x94, (byte)0x8D, (byte)0x94, (byte)0xAC,
        (byte)0x94, (byte)0xAE, (byte)0x96, (byte)0x6A,
        (byte)0x96, (byte)0xCB, (byte)0x97, (byte)0x89,
        (byte)0x98, (byte)0x58, (byte)0x9B, (byte)0xA0,
        (byte)0x9D, (byte)0xB7, (byte)0x9E, (byte)0x94,
        (byte)0xE3, (byte)0x79, (byte)0xE4, (byte)0x45,
        (byte)0xE8, (byte)0xF6, (byte)0xFA, (byte)0x55,
        (byte)0xFA, (byte)0x59,
    };

    private static final String strIBM943c2b1 =
        "\u2015\uFF5E\u2225\uFF0D\u555E\u7130\u9DD7\u5699" +
        "\u4FE0\u8EC0\u7E6B\u8346\u9E7C\u9EB4\u6805\u5C62" +
        "\u7E61\u8523\u91AC\u87EC\u6414\u7626\u9A52\u7C1E" +
        "\u6451\u5861\u985A\u79B1\u7006\u56CA\u525D\u6F51" +
        "\u91B1\u9830\u9EB5\u840A\u881F\u5C5B\u6522\u688E" +
        "\u7E48\u8141\u9839\uFFE4\uF86F";

    private static final String strIBM943c2b2 =
        "\u2014\u301C\u2016\u2212\u5516\u7114\u9D0E\u565B" +
        "\u4FA0\u8EAF\u7E4B\u834A\u9E78\u9EB9\u67F5\u5C61" +
        "\u7E4D\u848B\u91A4\u8749\u63BB\u75E9\u9A28\u7BAA" +
        "\u63B4\u586B\u985B\u7977\u6D9C\u56A2\u5265\u6E8C" +
        "\u9197\u982C\u9EBA\u83B1\u874B\u5C4F\u6505\u688D" +
        "\u7E66\u80FC\u983D\u00A6\u2116";

    /**
     * For x-IBM943 and x-IBM943C, requires the first test string to encode
     * to the shared byte table and the byte table to decode to the second
     * test string.
     *
     * @throws Exception if the encoded bytes or the decoded string differ
     */
    private static void bug8235834 () throws Exception {
        // 8235834 affects IBM-943 and IBM-943C encoder.
        // The decoded results of the corresponding characters of IBM-943
        // and IBM-943C is the same.
        for (String csName : new String[] {"x-IBM943", "x-IBM943C"}) {
            Charset cs = Charset.forName(csName);
            if (!Arrays.equals(byteIBM943c2b, strIBM943c2b1.getBytes(cs))) {
                throw new Exception(csName+" failed to encode");
            }
            if (!strIBM943c2b2.equals(new String(byteIBM943c2b, cs))) {
                throw new Exception(csName+" failed to round-trip conversion");
            }
        }
    }

    /**
     * Runs every regression check in a fixed order; the first failing check
     * aborts the run by throwing.
     *
     * @param args ignored
     * @throws Exception if any check fails
     */
    public static void main (String[] args) throws Exception {
        bug6577466();
        // need to be tested before any other IBM949C test case
        bug6639450();
        bug6371437();
        bug6371422();
        bug6371416();
        bug6371619();
        bug6371431();
        bug6569191();
        bug8202329();
        bug8212794();
        bug8213618();
        bug8220281();
        bug8235834();
    }
}
402