/*
 * Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/* @test
 * @bug 6371437 6371422 6371416 6371619 5058184 6371431 6639450 6569191 6577466
 *      8212794 8220281 8235834
 * @summary Check if the problems reported in above bugs have been fixed
 * @modules jdk.charsets
 */

import java.io.*;
import java.nio.*;
import java.nio.charset.*;
import java.util.Arrays;
import java.util.Locale;
import java.util.HashSet;
import java.util.Set;

/**
 * Regression checks for several IBM charsets.
 *
 * <p>Every {@code bugNNNNNNN} method is named after the bug id it covers and
 * signals a failure by throwing an {@link Exception}; {@link #main} simply
 * runs them one after another.
 *
 * <p>NOTE(review): {@code bug8202329} and {@code bug8213618} are executed by
 * {@code main} but their ids are not listed in the {@code @bug} tag above;
 * confirm whether the tag should mention them.
 */
public class TestIBMBugs {

    /**
     * Creates an encoder for the named charset that reports (throws on) both
     * malformed input and unmappable characters instead of substituting.
     *
     * @param charsetName name or alias of the charset
     * @return a new encoder configured with {@code CodingErrorAction.REPORT}
     */
    private static CharsetEncoder strictEncoder(String charsetName) {
        return Charset.forName(charsetName).newEncoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Creates a decoder for the given charset that reports (throws on) both
     * malformed input and unmappable characters instead of substituting.
     *
     * @param charset the charset to decode with
     * @return a new decoder configured with {@code CodingErrorAction.REPORT}
     */
    private static CharsetDecoder strictDecoder(Charset charset) {
        return charset.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Bug 6371437: encodes the single char U+1100 (decimal 4352) with a
     * strict Cp933 encoder. A {@link CharacterCodingException} is accepted;
     * the check only requires that the call completes without any other
     * failure.
     */
    private static void bug6371437() throws Exception {
        CharsetEncoder converter = strictEncoder("Cp933");
        CharBuffer in = CharBuffer.wrap(new char[] { (char)4352 });
        try {
            converter.encode(in);
        } catch (CharacterCodingException ignored) {
            // Rejecting the character is an acceptable outcome.
        }
    }

    /**
     * Bug 6371422: for Cp949 and Cp949C, strictly encodes every code point in
     * [1, 0x1ffff) except U+1100..U+11F9 and counts it as an error when the
     * encoder returns successfully but produced no bytes, or produced the
     * single byte 0x00.
     *
     * @throws Exception if any such result was seen for a charset
     */
    private static void bug6371422() throws Exception {
        String[] charsets = { "Cp949", "Cp949C" };
        for (String charset : charsets) {
            CharsetEncoder converter = strictEncoder(charset);
            int errors = 0;
            for (int i = 1; i < 0x1ffff; i++) {
                if (i >= 0x1100 && i <= 0x11f9)
                    continue;  //Dont try leading consonant, vowel and trailing
                               //consonant as a single char
                // One char for BMP values (lone surrogates included), a
                // surrogate pair for supplementary code points.
                char[] in = Character.toChars(i);
                try {
                    ByteBuffer out = converter.encode(CharBuffer.wrap(in));
                    if (out.remaining() == 0 ||
                        (out.remaining() == 1 && out.get(0) == 0x00)) {
                        errors++;
                    }
                } catch (CharacterCodingException ignored) {
                    // An explicit coding error is an acceptable outcome.
                }
            }
            if (errors > 0)
                throw new Exception("Charset "+charset+": "+errors+" errors");
        }
    }

    /**
     * Bug 6371416: for Cp933, Cp949, Cp949C and Cp970, strictly encodes each
     * lone surrogate char (U+D800..U+DFFF) and counts it as an error when the
     * encoder returns successfully with an empty result.
     *
     * @throws Exception if any empty result was seen for a charset
     */
    private static void bug6371416() throws Exception {
        String[] charsets = { "Cp933", "Cp949", "Cp949C", "Cp970"};
        for (String charset : charsets) {
            CharsetEncoder converter = strictEncoder(charset);
            int errors = 0;
            for (int i = 0xd800; i < 0xe000; i++) {
                char[] in = new char[] { (char)i };
                try {
                    ByteBuffer out = converter.encode(CharBuffer.wrap(in));
                    if (out.remaining() == 0)
                        errors++;
                } catch (CharacterCodingException ignored) {
                    // An explicit coding error is an acceptable outcome.
                }
            }
            if (errors > 0)
                throw new Exception("Charset "+charset+": "+errors+" errors");
        }
    }

    /**
     * Bug 6371619: strictly decodes single bytes with Cp964 and counts it as
     * an error when decoding succeeds with an empty result. Bytes
     * 0x80..0x8e and 0x90..0x9f are skipped, so the bytes actually tried are
     * 0x8f and 0xa0..0xff.
     *
     * @throws Exception if any empty result was seen
     */
    private static void bug6371619() throws Exception {
        String encoding = "Cp964";
        Charset charset = Charset.forName(encoding);
        CharsetDecoder converter = strictDecoder(charset);
        int errors = 0;
        for (int b = 0x80; b < 0x100; b++) {
            boolean skipped = b == 0x8e ||       // 0x8e is a SS2
                (b >= 0x80 && b <= 0x8d) || (b >= 0x90 && b <= 0x9f);
            if (skipped)
                continue;
            ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
            try {
                CharBuffer out = converter.decode(in);
                if (out.length() == 0) {
                    errors++;
                }
            } catch (CharacterCodingException ignored) {
                // An explicit coding error is an acceptable outcome.
            }
        }
        if (errors > 0)
            throw new Exception("Charset "+charset+": "+errors+" errors");
    }

    /**
     * Bug 6371431: strictly decodes each single byte 0xa0..0xff with Cp33722
     * and counts it as an error when decoding succeeds with an empty result.
     *
     * @throws Exception if any empty result was seen
     */
    private static void bug6371431() throws Exception {
        String encoding = "Cp33722";
        Charset charset = Charset.forName(encoding);
        CharsetDecoder converter = strictDecoder(charset);
        int errors = 0;
        for (int b = 0xa0; b < 0x100; b++) {
            ByteBuffer in = ByteBuffer.wrap(new byte[] { (byte)b });
            try {
                CharBuffer out = converter.decode(in);
                if (out.length() == 0) {
                    errors++;
                }
            } catch (CharacterCodingException ignored) {
                // An explicit coding error is an acceptable outcome.
            }
        }
        if (errors > 0)
            throw new Exception("Charset "+charset+": "+errors+" errors");
    }

    /**
     * Bug 6639450: encodes a backslash with IBM949 before and after encoding
     * it once with IBM949C, and requires the single byte 0x82 both times.
     * The IBM949C result itself is discarded; presumably the call is there
     * to show that using IBM949C does not disturb IBM949 (see the ordering
     * note in {@link #main}).
     *
     * @throws Exception if either IBM949 result is not exactly {0x82}
     */
    private static void bug6639450 () throws Exception {
        byte[] bytes1 = "\\".getBytes("IBM949");
        "\\".getBytes("IBM949C");
        byte[] bytes2 = "\\".getBytes("IBM949");
        if (bytes1.length != 1 || bytes2.length != 1 ||
            bytes1[0] != (byte)0x82 ||
            bytes2[0] != (byte)0x82)
            throw new Exception("IBM949/IBM949C failed");
    }

    /**
     * Bug 6569191: decodes a fixed Cp943 byte sequence containing unmappable
     * double-byte values and compares the result with the expected string
     * (the per-pair comments list the expected chars in hex).
     *
     * @throws Exception if the decoded string differs
     */
    private static void bug6569191 () throws Exception {
        byte[] bs = new byte[] { (byte)0x81, (byte)0xad,  // fffd ff6d
                                 (byte)0x81, (byte)0xae,  // fffd ff6e
                                 (byte)0x81, (byte)0xaf,  // fffd ff6f
                                 (byte)0x81, (byte)0xb0,  // fffd ff70
                                 (byte)0x85, (byte)0x81,  // fffd ->
                                 (byte)0x85, (byte)0x87,  // 2266 ->
                                 (byte)0x85, (byte)0xe0,  // 32a4 ->
                                 (byte)0x85, (byte)0xf0 };// 7165 fffd
        String s = new String(bs, "Cp943");
        // see DoubleByte for how the unmappables are handled
        if (!"\ufffd\uff6d\ufffd\uff6e\ufffd\uff6f\ufffd\uff70\ufffd\u2266\u32a4\u7165\ufffd"
            .equals(s))
            throw new Exception("Cp943 failed");
    }

    /**
     * Bug 6577466: encodes every defined char value with x-IBM970. The
     * encoded bytes are not inspected; the check passes as long as every
     * call completes without throwing.
     */
    private static void bug6577466 () throws Exception {
        for (int c = Character.MIN_VALUE; c <= Character.MAX_VALUE; c++){
            if (!Character.isDefined((char)c)) continue;
            String s = String.valueOf((char)c);
            s.getBytes("x-IBM970");
        }
    }

    /**
     * Bug 8213618: checks that x-IBM970 maps bytes {0xA2, 0xC1} to U+25C9 and
     * back, and that three further chars each encode to the single byte 0x3f.
     *
     * @throws Exception if any of the three comparisons fails
     */
    private static void bug8213618 () throws Exception {
        String cs = "x-IBM970";
        byte[] ba = new byte[]{(byte)0xA2,(byte)0xC1};
        String s = "\u25C9";
        if (!(new String(ba, cs)).equals(s))
            throw new Exception("Cp970 failed");
        if (!Arrays.equals(ba, s.getBytes(cs)))
            throw new Exception("Cp970 failed");
        ba = new byte[]{0x3f,0x3f,0x3f};
        if (!Arrays.equals(ba, "\u6950\u84f1\ucf7f".getBytes(cs)))
            throw new Exception("Cp970 failed");
    }

    /**
     * Bug 8202329: checks how IBM943 and IBM943C convert the four chars
     * [backslash][tilde][yen][overscore], in both directions.
     *
     * @throws Exception if an encode or decode result differs from the
     *         expected value
     */
    private static void bug8202329() throws Exception {
        String original = "\\\u007E\u00A5\u203E"; // [backslash][tilde][yen][overscore]

        // Test IBM943, where \ and ~ are encoded to unmappable i.e., 0x3f
        // and [yen] and [overscore] are encoded to 0x5c and 0x7e
        checkConversion("IBM943", original,
                        new byte[] {0x3f, 0x3f, 0x5c, 0x7e},
                        "??\u00A5\u203E");

        // Test IBM943C, where \ and ~ are encoded to 0x5c and 0x7e
        // and [yen] and [overscore] are encoded to 0x5c and 0x7e
        checkConversion("IBM943C", original,
                        new byte[] {0x5c, 0x7e, 0x5c, 0x7e},
                        "\\~\\~");
    }

    /**
     * Encodes {@code original} with the named charset and compares the bytes
     * with {@code expectedBytes}, then decodes {@code expectedBytes} and
     * compares the text with {@code expectedStringFromBytes}.
     *
     * @param csName                  charset name, also used in the failure message
     * @param original                text to encode
     * @param expectedBytes           bytes expected from encoding {@code original}
     * @param expectedStringFromBytes text expected from decoding {@code expectedBytes}
     * @throws Exception "{@code csName} failed to encode" or
     *         "{@code csName} failed to decode" on a mismatch
     */
    private static void checkConversion(String csName,
                                        String original,
                                        byte[] expectedBytes,
                                        String expectedStringFromBytes)
            throws Exception {
        Charset charset = Charset.forName(csName);

        ByteBuffer bb = charset.encode(original);
        byte[] ba = new byte[bb.remaining()];
        bb.get(ba, 0, ba.length);
        if (!Arrays.equals(ba, expectedBytes)) {
            throw new Exception(csName + " failed to encode");
        }

        CharBuffer cb = charset.decode(ByteBuffer.wrap(expectedBytes));
        if (!cb.toString().equals(expectedStringFromBytes)) {
            throw new Exception(csName + " failed to decode");
        }
    }

    /**
     * Bug 8212794: checks that x-IBM964 decodes {0x5c, 0x90, 0xa1, 0xa1} to
     * backslash, U+0090, U+3000 and encodes those chars back to the same
     * bytes.
     *
     * @throws Exception if either direction yields a different result
     */
    private static void bug8212794 () throws Exception {
        Charset cs = Charset.forName("x-IBM964");
        byte[] ba = new byte[] {(byte)0x5c, (byte)0x90, (byte)0xa1, (byte)0xa1};
        char[] ca = new char[] {'\\', '\u0090', '\u3000'};
        ByteBuffer bb = ByteBuffer.wrap(ba);
        CharBuffer cb = cs.decode(bb);
        // The backing array may be larger than the content; trim to limit.
        if (!Arrays.equals(ca, Arrays.copyOf(cb.array(), cb.limit()))) {
            throw new Exception("IBM964 failed to decode");
        }
        cb = CharBuffer.wrap(ca);
        bb = cs.encode(cb);
        if (!Arrays.equals(ba, Arrays.copyOf(bb.array(), bb.limit()))) {
            throw new Exception("IBM964 failed to encode");
        }
    }

    /**
     * Bug 8220281: on AIX, checks the AIX codeset names; on every platform,
     * checks the alias convention of all IBM-named charsets.
     *
     * @throws Exception if any check fails
     */
    private static void bug8220281 () throws Exception {
        if (System.getProperty("os.name").contains("AIX")) {
            checkAixCodesets();
        }
        checkIbmAliases();
    }

    /**
     * Checks that each AIX codeset name resolves to a charset whose class is
     * in package {@code sun.nio.cs} and that it equals the charset obtained
     * through the corresponding Java charset name.
     *
     * @throws Exception if a codeset resolves to another package or to a
     *         different charset
     */
    private static void checkAixCodesets() throws Exception {
        /* Following AIX codesets are used for Java default charset. */
        /* They should be in sun.nio.cs package on AIX platform.     */
        String[] codesets = new String[] {
            "IBM-950", "BIG5-HKSCS", "GB18030", "IBM-1046",
            "IBM-1124", "IBM-1129", "IBM-1252", "IBM-856",
            "IBM-858", "IBM-921", "IBM-922", "IBM-932", "IBM-943C",
            "IBM-eucCN", "IBM-eucJP", "IBM-eucKR", "IBM-eucTW",
            "ISO8859-1", "ISO8859-15", "ISO8859-2", "ISO8859-4",
            "ISO8859-5", "ISO8859-6", "ISO8859-7", "ISO8859-8",
            "ISO8859-9", "TIS-620", "UTF-8", };
        // Parallel to codesets: charsets[i] is the Java name for codesets[i].
        String[] charsets = new String[] {
            "x-IBM950", "Big5-HKSCS", "GB18030", "x-IBM1046",
            "x-IBM1124", "x-IBM1129", "windows-1252", "x-IBM856",
            "IBM00858", "x-IBM921", "x-IBM922", "x-IBM942C",
            "x-IBM943C", "x-IBM1383", "x-IBM29626C", "x-IBM970",
            "x-IBM964", "ISO-8859-1", "ISO-8859-15", "ISO-8859-2",
            "ISO-8859-4", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
            "ISO-8859-8", "ISO-8859-9", "TIS-620", "UTF-8", };
        for (int i = 0; i < codesets.length; i++) {
            Charset cs0 = Charset.forName(codesets[i]);
            if (!"sun.nio.cs".equals(cs0.getClass().getPackage().getName())) {
                throw new Exception(cs0.getClass().getCanonicalName()+" failed");
            }
            Charset cs1 = Charset.forName(charsets[i]);
            if (!cs0.equals(cs1)) {
                throw new Exception(codesets[i]+"("+cs0.name()+") failed");
            }
        }
    }

    /**
     * For every available charset whose lower-cased name starts with
     * "x-ibm-", "x-ibm", "ibm-" or "ibm", derives the numeric suffix (leading
     * zeros stripped, then left-padded with '0' to at least three chars;
     * "ibm-thai" is treated as "838") and requires that the charset's name
     * or aliases include "cp"+suffix, "ibm"+suffix, "ibm-"+suffix and the
     * bare suffix, compared case-insensitively.
     *
     * @throws Exception naming the charset and the first missing alias
     */
    private static void checkIbmAliases() throws Exception {
        for (Charset cs : Charset.availableCharsets().values()) {
            String csName = cs.name().toLowerCase(Locale.ROOT);
            Set<String> aliases = new HashSet<>();
            for (String alias : cs.aliases()) {
                aliases.add(alias.toLowerCase(Locale.ROOT));
            }
            aliases.add(csName);

            String suffix = null;
            if (csName.startsWith("x-ibm-")) {
                suffix = csName.replaceAll("x-ibm-0*", "");
            } else if (csName.startsWith("x-ibm")) {
                suffix = csName.replaceAll("x-ibm0*", "");
            } else if (csName.startsWith("ibm-")) {
                suffix = csName.replaceAll("ibm-0*", "");
            } else if (csName.startsWith("ibm")) {
                suffix = csName.replaceAll("ibm0*", "");
            }
            if ("ibm-thai".equals(csName)) {
                suffix = "838";
            }
            if (suffix == null) {
                continue;   // not an IBM-named charset
            }
            while (suffix.length() < 3) {
                suffix = "0"+suffix;
            }
            String[] required = {
                "cp"+suffix, "ibm"+suffix, "ibm-"+suffix, suffix };
            for (String alias : required) {
                if (!aliases.contains(alias)) {
                    throw new Exception(cs.name()+"\t"+alias);
                }
            }
        }
    }

    // Following test data is for 8235834
    private static final byte[] byteIBM943c2b = new byte[] {
        (byte)0x81, (byte)0x5C, (byte)0x81, (byte)0x60,
        (byte)0x81, (byte)0x61, (byte)0x81, (byte)0x7C,
        (byte)0x88, (byte)0xA0, (byte)0x89, (byte)0x8B,
        (byte)0x89, (byte)0xA8, (byte)0x8A, (byte)0x9A,
        (byte)0x8B, (byte)0xA0, (byte)0x8B, (byte)0xEB,
        (byte)0x8C, (byte)0x71, (byte)0x8C, (byte)0x74,
        (byte)0x8C, (byte)0xB2, (byte)0x8D, (byte)0x8D,
        (byte)0x8D, (byte)0xF2, (byte)0x8E, (byte)0xC6,
        (byte)0x8F, (byte)0x4A, (byte)0x8F, (byte)0xD3,
        (byte)0x8F, (byte)0xDD, (byte)0x90, (byte)0xE4,
        (byte)0x91, (byte)0x7E, (byte)0x91, (byte)0x89,
        (byte)0x91, (byte)0xCB, (byte)0x92, (byte)0x5C,
        (byte)0x92, (byte)0xCD, (byte)0x93, (byte)0x55,
        (byte)0x93, (byte)0x5E, (byte)0x93, (byte)0x98,
        (byte)0x93, (byte)0xC0, (byte)0x94, (byte)0x58,
        (byte)0x94, (byte)0x8D, (byte)0x94, (byte)0xAC,
        (byte)0x94, (byte)0xAE, (byte)0x96, (byte)0x6A,
        (byte)0x96, (byte)0xCB, (byte)0x97, (byte)0x89,
        (byte)0x98, (byte)0x58, (byte)0x9B, (byte)0xA0,
        (byte)0x9D, (byte)0xB7, (byte)0x9E, (byte)0x94,
        (byte)0xE3, (byte)0x79, (byte)0xE4, (byte)0x45,
        (byte)0xE8, (byte)0xF6, (byte)0xFA, (byte)0x55,
        (byte)0xFA, (byte)0x59,
    };

    // Chars that must encode to byteIBM943c2b.
    private static final String strIBM943c2b1 =
        "\u2015\uFF5E\u2225\uFF0D\u555E\u7130\u9DD7\u5699" +
        "\u4FE0\u8EC0\u7E6B\u8346\u9E7C\u9EB4\u6805\u5C62" +
        "\u7E61\u8523\u91AC\u87EC\u6414\u7626\u9A52\u7C1E" +
        "\u6451\u5861\u985A\u79B1\u7006\u56CA\u525D\u6F51" +
        "\u91B1\u9830\u9EB5\u840A\u881F\u5C5B\u6522\u688E" +
        "\u7E48\u8141\u9839\uFFE4\uF86F";

    // Chars that byteIBM943c2b must decode to.
    private static final String strIBM943c2b2 =
        "\u2014\u301C\u2016\u2212\u5516\u7114\u9D0E\u565B" +
        "\u4FA0\u8EAF\u7E4B\u834A\u9E78\u9EB9\u67F5\u5C61" +
        "\u7E4D\u848B\u91A4\u8749\u63BB\u75E9\u9A28\u7BAA" +
        "\u63B4\u586B\u985B\u7977\u6D9C\u56A2\u5265\u6E8C" +
        "\u9197\u982C\u9EBA\u83B1\u874B\u5C4F\u6505\u688D" +
        "\u7E66\u80FC\u983D\u00A6\u2116";

    /**
     * Bug 8235834: for x-IBM943 and x-IBM943C, requires that
     * {@code strIBM943c2b1} encodes to {@code byteIBM943c2b} and that those
     * bytes decode to {@code strIBM943c2b2}.
     *
     * @throws Exception if either comparison fails for a charset
     */
    private static void bug8235834 () throws Exception {
        // 8235834 affects IBM-943 and IBM-943C encoder.
        // The decoded results of the corresponding characters of IBM-943
        // and IBM-943C is the same.
        for (String csName : new String[] {"x-IBM943", "x-IBM943C"}) {
            Charset cs = Charset.forName(csName);
            if (!Arrays.equals(byteIBM943c2b, strIBM943c2b1.getBytes(cs))) {
                throw new Exception(csName+" failed to encode");
            }
            if (!strIBM943c2b2.equals(new String(byteIBM943c2b, cs))) {
                throw new Exception(csName+" failed to round-trip conversion");
            }
        }
    }

    /**
     * Runs every regression check in a fixed order.
     *
     * @param args unused
     * @throws Exception thrown by the first check that fails
     */
    public static void main (String[] args) throws Exception {
        bug6577466();
        // need to be tested before any other IBM949C test case
        bug6639450();
        bug6371437();
        bug6371422();
        bug6371416();
        bug6371619();
        bug6371431();
        bug6569191();
        bug8202329();
        bug8212794();
        bug8213618();
        bug8220281();
        bug8235834();
    }
}