1 /*
2  * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* @test
25  * @bug 6636323 6636319 7040220 7096080 7183053 8080248 8054307
26  * @summary Test if StringCoding and NIO result have the same de/encoding result
27  * @modules java.base/sun.nio.cs
28  * @run main/othervm/timeout=2000 TestStringCoding
29  * @key randomness
30  */
31 
32 import java.util.*;
33 import java.nio.*;
34 import java.nio.charset.*;
35 
36 public class TestStringCoding {
main(String[] args)37     public static void main(String[] args) throws Throwable {
38 
39         // full bmp first
40         char[] bmp = new char[0x10000];
41         for (int i = 0; i < 0x10000; i++) {
42             bmp[i] = (char)i;
43         }
44         char[] latin = Arrays.copyOf(bmp, 0x100);
45         char[] ascii =  Arrays.copyOf(bmp, 0x80);
46 
47         byte[] latinBA = new byte[0x100];
48         for (int i = 0; i < 0x100; i++) {
49             latinBA[i] = (byte)i;
50         }
51         byte[] asciiBA =  Arrays.copyOf(latinBA, 0x80);
52 
53         for (Boolean hasSM: new boolean[] { false, true }) {
54             if (hasSM) {
55                 System.setSecurityManager(new PermissiveSecurityManger());
56             }
57             for (Charset cs:  Charset.availableCharsets().values()) {
58                 if ("ISO-2022-CN".equals(cs.name()) ||
59                     "x-COMPOUND_TEXT".equals(cs.name()) ||
60                     "x-JISAutoDetect".equals(cs.name()))
61                     continue;
62                 System.out.printf("Testing(sm=%b) " + cs.name() + "....", hasSM);
63 
64                 testNewString(cs, testGetBytes(cs, new String(bmp)));
65                 testNewString(cs, testGetBytes(cs, new String(latin)));
66                 testNewString(cs, testGetBytes(cs, new String(ascii)));
67                 testGetBytes(cs, testNewString(cs, latinBA));
68                 testGetBytes(cs, testNewString(cs, asciiBA));
69 
70                 // "randomed" sizes
71                 Random rnd = new Random();
72                 for (int i = 0; i < 10; i++) {
73                     //System.out.printf("    blen=%d, clen=%d%n", blen, clen);
74                     char[] bmp0 = Arrays.copyOf(bmp, rnd.nextInt(0x10000));
75                     testNewString(cs, testGetBytes(cs, new String(bmp0)));
76                     //add a pair of surrogates
77                     int pos = bmp0.length / 2;
78                     if ((pos + 1) < bmp0.length) {
79                         bmp0[pos] = '\uD800';
80                         bmp0[pos+1] = '\uDC00';
81                     }
82                     testNewString(cs, testGetBytes(cs, new String(bmp0)));
83 
84                     char[] latin0 = Arrays.copyOf(latin, rnd.nextInt(0x100));
85                     char[] ascii0 = Arrays.copyOf(ascii, rnd.nextInt(0x80));
86                     byte[] latinBA0 = Arrays.copyOf(latinBA, rnd.nextInt(0x100));
87                     byte[] asciiBA0 = Arrays.copyOf(asciiBA, rnd.nextInt(0x80));
88                     testNewString(cs, testGetBytes(cs, new String(latin0)));
89                     testNewString(cs, testGetBytes(cs, new String(ascii0)));
90                     testGetBytes(cs, testNewString(cs, latinBA0));
91                     testGetBytes(cs, testNewString(cs, asciiBA0));
92                 }
93                 testSurrogates(cs);
94                 testMixed(cs);
95                 System.out.println("done!");
96             }
97         }
98     }
99 
testMixed(Charset cs)100     static void testMixed(Charset cs) throws Throwable {
101         CharsetDecoder dec = cs.newDecoder()
102             .onMalformedInput(CodingErrorAction.REPLACE)
103             .onUnmappableCharacter(CodingErrorAction.REPLACE);
104         CharsetEncoder enc = cs.newEncoder()
105             .onMalformedInput(CodingErrorAction.REPLACE)
106             .onUnmappableCharacter(CodingErrorAction.REPLACE);
107         List<Integer> cps = new ArrayList<>(0x10000);
108         int off = 0;
109         int cp = 0;
110         while (cp < 0x10000) {
111             if (enc.canEncode((char)cp)) {
112                cps.add(cp);
113             }
114             cp++;
115         }
116         Collections.shuffle(cps);
117         char[] bmpCA = new char[cps.size()];
118         for (int i = 0; i < cps.size(); i++)
119             bmpCA[i] = (char)(int)cps.get(i);
120         String bmpStr = new String(bmpCA);
121         //getBytes(csn);
122         byte[] bmpBA = bmpStr.getBytes(cs.name());
123         ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(bmpCA));
124         byte[] baNIO = new byte[bf.limit()];
125         bf.get(baNIO, 0, baNIO.length);
126         if (!Arrays.equals(bmpBA, baNIO)) {
127             throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
128         }
129 
130         //getBytes(cs);
131         bmpBA = bmpStr.getBytes(cs);
132         if (!Arrays.equals(bmpBA, baNIO)) {
133             throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());
134         }
135 
136         //new String(csn);
137         String strSC = new String(bmpBA, cs.name());
138         String strNIO = dec.reset().decode(ByteBuffer.wrap(bmpBA)).toString();
139         if(!strNIO.equals(strSC)) {
140             throw new RuntimeException("new String(csn) failed  -> " + cs.name());
141         }
142         //new String(cs);
143         strSC = new String(bmpBA, cs);
144         if (!strNIO.equals(strSC)) {
145             throw new RuntimeException("new String(cs) failed  -> " + cs.name());
146         }
147     }
148 
getBytes(CharsetEncoder enc, String str)149     static byte[] getBytes(CharsetEncoder enc, String str) throws Throwable {
150         ByteBuffer bf = enc.reset().encode(CharBuffer.wrap(str.toCharArray()));
151         byte[] ba = new byte[bf.limit()];
152         bf.get(ba, 0, ba.length);
153         return ba;
154     }
155 
testGetBytes(Charset cs, String str)156     static byte[] testGetBytes(Charset cs, String str) throws Throwable {
157         CharsetEncoder enc = cs.newEncoder()
158             .onMalformedInput(CodingErrorAction.REPLACE)
159             .onUnmappableCharacter(CodingErrorAction.REPLACE);
160         //getBytes(csn);
161         byte[] baSC = str.getBytes(cs.name());
162         byte[] baNIO = getBytes(enc, str);
163         if (!Arrays.equals(baSC, baNIO)) {
164             throw new RuntimeException("getBytes(csn) failed  -> " + cs.name());
165         }
166         //getBytes(cs);
167         baSC = str.getBytes(cs);
168         if (!Arrays.equals(baSC, baNIO)) {
169             throw new RuntimeException("getBytes(cs) failed  -> " + cs.name());
170         }
171         return baSC;
172     }
173 
testNewString(Charset cs, byte[] ba)174     static String testNewString(Charset cs, byte[] ba) throws Throwable {
175         CharsetDecoder dec = cs.newDecoder()
176             .onMalformedInput(CodingErrorAction.REPLACE)
177             .onUnmappableCharacter(CodingErrorAction.REPLACE);
178         //new String(csn);
179         String strSC = new String(ba, cs.name());
180         String strNIO = dec.reset().decode(ByteBuffer.wrap(ba)).toString();
181         if(!strNIO.equals(strSC)) {
182             throw new RuntimeException("new String(csn) failed  -> " + cs.name());
183         }
184         //new String(cs);
185         strSC = new String(ba, cs);
186         if (!strNIO.equals(strSC)) {
187             throw new RuntimeException("new String(cs)/bmp failed  -> " + cs.name());
188         }
189         return strSC;
190     }
191 
testSurrogates(Charset cs)192     static void testSurrogates(Charset cs) throws Throwable {
193         //encode unmappable surrogates
194         CharsetEncoder enc = cs.newEncoder()
195             .onMalformedInput(CodingErrorAction.REPLACE)
196             .onUnmappableCharacter(CodingErrorAction.REPLACE);
197         if (enc instanceof sun.nio.cs.ArrayEncoder &&
198             cs.contains(Charset.forName("ASCII"))) {
199             if (cs.name().equals("UTF-8") ||     // utf8 handles surrogates
200                 cs.name().equals("CESU-8"))      // utf8 handles surrogates
201                 return;
202             enc.replaceWith(new byte[] { (byte)'A'});
203             sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc;
204 
205             String str = "ab\uD800\uDC00\uD800\uDC00cd";
206             byte[] ba = new byte[str.length() - 2];
207             int n = cae.encode(str.toCharArray(), 0, str.length(), ba);
208             if (n != 6 || !"abAAcd".equals(new String(ba, cs.name())))
209                 throw new RuntimeException("encode1(surrogates) failed  -> "
210                                            + cs.name());
211 
212             ba = new byte[str.length()];
213             n = cae.encode(str.toCharArray(), 0, str.length(), ba);
214             if (n != 6 || !"abAAcd".equals(new String(ba, 0, n,
215                                                      cs.name())))
216                 throw new RuntimeException("encode2(surrogates) failed  -> "
217                                            + cs.name());
218             str = "ab\uD800B\uDC00Bcd";
219             ba = new byte[str.length()];
220             n = cae.encode(str.toCharArray(), 0, str.length(), ba);
221             if (n != 8 || !"abABABcd".equals(new String(ba, 0, n,
222                                                        cs.name())))
223                 throw new RuntimeException("encode3(surrogates) failed  -> "
224                                            + cs.name());
225             /* sun.nio.cs.ArrayDeEncoder works on the assumption that the
226                invoker (StringCoder) allocates enough output buf, utf8
227                and double-byte coder does not check the output buffer limit.
228             ba = new byte[str.length() - 1];
229             n = cae.encode(str.toCharArray(), 0, str.length(), ba);
230             if (n != 7 || !"abABABc".equals(new String(ba, 0, n, cs.name()))) {
231                 throw new RuntimeException("encode4(surrogates) failed  -> "
232                                            + cs.name());
233             }
234             */
235         }
236 
237         //encode mappable surrogates for hkscs
238         if (cs.name().equals("Big5-HKSCS") || cs.name().equals("x-MS950-HKSCS")) {
239             String str = "ab\uD840\uDD0Ccd";
240             byte[] expected = new byte[] {(byte)'a', (byte)'b',
241                 (byte)0x88, (byte)0x45, (byte)'c', (byte)'d' };
242             if (!Arrays.equals(str.getBytes(cs.name()), expected) ||
243                 !Arrays.equals(str.getBytes(cs), expected)) {
244                 throw new RuntimeException("encode(surrogates) failed  -> "
245                                            + cs.name());
246             }
247         }
248     }
249 
250     static class PermissiveSecurityManger extends SecurityManager {
checkPermission(java.security.Permission p)251         @Override public void checkPermission(java.security.Permission p) {}
252     }
253 }
254