1 /*
2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.
8  *
9  * This code is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12  * version 2 for more details (a copy is included in the LICENSE file that
13  * accompanied this code).
14  *
15  * You should have received a copy of the GNU General Public License version
16  * 2 along with this work; if not, write to the Free Software Foundation,
17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18  *
19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20  * or visit www.oracle.com if you need additional information or have any
21  * questions.
22  */
23 
24 /* @test
25  * @bug 4153987
26  * @summary Malformed surrogates should be handled by the converter in
27  * substitution mode.
28  */
29 import java.io.*;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CharsetEncoder;
33 import java.nio.CharBuffer;
34 import java.nio.ByteBuffer;
35 import java.nio.charset.CodingErrorAction;
36 import java.nio.charset.MalformedInputException;
main(String[] args)37 import java.nio.charset.UnmappableCharacterException;
38 import java.util.SortedMap;
39 
/**
 * Exercises charset encoders with well-formed and malformed UTF-16 surrogate
 * sequences embedded between a plain prefix and suffix.
 *
 * <p>Three behaviours are checked for each charset handed to the test methods:
 * <ul>
 *   <li>a well-formed surrogate pair either encodes or is reported as
 *       unmappable ({@link #testNormalSurrogate});</li>
 *   <li>malformed sequences are rejected when the encoder is left with its
 *       freshly-created error actions ({@link #testMalformedSurrogate});</li>
 *   <li>malformed sequences are substituted by the encoder's replacement when
 *       both error actions are set to {@link CodingErrorAction#REPLACE}
 *       ({@link #testSurrogateWithReplacement}).</li>
 * </ul>
 * Each behaviour is checked twice: once through the encoder directly and once
 * through an {@link OutputStreamWriter} built on the same encoder.
 */
public class MalformedSurrogates {

    private static final String PREFIX = "abc";
    private static final String SUFFIX = "efgh";
    /** Two high surrogates in a row. */
    private static final String MALFORMED_SURROGATE = PREFIX + "\uD800\uDB00" + SUFFIX;
    /** A well-formed high/low surrogate pair. */
    private static final String NORMAL_SURROGATE = PREFIX + "\uD800\uDC00" + SUFFIX;
    /** A low surrogate followed by a high surrogate. */
    private static final String REVERSED_SURROGATE = PREFIX + "\uDC00\uD800" + SUFFIX;
    /** A high surrogate with no partner. */
    private static final String SOLITARY_HIGH_SURROGATE = PREFIX + "\uD800" + SUFFIX;
    /** A low surrogate with no partner. */
    private static final String SOLITARY_LOW_SURROGATE = PREFIX + "\uDC00" + SUFFIX;

    /**
     * Runs all checks against every available charset that supports encoding,
     * with the exception of {@code x-COMPOUND_TEXT}.
     *
     * @param args ignored
     * @throws IOException if an unexpected I/O failure escapes one of the checks
     */
    public static void main(String[] args) throws IOException {
        SortedMap<String, Charset> charsets = Charset.availableCharsets();
        for (Charset charset : charsets.values()) {
            // Decode-only charsets cannot be tested; x-COMPOUND_TEXT is skipped
            // explicitly (the reason is not recorded in this file).
            if (!charset.canEncode() || charset.name().equals("x-COMPOUND_TEXT")) {
                continue;
            }
            testNormalSurrogate(charset, NORMAL_SURROGATE);
            testMalformedSurrogate(charset, MALFORMED_SURROGATE);
            testMalformedSurrogate(charset, REVERSED_SURROGATE);
            testMalformedSurrogate(charset, SOLITARY_HIGH_SURROGATE);
            testMalformedSurrogate(charset, SOLITARY_LOW_SURROGATE);
            testSurrogateWithReplacement(charset, NORMAL_SURROGATE);
            testSurrogateWithReplacement(charset, MALFORMED_SURROGATE);
            testSurrogateWithReplacement(charset, REVERSED_SURROGATE);
            testSurrogateWithReplacement(charset, SOLITARY_HIGH_SURROGATE);
            testSurrogateWithReplacement(charset, SOLITARY_LOW_SURROGATE);
        }
    }

    /**
     * Verifies that a malformed surrogate sequence is rejected by a newly
     * created encoder of {@code cs}, both directly and through a writer.
     *
     * @param cs        charset whose encoder is under test
     * @param surrogate text containing a malformed surrogate sequence
     * @throws RuntimeException if the encoder claims it can encode the text, or
     *                          if encoding/writing it completes without a
     *                          {@link MalformedInputException} or
     *                          {@link UnmappableCharacterException}
     * @throws IOException      on any other I/O failure
     */
    public static void testMalformedSurrogate(Charset cs, String surrogate) throws IOException {
        CharsetEncoder en = cs.newEncoder();
        if (en.canEncode(surrogate)) {
            throw new RuntimeException("testMalformedSurrogate failed with charset " + cs.name());
        }

        try {
            en.encode(CharBuffer.wrap(surrogate));
            throw new RuntimeException("Should throw MalformedInputException or UnmappableCharacterException");
        } catch (MalformedInputException | UnmappableCharacterException expected) {
            // This is the outcome the test is looking for.
        } finally {
            // Return the encoder to its initial state so the writer below can reuse it.
            en.reset();
        }

        try (OutputStreamWriter osw = new OutputStreamWriter(new ByteArrayOutputStream(), en)) {
            osw.write(surrogate);
            throw new RuntimeException("Should throw MalformedInputException or UnmappableCharacterException");
        } catch (MalformedInputException | UnmappableCharacterException expected) {
            // This is the outcome the test is looking for.
        }
    }

    /**
     * Verifies that a well-formed surrogate pair either encodes or is reported
     * as unmappable, both directly and through a writer. Any other exception
     * propagates to the caller.
     *
     * @param cs        charset whose encoder is under test
     * @param surrogate text containing a well-formed surrogate pair
     * @throws IOException if encoding fails with anything other than
     *                     {@link UnmappableCharacterException}
     */
    public static void testNormalSurrogate(Charset cs, String surrogate) throws IOException {
        CharsetEncoder en = cs.newEncoder();
        try {
            en.encode(CharBuffer.wrap(surrogate));
        } catch (UnmappableCharacterException tolerated) {
            // The charset has no mapping for the character; that is acceptable here.
        } finally {
            // Return the encoder to its initial state so the writer below can reuse it.
            en.reset();
        }

        try (OutputStreamWriter osw = new OutputStreamWriter(new ByteArrayOutputStream(), en)) {
            osw.write(surrogate);
        } catch (UnmappableCharacterException tolerated) {
            // The charset has no mapping for the character; that is acceptable here.
        }
    }

    /**
     * Verifies that, with both encoder error actions set to
     * {@link CodingErrorAction#REPLACE}, encoding {@code surrogate} and decoding
     * the result yields the prefix and suffix with the encoder's replacement
     * substituted for the malformed part. The check is skipped for charsets
     * whose encoder cannot encode a well-formed surrogate pair.
     *
     * <p>The round trip is performed twice: once with
     * {@link CharsetEncoder#encode(CharBuffer)} /
     * {@link CharsetDecoder#decode(ByteBuffer)}, and once through an
     * {@link OutputStreamWriter} / {@link InputStreamReader} pair built on the
     * same encoder and decoder.
     *
     * @param cs        charset whose encoder and decoder are under test
     * @param surrogate one of the surrogate test strings of this class
     * @throws RuntimeException if either round trip does not produce the
     *                          expected text
     * @throws IOException      on an unexpected coding or I/O failure
     */
    public static void testSurrogateWithReplacement(Charset cs, String surrogate) throws IOException {
        CharsetEncoder en = cs.newEncoder();
        CharsetDecoder de = cs.newDecoder();
        if (!en.canEncode(NORMAL_SURROGATE)) {
            return;
        }
        String replace = new String(en.replacement(), cs);
        String expected = expectedAfterReplacement(surrogate, replace);

        try {
            // The actions stay in effect after reset(), so the writer-based
            // round trip below also runs in replacement mode.
            en.onMalformedInput(CodingErrorAction.REPLACE);
            en.onUnmappableCharacter(CodingErrorAction.REPLACE);
            ByteBuffer bbuf = en.encode(CharBuffer.wrap(surrogate));
            CharBuffer cbuf = de.decode(bbuf);
            verifyRoundTrip(cs, surrogate, cbuf.toString(), expected);
        } finally {
            en.reset();
            de.reset();
        }

        try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
                OutputStreamWriter osw = new OutputStreamWriter(bos, en)) {
            osw.write(surrogate);
            osw.flush();
            try (InputStreamReader isr = new InputStreamReader(
                    new ByteArrayInputStream(bos.toByteArray()), de)) {
                // Drain to EOF instead of issuing one read into an
                // expected-length buffer: a single read may legally return
                // fewer characters than requested, and stopping at
                // expected.length() would hide extra trailing output.
                verifyRoundTrip(cs, surrogate, readFully(isr), expected);
            }
        }
    }

    /**
     * Returns the text a replace-mode round trip of {@code surrogate} should
     * produce: two replacements for the two-unit malformed sequences, one for a
     * solitary surrogate, and the unchanged well-formed string otherwise.
     */
    private static String expectedAfterReplacement(String surrogate, String replace) {
        switch (surrogate) {
            case MALFORMED_SURROGATE:
            case REVERSED_SURROGATE:
                return PREFIX + replace + replace + SUFFIX;
            case SOLITARY_HIGH_SURROGATE:
            case SOLITARY_LOW_SURROGATE:
                return PREFIX + replace + SUFFIX;
            default:
                return NORMAL_SURROGATE;
        }
    }

    /** Throws a {@link RuntimeException} when {@code actual} differs from {@code expected}. */
    private static void verifyRoundTrip(Charset cs, String surrogate, String actual, String expected) {
        if (!actual.equals(expected)) {
            throw new RuntimeException("charset " + cs.name() + " (en)decoded the surrogate " + surrogate + " to " + actual + " which is not same as the expected " + expected);
        }
    }

    /** Reads {@code reader} until end of stream and returns everything it produced. */
    private static String readFully(Reader reader) throws IOException {
        StringBuilder text = new StringBuilder();
        char[] chunk = new char[64];
        int count;
        while ((count = reader.read(chunk)) != -1) {
            text.append(chunk, 0, count);
        }
        return text.toString();
    }
}
154