1 /*
2  * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.nio.cs;
27 
28 import java.nio.ByteBuffer;
29 import java.nio.CharBuffer;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CharsetEncoder;
33 import java.nio.charset.CoderResult;
34 import java.util.Arrays;
35 
36 import jdk.internal.access.JavaLangAccess;
37 import jdk.internal.access.SharedSecrets;
38 import sun.nio.cs.Surrogate;
39 import sun.nio.cs.ArrayDecoder;
40 import sun.nio.cs.ArrayEncoder;
41 import static sun.nio.cs.CharsetMapping.*;
42 
43 /*
44  * Four types of "DoubleByte" charsets are implemented in this class
45  * (1)DoubleByte
 *    The "most widely used" multibyte charset, a combination of
47  *    a singlebyte character set (usually the ASCII charset) and a
48  *    doublebyte character set. The codepoint values of singlebyte
49  *    and doublebyte don't overlap. Microsoft's multibyte charsets
50  *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
51  *    948, 949 and 950 are such charsets.
52  *
53  * (2)DoubleByte_EBCDIC
54  *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
55  *    in and out between the singlebyte character set and doublebyte
56  *    character set.
57  *
58  * (3)DoubleByte_SIMPLE_EUC
 *    A "simple" form of the EUC encoding scheme in which only the
 *    singlebyte character set G0 and one doublebyte character set
 *    G1 are defined; G2 (with SS2) and G3 (with SS3) are not used.
 *    So it is actually the same as the "typical" type (1) mentioned
 *    above, except that it returns "malformed" for SS2 and SS3 when
 *    decoding.
65  *
66  * (4)DoubleByte ONLY
 *    A "pure" doublebyte-only character set. From an implementation
 *    point of view, this is type (1) with "decodeSingle" always
 *    returning unmappable.
70  *
71  * For simplicity, all implementations share the same decoding and
72  * encoding data structure.
73  *
74  * Decoding:
75  *
76  *    char[][] b2c;
77  *    char[] b2cSB;
78  *    int b2Min, b2Max
79  *
80  *    public char decodeSingle(int b) {
 *        return b2cSB[b];
82  *    }
83  *
84  *    public char decodeDouble(int b1, int b2) {
85  *        if (b2 < b2Min || b2 > b2Max)
86  *            return UNMAPPABLE_DECODING;
87  *         return b2c[b1][b2 - b2Min];
88  *    }
89  *
90  *    (1)b2Min, b2Max are the corresponding min and max value of the
91  *       low-half of the double-byte.
 *    (2)The high 8-bit/b1 of the double-byte is used to index into
 *       the b2c array.
94  *
95  * Encoding:
96  *
97  *    char[] c2b;
98  *    char[] c2bIndex;
99  *
100  *    public int encodeChar(char ch) {
101  *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
102  *    }
103  *
104  */
105 
106 public class DoubleByte {
107 
108     public static final char[] B2C_UNMAPPABLE;
109     static {
110         B2C_UNMAPPABLE = new char[0x100];
Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING)111         Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
112     }
113 
114     public static class Decoder extends CharsetDecoder
115                                 implements DelegatableDecoder, ArrayDecoder
116     {
117         private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
118 
119         final char[][] b2c;
120         final char[] b2cSB;
121         final int b2Min;
122         final int b2Max;
123         final boolean isASCIICompatible;
124 
125         // for SimpleEUC override
crMalformedOrUnderFlow(int b)126         protected CoderResult crMalformedOrUnderFlow(int b) {
127             return CoderResult.UNDERFLOW;
128         }
129 
crMalformedOrUnmappable(int b1, int b2)130         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
131             if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
132                 b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
133                 decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
134                 return CoderResult.malformedForLength(1);
135             }
136             return CoderResult.unmappableForLength(2);
137         }
138 
Decoder(Charset cs, float avgcpb, float maxcpb, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)139         public Decoder(Charset cs, float avgcpb, float maxcpb,
140                        char[][] b2c, char[] b2cSB,
141                        int b2Min, int b2Max,
142                        boolean isASCIICompatible) {
143             super(cs, avgcpb, maxcpb);
144             this.b2c = b2c;
145             this.b2cSB = b2cSB;
146             this.b2Min = b2Min;
147             this.b2Max = b2Max;
148             this.isASCIICompatible = isASCIICompatible;
149         }
150 
Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)151         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
152                        boolean isASCIICompatible) {
153             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
154         }
155 
Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max)156         public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
157             this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
158         }
159 
decodeArrayLoop(ByteBuffer src, CharBuffer dst)160         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
161             byte[] sa = src.array();
162             int soff = src.arrayOffset();
163             int sp = soff + src.position();
164             int sl = soff + src.limit();
165 
166             char[] da = dst.array();
167             int doff = dst.arrayOffset();
168             int dp = doff + dst.position();
169             int dl = doff + dst.limit();
170 
171             try {
172                 if (isASCIICompatible) {
173                     int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
174                     dp += n;
175                     sp += n;
176                 }
177                 while (sp < sl && dp < dl) {
178                     // inline the decodeSingle/Double() for better performance
179                     int inSize = 1;
180                     int b1 = sa[sp] & 0xff;
181                     char c = b2cSB[b1];
182                     if (c == UNMAPPABLE_DECODING) {
183                         if (sl - sp < 2)
184                             return crMalformedOrUnderFlow(b1);
185                         int b2 = sa[sp + 1] & 0xff;
186                         if (b2 < b2Min || b2 > b2Max ||
187                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
188                             return crMalformedOrUnmappable(b1, b2);
189                         }
190                         inSize++;
191                     }
192                     da[dp++] = c;
193                     sp += inSize;
194                 }
195                 return (sp >= sl) ? CoderResult.UNDERFLOW
196                                   : CoderResult.OVERFLOW;
197             } finally {
198                 src.position(sp - soff);
199                 dst.position(dp - doff);
200             }
201         }
202 
decodeBufferLoop(ByteBuffer src, CharBuffer dst)203         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
204             int mark = src.position();
205             try {
206 
207                 while (src.hasRemaining() && dst.hasRemaining()) {
208                     int b1 = src.get() & 0xff;
209                     char c = b2cSB[b1];
210                     int inSize = 1;
211                     if (c == UNMAPPABLE_DECODING) {
212                         if (src.remaining() < 1)
213                             return crMalformedOrUnderFlow(b1);
214                         int b2 = src.get() & 0xff;
215                         if (b2 < b2Min || b2 > b2Max ||
216                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
217                             return crMalformedOrUnmappable(b1, b2);
218                         inSize++;
219                     }
220                     dst.put(c);
221                     mark += inSize;
222                 }
223                 return src.hasRemaining()? CoderResult.OVERFLOW
224                                          : CoderResult.UNDERFLOW;
225             } finally {
226                 src.position(mark);
227             }
228         }
229 
230         // Make some protected methods public for use by JISAutoDetect
decodeLoop(ByteBuffer src, CharBuffer dst)231         public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
232             if (src.hasArray() && dst.hasArray())
233                 return decodeArrayLoop(src, dst);
234             else
235                 return decodeBufferLoop(src, dst);
236         }
237 
238         @Override
decode(byte[] src, int sp, int len, char[] dst)239         public int decode(byte[] src, int sp, int len, char[] dst) {
240             int dp = 0;
241             int sl = sp + len;
242             char repl = replacement().charAt(0);
243             while (sp < sl) {
244                 int b1 = src[sp++] & 0xff;
245                 char c = b2cSB[b1];
246                 if (c == UNMAPPABLE_DECODING) {
247                     if (sp < sl) {
248                         int b2 = src[sp++] & 0xff;
249                         if (b2 < b2Min || b2 > b2Max ||
250                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
251                             if (crMalformedOrUnmappable(b1, b2).length() == 1) {
252                                 sp--;
253                             }
254                         }
255                     }
256                     if (c == UNMAPPABLE_DECODING) {
257                          c = repl;
258                     }
259                 }
260                 dst[dp++] = c;
261             }
262             return dp;
263         }
264 
265         @Override
isASCIICompatible()266         public boolean isASCIICompatible() {
267             return isASCIICompatible;
268         }
269 
implReset()270         public void implReset() {
271             super.implReset();
272         }
273 
implFlush(CharBuffer out)274         public CoderResult implFlush(CharBuffer out) {
275             return super.implFlush(out);
276         }
277 
278         // decode loops are not using decodeSingle/Double() for performance
279         // reason.
decodeSingle(int b)280         public char decodeSingle(int b) {
281             return b2cSB[b];
282         }
283 
decodeDouble(int b1, int b2)284         public char decodeDouble(int b1, int b2) {
285             if (b1 < 0 || b1 > b2c.length ||
286                 b2 < b2Min || b2 > b2Max)
287                 return UNMAPPABLE_DECODING;
288             return  b2c[b1][b2 - b2Min];
289         }
290     }
291 
292     // IBM_EBCDIC_DBCS
293     public static class Decoder_EBCDIC extends Decoder {
294         private static final int SBCS = 0;
295         private static final int DBCS = 1;
296         private static final int SO = 0x0e;
297         private static final int SI = 0x0f;
298         private int  currentState;
299 
Decoder_EBCDIC(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)300         public Decoder_EBCDIC(Charset cs,
301                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
302                               boolean isASCIICompatible) {
303             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
304         }
305 
Decoder_EBCDIC(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max)306         public Decoder_EBCDIC(Charset cs,
307                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
308             super(cs, b2c, b2cSB, b2Min, b2Max, false);
309         }
310 
implReset()311         public void implReset() {
312             currentState = SBCS;
313         }
314 
315         // Check validity of dbcs ebcdic byte pair values
316         //
317         // First byte : 0x41 -- 0xFE
318         // Second byte: 0x41 -- 0xFE
319         // Doublebyte blank: 0x4040
320         //
321         // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
322         // as
323         //            if ((b1 != 0x40 || b2 != 0x40) &&
324         //                (b2 < 0x41 || b2 > 0xfe)) {...}
325         // is not correct/complete (range check for b1)
326         //
isDoubleByte(int b1, int b2)327         private static boolean isDoubleByte(int b1, int b2) {
328             return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
329                    || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
330         }
331 
decodeArrayLoop(ByteBuffer src, CharBuffer dst)332         protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
333             byte[] sa = src.array();
334             int sp = src.arrayOffset() + src.position();
335             int sl = src.arrayOffset() + src.limit();
336             char[] da = dst.array();
337             int dp = dst.arrayOffset() + dst.position();
338             int dl = dst.arrayOffset() + dst.limit();
339 
340             try {
341                 // don't check dp/dl together here, it's possible to
342                 // decdoe a SO/SI without space in output buffer.
343                 while (sp < sl) {
344                     int b1 = sa[sp] & 0xff;
345                     int inSize = 1;
346                     if (b1 == SO) {  // Shift out
347                         if (currentState != SBCS)
348                             return CoderResult.malformedForLength(1);
349                         else
350                             currentState = DBCS;
351                     } else if (b1 == SI) {
352                         if (currentState != DBCS)
353                             return CoderResult.malformedForLength(1);
354                         else
355                             currentState = SBCS;
356                     } else {
357                         char c;
358                         if (currentState == SBCS) {
359                             c = b2cSB[b1];
360                             if (c == UNMAPPABLE_DECODING)
361                                 return CoderResult.unmappableForLength(1);
362                         } else {
363                             if (sl - sp < 2)
364                                 return CoderResult.UNDERFLOW;
365                             int b2 = sa[sp + 1] & 0xff;
366                             if (b2 < b2Min || b2 > b2Max ||
367                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
368                                 if (!isDoubleByte(b1, b2))
369                                     return CoderResult.malformedForLength(2);
370                                 return CoderResult.unmappableForLength(2);
371                             }
372                             inSize++;
373                         }
374                         if (dl - dp < 1)
375                             return CoderResult.OVERFLOW;
376 
377                         da[dp++] = c;
378                     }
379                     sp += inSize;
380                 }
381                 return CoderResult.UNDERFLOW;
382             } finally {
383                 src.position(sp - src.arrayOffset());
384                 dst.position(dp - dst.arrayOffset());
385             }
386         }
387 
decodeBufferLoop(ByteBuffer src, CharBuffer dst)388         protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
389             int mark = src.position();
390             try {
391                 while (src.hasRemaining()) {
392                     int b1 = src.get() & 0xff;
393                     int inSize = 1;
394                     if (b1 == SO) {  // Shift out
395                         if (currentState != SBCS)
396                             return CoderResult.malformedForLength(1);
397                         else
398                             currentState = DBCS;
399                     } else if (b1 == SI) {
400                         if (currentState != DBCS)
401                             return CoderResult.malformedForLength(1);
402                         else
403                             currentState = SBCS;
404                     } else {
405                         char c = UNMAPPABLE_DECODING;
406                         if (currentState == SBCS) {
407                             c = b2cSB[b1];
408                             if (c == UNMAPPABLE_DECODING)
409                                 return CoderResult.unmappableForLength(1);
410                         } else {
411                             if (src.remaining() < 1)
412                                 return CoderResult.UNDERFLOW;
413                             int b2 = src.get()&0xff;
414                             if (b2 < b2Min || b2 > b2Max ||
415                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
416                                 if (!isDoubleByte(b1, b2))
417                                     return CoderResult.malformedForLength(2);
418                                 return CoderResult.unmappableForLength(2);
419                             }
420                             inSize++;
421                         }
422 
423                         if (dst.remaining() < 1)
424                             return CoderResult.OVERFLOW;
425 
426                         dst.put(c);
427                     }
428                     mark += inSize;
429                 }
430                 return CoderResult.UNDERFLOW;
431             } finally {
432                 src.position(mark);
433             }
434         }
435 
436         @Override
decode(byte[] src, int sp, int len, char[] dst)437         public int decode(byte[] src, int sp, int len, char[] dst) {
438             int dp = 0;
439             int sl = sp + len;
440             currentState = SBCS;
441             char repl = replacement().charAt(0);
442             while (sp < sl) {
443                 int b1 = src[sp++] & 0xff;
444                 if (b1 == SO) {  // Shift out
445                     if (currentState != SBCS)
446                         dst[dp++] = repl;
447                     else
448                         currentState = DBCS;
449                 } else if (b1 == SI) {
450                     if (currentState != DBCS)
451                         dst[dp++] = repl;
452                     else
453                         currentState = SBCS;
454                 } else {
455                     char c =  UNMAPPABLE_DECODING;
456                     if (currentState == SBCS) {
457                         c = b2cSB[b1];
458                         if (c == UNMAPPABLE_DECODING)
459                             c = repl;
460                     } else {
461                         if (sl == sp) {
462                             c = repl;
463                         } else {
464                             int b2 = src[sp++] & 0xff;
465                             if (b2 < b2Min || b2 > b2Max ||
466                                 (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
467                                 c = repl;
468                             }
469                         }
470                     }
471                     dst[dp++] = c;
472                 }
473             }
474             return dp;
475         }
476     }
477 
478     // DBCS_ONLY
479     public static class Decoder_DBCSONLY extends Decoder {
480         static final char[] b2cSB_UNMAPPABLE;
481         static {
482             b2cSB_UNMAPPABLE = new char[0x100];
Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING)483             Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
484         }
485 
486         // always returns unmappableForLenth(2) for doublebyte_only
487         @Override
crMalformedOrUnmappable(int b1, int b2)488         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
489             return CoderResult.unmappableForLength(2);
490         }
491 
Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)492         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
493                                 boolean isASCIICompatible) {
494             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
495         }
496 
Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max)497         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
498             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
499         }
500     }
501 
502     // EUC_SIMPLE
503     // The only thing we need to "override" is to check SS2/SS3 and
504     // return "malformed" if found
505     public static class Decoder_EUC_SIM extends Decoder {
506         private final int SS2 =  0x8E;
507         private final int SS3 =  0x8F;
508 
Decoder_EUC_SIM(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)509         public Decoder_EUC_SIM(Charset cs,
510                                char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
511                                boolean isASCIICompatible) {
512             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
513         }
514 
515         // No support provided for G2/G3 for SimpleEUC
crMalformedOrUnderFlow(int b)516         protected CoderResult crMalformedOrUnderFlow(int b) {
517             if (b == SS2 || b == SS3 )
518                 return CoderResult.malformedForLength(1);
519             return CoderResult.UNDERFLOW;
520         }
521 
crMalformedOrUnmappable(int b1, int b2)522         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
523             if (b1 == SS2 || b1 == SS3 )
524                 return CoderResult.malformedForLength(1);
525             return CoderResult.unmappableForLength(2);
526         }
527 
528         @Override
decode(byte[] src, int sp, int len, char[] dst)529         public int decode(byte[] src, int sp, int len, char[] dst) {
530             int dp = 0;
531             int sl = sp + len;
532             char repl = replacement().charAt(0);
533             while (sp < sl) {
534                 int b1 = src[sp++] & 0xff;
535                 char c = b2cSB[b1];
536                 if (c == UNMAPPABLE_DECODING) {
537                     if (sp < sl) {
538                         int b2 = src[sp++] & 0xff;
539                         if (b2 < b2Min || b2 > b2Max ||
540                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
541                             if (b1 == SS2 || b1 == SS3) {
542                                 sp--;
543                             }
544                             c = repl;
545                         }
546                     } else {
547                         c = repl;
548                     }
549                 }
550                 dst[dp++] = c;
551             }
552             return dp;
553         }
554     }
555 
556     public static class Encoder extends CharsetEncoder
557                                 implements ArrayEncoder
558     {
        // Largest encoded value that is written as one byte; the encode
        // methods write two bytes for any value above it.
        protected final int MAX_SINGLEBYTE = 0xff;
        // Encoding tables read by encodeChar():
        //     c2b[c2bIndex[ch >> 8] + (ch & 0xff)]
        private final char[] c2b;
        private final char[] c2bIndex;
        // Surrogate parser; null until sgp() creates it on first use.
        protected Surrogate.Parser sgp;
        // Value returned by isASCIICompatible().
        final boolean isASCIICompatible;
564 
Encoder(Charset cs, char[] c2b, char[] c2bIndex)565         public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
566             this(cs, c2b, c2bIndex, false);
567         }
568 
Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)569         public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
570             super(cs, 2.0f, 2.0f);
571             this.c2b = c2b;
572             this.c2bIndex = c2bIndex;
573             this.isASCIICompatible = isASCIICompatible;
574         }
575 
Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)576         public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
577                        boolean isASCIICompatible) {
578             super(cs, avg, max, repl);
579             this.c2b = c2b;
580             this.c2bIndex = c2bIndex;
581             this.isASCIICompatible = isASCIICompatible;
582         }
583 
canEncode(char c)584         public boolean canEncode(char c) {
585             return encodeChar(c) != UNMAPPABLE_ENCODING;
586         }
587 
sgp()588         protected Surrogate.Parser sgp() {
589             if (sgp == null)
590                 sgp = new Surrogate.Parser();
591             return sgp;
592         }
593 
encodeArrayLoop(CharBuffer src, ByteBuffer dst)594         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
595             char[] sa = src.array();
596             int sp = src.arrayOffset() + src.position();
597             int sl = src.arrayOffset() + src.limit();
598 
599             byte[] da = dst.array();
600             int dp = dst.arrayOffset() + dst.position();
601             int dl = dst.arrayOffset() + dst.limit();
602 
603             try {
604                 while (sp < sl) {
605                     char c = sa[sp];
606                     int bb = encodeChar(c);
607                     if (bb == UNMAPPABLE_ENCODING) {
608                         if (Character.isSurrogate(c)) {
609                             if (sgp().parse(c, sa, sp, sl) < 0)
610                                 return sgp.error();
611                             return sgp.unmappableResult();
612                         }
613                         return CoderResult.unmappableForLength(1);
614                     }
615 
616                     if (bb > MAX_SINGLEBYTE) {    // DoubleByte
617                         if (dl - dp < 2)
618                             return CoderResult.OVERFLOW;
619                         da[dp++] = (byte)(bb >> 8);
620                         da[dp++] = (byte)bb;
621                     } else {                      // SingleByte
622                         if (dl - dp < 1)
623                             return CoderResult.OVERFLOW;
624                         da[dp++] = (byte)bb;
625                     }
626 
627                     sp++;
628                 }
629                 return CoderResult.UNDERFLOW;
630             } finally {
631                 src.position(sp - src.arrayOffset());
632                 dst.position(dp - dst.arrayOffset());
633             }
634         }
635 
encodeBufferLoop(CharBuffer src, ByteBuffer dst)636         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
637             int mark = src.position();
638             try {
639                 while (src.hasRemaining()) {
640                     char c = src.get();
641                     int bb = encodeChar(c);
642                     if (bb == UNMAPPABLE_ENCODING) {
643                         if (Character.isSurrogate(c)) {
644                             if (sgp().parse(c, src) < 0)
645                                 return sgp.error();
646                             return sgp.unmappableResult();
647                         }
648                         return CoderResult.unmappableForLength(1);
649                     }
650                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
651                         if (dst.remaining() < 2)
652                             return CoderResult.OVERFLOW;
653                         dst.put((byte)(bb >> 8));
654                         dst.put((byte)(bb));
655                     } else {
656                         if (dst.remaining() < 1)
657                         return CoderResult.OVERFLOW;
658                         dst.put((byte)bb);
659                     }
660                     mark++;
661                 }
662                 return CoderResult.UNDERFLOW;
663             } finally {
664                 src.position(mark);
665             }
666         }
667 
encodeLoop(CharBuffer src, ByteBuffer dst)668         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
669             if (src.hasArray() && dst.hasArray())
670                 return encodeArrayLoop(src, dst);
671             else
672                 return encodeBufferLoop(src, dst);
673         }
674 
        // Replacement bytes written by the array-based encode methods for
        // unmappable input. Initialized from replacement() and overwritten
        // by implReplaceWith().
        protected byte[] repl = replacement();
        // Stores the new replacement in the repl field. Presumably invoked
        // by CharsetEncoder.replaceWith(); confirm against the API docs.
        protected void implReplaceWith(byte[] newReplacement) {
            repl = newReplacement;
        }
679 
680         @Override
encode(char[] src, int sp, int len, byte[] dst)681         public int encode(char[] src, int sp, int len, byte[] dst) {
682             int dp = 0;
683             int sl = sp + len;
684             int dl = dst.length;
685             while (sp < sl) {
686                 char c = src[sp++];
687                 int bb = encodeChar(c);
688                 if (bb == UNMAPPABLE_ENCODING) {
689                     if (Character.isHighSurrogate(c) && sp < sl &&
690                         Character.isLowSurrogate(src[sp])) {
691                         sp++;
692                     }
693                     dst[dp++] = repl[0];
694                     if (repl.length > 1)
695                         dst[dp++] = repl[1];
696                     continue;
697                 } //else
698                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
699                     dst[dp++] = (byte)(bb >> 8);
700                     dst[dp++] = (byte)bb;
701                 } else {                          // SingleByte
702                     dst[dp++] = (byte)bb;
703                 }
704             }
705             return dp;
706         }
707 
708         @Override
encodeFromLatin1(byte[] src, int sp, int len, byte[] dst)709         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
710             int dp = 0;
711             int sl = sp + len;
712             while (sp < sl) {
713                 char c = (char)(src[sp++] & 0xff);
714                 int bb = encodeChar(c);
715                 if (bb == UNMAPPABLE_ENCODING) {
716                     // no surrogate pair in latin1 string
717                     dst[dp++] = repl[0];
718                     if (repl.length > 1) {
719                         dst[dp++] = repl[1];
720                     }
721                     continue;
722                 } //else
723                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
724                     dst[dp++] = (byte)(bb >> 8);
725                     dst[dp++] = (byte)bb;
726                 } else {                   // SingleByte
727                     dst[dp++] = (byte)bb;
728                 }
729 
730             }
731             return dp;
732         }
733 
734         @Override
encodeFromUTF16(byte[] src, int sp, int len, byte[] dst)735         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
736             int dp = 0;
737             int sl = sp + len;
738             while (sp < sl) {
739                 char c = StringUTF16.getChar(src, sp++);
740                 int bb = encodeChar(c);
741                 if (bb == UNMAPPABLE_ENCODING) {
742                     if (Character.isHighSurrogate(c) && sp < sl &&
743                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
744                         sp++;
745                     }
746                     dst[dp++] = repl[0];
747                     if (repl.length > 1) {
748                         dst[dp++] = repl[1];
749                     }
750                     continue;
751                 } //else
752                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
753                     dst[dp++] = (byte)(bb >> 8);
754                     dst[dp++] = (byte)bb;
755                 } else {                   // SingleByte
756                     dst[dp++] = (byte)bb;
757                 }
758             }
759             return dp;
760         }
761 
762         @Override
isASCIICompatible()763         public boolean isASCIICompatible() {
764             return isASCIICompatible;
765         }
766 
encodeChar(char ch)767         public int encodeChar(char ch) {
768             return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
769         }
770 
771         // init the c2b and c2bIndex tables from b2c.
initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, int b2Min, int b2Max, char[] c2b, char[] c2bIndex)772         public static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
773                             int b2Min, int b2Max,
774                             char[] c2b, char[] c2bIndex)
775         {
776             Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
777             int off = 0x100;
778 
779             char[][] b2c_ca = new char[b2c.length][];
780             char[] b2cSB_ca = null;
781             if (b2cSB != null)
782                 b2cSB_ca = b2cSB.toCharArray();
783 
784             for (int i = 0; i < b2c.length; i++) {
785                 if (b2c[i] == null)
786                     continue;
787                 b2c_ca[i] = b2c[i].toCharArray();
788             }
789 
790             if (b2cNR != null) {
791                 int j = 0;
792                 while (j < b2cNR.length()) {
793                     char b  = b2cNR.charAt(j++);
794                     char c  = b2cNR.charAt(j++);
795                     if (b < 0x100 && b2cSB_ca != null) {
796                         if (b2cSB_ca[b] == c)
797                             b2cSB_ca[b] = UNMAPPABLE_DECODING;
798                     } else {
799                         if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
800                             b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
801                     }
802                 }
803             }
804 
805             if (b2cSB_ca != null) {      // SingleByte
806                 for (int b = 0; b < b2cSB_ca.length; b++) {
807                     char c = b2cSB_ca[b];
808                     if (c == UNMAPPABLE_DECODING)
809                         continue;
810                     int index = c2bIndex[c >> 8];
811                     if (index == 0) {
812                         index = off;
813                         off += 0x100;
814                         c2bIndex[c >> 8] = (char)index;
815                     }
816                     c2b[index + (c & 0xff)] = (char)b;
817                 }
818             }
819 
820             for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
821                 char[] db = b2c_ca[b1];
822                 if (db == null)
823                     continue;
824                 for (int b2 = b2Min; b2 <= b2Max; b2++) {
825                     char c = db[b2 - b2Min];
826                     if (c == UNMAPPABLE_DECODING)
827                         continue;
828                     int index = c2bIndex[c >> 8];
829                     if (index == 0) {
830                         index = off;
831                         off += 0x100;
832                         c2bIndex[c >> 8] = (char)index;
833                     }
834                     c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
835                 }
836             }
837 
838             if (c2bNR != null) {
839                 // add c->b only nr entries
840                 for (int i = 0; i < c2bNR.length(); i += 2) {
841                     char b = c2bNR.charAt(i);
842                     char c = c2bNR.charAt(i + 1);
843                     int index = (c >> 8);
844                     if (c2bIndex[index] == 0) {
845                         c2bIndex[index] = (char)off;
846                         off += 0x100;
847                     }
848                     index = c2bIndex[index] + (c & 0xff);
849                     c2b[index] = b;
850                 }
851             }
852         }
853     }
854 
855     public static class Encoder_DBCSONLY extends Encoder {
856 
Encoder_DBCSONLY(Charset cs, byte[] repl, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)857         public Encoder_DBCSONLY(Charset cs, byte[] repl,
858                                 char[] c2b, char[] c2bIndex,
859                                 boolean isASCIICompatible) {
860             super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
861         }
862 
encodeChar(char ch)863         public int encodeChar(char ch) {
864             int bb = super.encodeChar(ch);
865             if (bb <= MAX_SINGLEBYTE)
866                 return UNMAPPABLE_ENCODING;
867             return bb;
868         }
869     }
870 
871     public static class Encoder_EBCDIC extends Encoder {
872         static final int SBCS = 0;
873         static final int DBCS = 1;
874         static final byte SO = 0x0e;
875         static final byte SI = 0x0f;
876 
877         protected int  currentState = SBCS;
878 
Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)879         public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
880                               boolean isASCIICompatible) {
881             super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
882         }
883 
implReset()884         protected void implReset() {
885             currentState = SBCS;
886         }
887 
implFlush(ByteBuffer out)888         protected CoderResult implFlush(ByteBuffer out) {
889             if (currentState == DBCS) {
890                 if (out.remaining() < 1)
891                     return CoderResult.OVERFLOW;
892                 out.put(SI);
893             }
894             implReset();
895             return CoderResult.UNDERFLOW;
896         }
897 
encodeArrayLoop(CharBuffer src, ByteBuffer dst)898         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
899             char[] sa = src.array();
900             int sp = src.arrayOffset() + src.position();
901             int sl = src.arrayOffset() + src.limit();
902             byte[] da = dst.array();
903             int dp = dst.arrayOffset() + dst.position();
904             int dl = dst.arrayOffset() + dst.limit();
905 
906             try {
907                 while (sp < sl) {
908                     char c = sa[sp];
909                     int bb = encodeChar(c);
910                     if (bb == UNMAPPABLE_ENCODING) {
911                         if (Character.isSurrogate(c)) {
912                             if (sgp().parse(c, sa, sp, sl) < 0)
913                                 return sgp.error();
914                             return sgp.unmappableResult();
915                         }
916                         return CoderResult.unmappableForLength(1);
917                     }
918                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
919                         if (currentState == SBCS) {
920                             if (dl - dp < 1)
921                                 return CoderResult.OVERFLOW;
922                             currentState = DBCS;
923                             da[dp++] = SO;
924                         }
925                         if (dl - dp < 2)
926                             return CoderResult.OVERFLOW;
927                         da[dp++] = (byte)(bb >> 8);
928                         da[dp++] = (byte)bb;
929                     } else {                    // SingleByte
930                         if (currentState == DBCS) {
931                             if (dl - dp < 1)
932                                 return CoderResult.OVERFLOW;
933                             currentState = SBCS;
934                             da[dp++] = SI;
935                         }
936                         if (dl - dp < 1)
937                             return CoderResult.OVERFLOW;
938                         da[dp++] = (byte)bb;
939 
940                     }
941                     sp++;
942                 }
943                 return CoderResult.UNDERFLOW;
944             } finally {
945                 src.position(sp - src.arrayOffset());
946                 dst.position(dp - dst.arrayOffset());
947             }
948         }
949 
encodeBufferLoop(CharBuffer src, ByteBuffer dst)950         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
951             int mark = src.position();
952             try {
953                 while (src.hasRemaining()) {
954                     char c = src.get();
955                     int bb = encodeChar(c);
956                     if (bb == UNMAPPABLE_ENCODING) {
957                         if (Character.isSurrogate(c)) {
958                             if (sgp().parse(c, src) < 0)
959                                 return sgp.error();
960                             return sgp.unmappableResult();
961                         }
962                         return CoderResult.unmappableForLength(1);
963                     }
964                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
965                         if (currentState == SBCS) {
966                             if (dst.remaining() < 1)
967                                 return CoderResult.OVERFLOW;
968                             currentState = DBCS;
969                             dst.put(SO);
970                         }
971                         if (dst.remaining() < 2)
972                             return CoderResult.OVERFLOW;
973                         dst.put((byte)(bb >> 8));
974                         dst.put((byte)(bb));
975                     } else {                  // Single-byte
976                         if (currentState == DBCS) {
977                             if (dst.remaining() < 1)
978                                 return CoderResult.OVERFLOW;
979                             currentState = SBCS;
980                             dst.put(SI);
981                         }
982                         if (dst.remaining() < 1)
983                             return CoderResult.OVERFLOW;
984                         dst.put((byte)bb);
985                     }
986                     mark++;
987                 }
988                 return CoderResult.UNDERFLOW;
989             } finally {
990                 src.position(mark);
991             }
992         }
993 
994         @Override
encode(char[] src, int sp, int len, byte[] dst)995         public int encode(char[] src, int sp, int len, byte[] dst) {
996             int dp = 0;
997             int sl = sp + len;
998             while (sp < sl) {
999                 char c = src[sp++];
1000                 int bb = encodeChar(c);
1001 
1002                 if (bb == UNMAPPABLE_ENCODING) {
1003                     if (Character.isHighSurrogate(c) && sp < sl &&
1004                         Character.isLowSurrogate(src[sp])) {
1005                         sp++;
1006                     }
1007                     dst[dp++] = repl[0];
1008                     if (repl.length > 1)
1009                         dst[dp++] = repl[1];
1010                     continue;
1011                 } //else
1012                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1013                     if (currentState == SBCS) {
1014                         currentState = DBCS;
1015                         dst[dp++] = SO;
1016                     }
1017                     dst[dp++] = (byte)(bb >> 8);
1018                     dst[dp++] = (byte)bb;
1019                 } else {                             // SingleByte
1020                     if (currentState == DBCS) {
1021                          currentState = SBCS;
1022                          dst[dp++] = SI;
1023                     }
1024                     dst[dp++] = (byte)bb;
1025                 }
1026             }
1027 
1028             if (currentState == DBCS) {
1029                  currentState = SBCS;
1030                  dst[dp++] = SI;
1031             }
1032             return dp;
1033         }
1034 
1035         @Override
encodeFromLatin1(byte[] src, int sp, int len, byte[] dst)1036         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1037             int dp = 0;
1038             int sl = sp + len;
1039             while (sp < sl) {
1040                 char c = (char)(src[sp++] & 0xff);
1041                 int bb = encodeChar(c);
1042                 if (bb == UNMAPPABLE_ENCODING) {
1043                     // no surrogate pair in latin1 string
1044                     dst[dp++] = repl[0];
1045                     if (repl.length > 1)
1046                         dst[dp++] = repl[1];
1047                     continue;
1048                 } //else
1049                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1050                     if (currentState == SBCS) {
1051                         currentState = DBCS;
1052                         dst[dp++] = SO;
1053                     }
1054                     dst[dp++] = (byte)(bb >> 8);
1055                     dst[dp++] = (byte)bb;
1056                 } else {                             // SingleByte
1057                     if (currentState == DBCS) {
1058                          currentState = SBCS;
1059                          dst[dp++] = SI;
1060                     }
1061                     dst[dp++] = (byte)bb;
1062                 }
1063             }
1064             if (currentState == DBCS) {
1065                  currentState = SBCS;
1066                  dst[dp++] = SI;
1067             }
1068             return dp;
1069         }
1070 
1071         @Override
encodeFromUTF16(byte[] src, int sp, int len, byte[] dst)1072         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1073             int dp = 0;
1074             int sl = sp + len;
1075             while (sp < sl) {
1076                 char c = StringUTF16.getChar(src, sp++);
1077                 int bb = encodeChar(c);
1078                 if (bb == UNMAPPABLE_ENCODING) {
1079                     if (Character.isHighSurrogate(c) && sp < sl &&
1080                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1081                         sp++;
1082                     }
1083                     dst[dp++] = repl[0];
1084                     if (repl.length > 1)
1085                         dst[dp++] = repl[1];
1086                     continue;
1087                 } //else
1088                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1089                     if (currentState == SBCS) {
1090                         currentState = DBCS;
1091                         dst[dp++] = SO;
1092                     }
1093                     dst[dp++] = (byte)(bb >> 8);
1094                     dst[dp++] = (byte)bb;
1095                 } else {                             // SingleByte
1096                     if (currentState == DBCS) {
1097                          currentState = SBCS;
1098                          dst[dp++] = SI;
1099                     }
1100                     dst[dp++] = (byte)bb;
1101                 }
1102             }
1103             if (currentState == DBCS) {
1104                  currentState = SBCS;
1105                  dst[dp++] = SI;
1106             }
1107             return dp;
1108         }
1109     }
1110 
1111     // EUC_SIMPLE
1112     public static class Encoder_EUC_SIM extends Encoder {
Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)1113         public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
1114                                boolean isASCIICompatible) {
1115             super(cs, c2b, c2bIndex, isASCIICompatible);
1116         }
1117     }
1118 
1119 }
1120