1 /*
2  * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package sun.nio.cs;
27 
28 import java.nio.ByteBuffer;
29 import java.nio.CharBuffer;
30 import java.nio.charset.Charset;
31 import java.nio.charset.CharsetDecoder;
32 import java.nio.charset.CharsetEncoder;
33 import java.nio.charset.CoderResult;
34 import java.util.Arrays;
35 import sun.nio.cs.Surrogate;
36 import sun.nio.cs.ArrayDecoder;
37 import sun.nio.cs.ArrayEncoder;
38 import static sun.nio.cs.CharsetMapping.*;
39 
40 /*
41  * Four types of "DoubleByte" charsets are implemented in this class
42  * (1)DoubleByte
 *    The "most widely used" multibyte charset, a combination of
44  *    a singlebyte character set (usually the ASCII charset) and a
45  *    doublebyte character set. The codepoint values of singlebyte
46  *    and doublebyte don't overlap. Microsoft's multibyte charsets
47  *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
48  *    948, 949 and 950 are such charsets.
49  *
50  * (2)DoubleByte_EBCDIC
51  *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
52  *    in and out between the singlebyte character set and doublebyte
53  *    character set.
54  *
55  * (3)DoubleByte_SIMPLE_EUC
 *    It's a "simple" form of the EUC encoding scheme: only the
 *    singlebyte character set G0 and one doublebyte character set
 *    G1 are defined; G2 (with SS2) and G3 (with SS3) are not used.
 *    So it is actually the same as the "typical" type (1) mentioned
 *    above, except that it returns "malformed" for SS2 and SS3 when
 *    decoding.
62  *
63  * (4)DoubleByte ONLY
 *    A "pure" doublebyte-only character set. From an implementation
 *    point of view, this is type (1) with "decodeSingle" always
 *    returning unmappable.
67  *
68  * For simplicity, all implementations share the same decoding and
69  * encoding data structure.
70  *
71  * Decoding:
72  *
73  *    char[][] b2c;
74  *    char[] b2cSB;
75  *    int b2Min, b2Max
76  *
77  *    public char decodeSingle(int b) {
 *        return b2cSB[b];
79  *    }
80  *
81  *    public char decodeDouble(int b1, int b2) {
82  *        if (b2 < b2Min || b2 > b2Max)
83  *            return UNMAPPABLE_DECODING;
84  *         return b2c[b1][b2 - b2Min];
85  *    }
86  *
87  *    (1)b2Min, b2Max are the corresponding min and max value of the
88  *       low-half of the double-byte.
 *    (2)The high 8-bit/b1 of the double-byte is used to index into
 *       the b2c array.
91  *
92  * Encoding:
93  *
94  *    char[] c2b;
95  *    char[] c2bIndex;
96  *
97  *    public int encodeChar(char ch) {
98  *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
99  *    }
100  *
101  */
102 
103 public class DoubleByte {
104 
105     public static final char[] B2C_UNMAPPABLE;
106     static {
107         B2C_UNMAPPABLE = new char[0x100];
Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING)108         Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
109     }
110 
111     public static class Decoder extends CharsetDecoder
112                                 implements DelegatableDecoder, ArrayDecoder
113     {
114         final char[][] b2c;
115         final char[] b2cSB;
116         final int b2Min;
117         final int b2Max;
118         final boolean isASCIICompatible;
119 
        // for SimpleEUC override
        /**
         * Result reported by the decode loops when a byte without a
         * single-byte mapping is the last byte available in the input.
         * This base implementation ignores the byte and always asks for
         * more input.
         *
         * @param b the unsigned value of the trailing byte
         * @return always {@link CoderResult#UNDERFLOW}
         */
        protected CoderResult crMalformedOrUnderFlow(int b) {
            return CoderResult.UNDERFLOW;
        }
124 
crMalformedOrUnmappable(int b1, int b2)125         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
126             if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
127                 b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
128                 decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
129                 return CoderResult.malformedForLength(1);
130             }
131             return CoderResult.unmappableForLength(2);
132         }
133 
        /**
         * Creates a decoder over the supplied lookup tables. The table
         * arrays are stored by reference, not copied.
         *
         * @param cs     the charset this decoder belongs to
         * @param avgcpb average chars-per-byte value handed to the
         *               superclass constructor
         * @param maxcpb maximum chars-per-byte value handed to the
         *               superclass constructor
         * @param b2c    double-byte table, indexed by the first byte and
         *               then by (second byte - b2Min)
         * @param b2cSB  single-byte table, indexed by the unsigned byte
         * @param b2Min  smallest second-byte value looked up in b2c
         * @param b2Max  largest second-byte value looked up in b2c
         * @param isASCIICompatible value reported by isASCIICompatible()
         */
        public Decoder(Charset cs, float avgcpb, float maxcpb,
                       char[][] b2c, char[] b2cSB,
                       int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            super(cs, avgcpb, maxcpb);
            this.b2c = b2c;
            this.b2cSB = b2cSB;
            this.b2Min = b2Min;
            this.b2Max = b2Max;
            this.isASCIICompatible = isASCIICompatible;
        }
145 
        /**
         * Creates a decoder with an average of 0.5 and a maximum of 1.0
         * chars per byte; all other arguments are forwarded unchanged to
         * the full constructor.
         */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }
150 
        /**
         * Creates a decoder with an average of 0.5 and a maximum of 1.0
         * chars per byte that reports itself as not ASCII compatible.
         */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
        }
154 
        /**
         * Decodes from an array-backed source buffer into an array-backed
         * destination buffer, working directly on the backing arrays.
         *
         * Each iteration first looks the byte up in b2cSB; only when that
         * yields UNMAPPABLE_DECODING is a second byte fetched and the pair
         * looked up in b2c. The positions of both buffers are written back
         * in the finally clause, so they cover only the sequences that
         * were completely decoded, whatever result is returned.
         *
         * @param src array-backed source of encoded bytes
         * @param dst array-backed destination for decoded chars
         * @return UNDERFLOW when the input is exhausted, OVERFLOW when the
         *         output fills up first, otherwise the result of
         *         crMalformedOrUnderFlow or crMalformedOrUnmappable
         */
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl && dp < dl) {
                    // inline the decodeSingle/Double() for better performance
                    int inSize = 1;
                    int b1 = sa[sp] & 0xff;
                    char c = b2cSB[b1];
                    if (c == UNMAPPABLE_DECODING) {
                        // not a single byte: a second byte is required
                        if (sl - sp < 2)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = sa[sp + 1] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            return crMalformedOrUnmappable(b1, b2);
                        }
                        inSize++;
                    }
                    da[dp++] = c;
                    sp += inSize;
                }
                return (sp >= sl) ? CoderResult.UNDERFLOW
                                  : CoderResult.OVERFLOW;
            } finally {
                // sp/dp are array indices; convert back to buffer positions
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }
190 
        /**
         * Decodes through the relative get/put methods of the buffers;
         * decodeLoop uses it when either buffer is not array-backed.
         *
         * The local "mark" counts only the bytes of completely decoded
         * sequences. The finally clause sets the source position to it,
         * which un-reads any bytes consumed for a sequence that ended in
         * an early return.
         *
         * @param src source of encoded bytes
         * @param dst destination for decoded chars
         * @return UNDERFLOW when the input is exhausted, OVERFLOW when
         *         input remains but the output is full, otherwise the
         *         result of crMalformedOrUnderFlow or
         *         crMalformedOrUnmappable
         */
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {

                while (src.hasRemaining() && dst.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    char c = b2cSB[b1];
                    int inSize = 1;
                    if (c == UNMAPPABLE_DECODING) {
                        // not a single byte: a second byte is required
                        if (src.remaining() < 1)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = src.get() & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
                            return crMalformedOrUnmappable(b1, b2);
                        inSize++;
                    }
                    dst.put(c);
                    mark += inSize;
                }
                return src.hasRemaining()? CoderResult.OVERFLOW
                                         : CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }
217 
218         // Make some protected methods public for use by JISAutoDetect
decodeLoop(ByteBuffer src, CharBuffer dst)219         public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
220             if (src.hasArray() && dst.hasArray())
221                 return decodeArrayLoop(src, dst);
222             else
223                 return decodeBufferLoop(src, dst);
224         }
225 
        /**
         * Decodes len bytes of src, starting at index sp, into dst
         * starting at index 0. Input that cannot be decoded yields the
         * first char of replacement() instead of an error.
         *
         * @param src encoded bytes
         * @param sp  index of the first byte to decode
         * @param len number of bytes to decode
         * @param dst receives the decoded chars
         * @return the number of chars written to dst
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            // only b1 is bad: step back so that b2 is
                            // decoded again as the start of a sequence
                            if (crMalformedOrUnmappable(b1, b2).length() == 1) {
                                sp--;
                            }
                        }
                    }
                    // also covers a lone b1 at the very end of the input
                    if (c == UNMAPPABLE_DECODING) {
                         c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }
252 
        /**
         * @return the isASCIICompatible flag supplied to the constructor
         */
        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }
257 
        /**
         * Public re-declaration that only delegates to the superclass
         * implementation.
         */
        public void implReset() {
            super.implReset();
        }
261 
        /**
         * Public re-declaration that only delegates to the superclass
         * implementation.
         *
         * @param out the output buffer, passed through unchanged
         * @return whatever the superclass implementation returns
         */
        public CoderResult implFlush(CharBuffer out) {
            return super.implFlush(out);
        }
265 
        // decode loops are not using decodeSingle/Double() for performance
        // reason.
        /**
         * Looks a byte up in the single-byte table. No range check is
         * made here.
         *
         * @param b index into b2cSB; callers in this class pass an
         *          unsigned byte value
         * @return the table entry, which is UNMAPPABLE_DECODING when the
         *         byte has no single-byte mapping
         */
        public char decodeSingle(int b) {
            return b2cSB[b];
        }
271 
decodeDouble(int b1, int b2)272         public char decodeDouble(int b1, int b2) {
273             if (b1 < 0 || b1 > b2c.length ||
274                 b2 < b2Min || b2 > b2Max)
275                 return UNMAPPABLE_DECODING;
276             return  b2c[b1][b2 - b2Min];
277         }
278     }
279 
280     // IBM_EBCDIC_DBCS
281     public static class Decoder_EBCDIC extends Decoder {
282         private static final int SBCS = 0;
283         private static final int DBCS = 1;
284         private static final int SO = 0x0e;
285         private static final int SI = 0x0f;
286         private int  currentState;
287 
        /**
         * Creates an EBCDIC decoder; all arguments are forwarded
         * unchanged to the Decoder constructor.
         */
        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                              boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }
293 
        /**
         * Creates an EBCDIC decoder that reports itself as not ASCII
         * compatible.
         */
        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max, false);
        }
298 
        /**
         * Puts the shift state back to single-byte mode (SBCS).
         */
        public void implReset() {
            currentState = SBCS;
        }
302 
303         // Check validity of dbcs ebcdic byte pair values
304         //
305         // First byte : 0x41 -- 0xFE
306         // Second byte: 0x41 -- 0xFE
307         // Doublebyte blank: 0x4040
308         //
309         // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
310         // as
311         //            if ((b1 != 0x40 || b2 != 0x40) &&
312         //                (b2 < 0x41 || b2 > 0xfe)) {...}
313         // is not correct/complete (range check for b1)
314         //
isDoubleByte(int b1, int b2)315         private static boolean isDoubleByte(int b1, int b2) {
316             return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
317                    || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
318         }
319 
        /**
         * Array-based decode loop for the shift-state (SO/SI) encoding.
         *
         * SO switches from SBCS to DBCS and SI switches back; a shift
         * byte seen in the wrong state is reported as a one-byte
         * malformed input. In SBCS state each byte is looked up in b2cSB,
         * in DBCS state each pair of bytes is looked up in b2c. Both
         * buffer positions are written back in the finally clause.
         *
         * @param src array-backed source of encoded bytes
         * @param dst array-backed destination for decoded chars
         * @return UNDERFLOW when the input is exhausted or ends in the
         *         middle of a pair, OVERFLOW when a char cannot be
         *         stored, otherwise a malformed or unmappable result
         */
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                // don't check dp/dl together here, it's possible to
                // decode a SO/SI without space in output buffer.
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c =  UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            // DBCS state: a complete pair is required
                            if (sl - sp < 2)
                                return CoderResult.UNDERFLOW;
                            int b2 = sa[sp + 1] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                // a pair outside the valid DBCS ranges is
                                // malformed; a valid but unmapped pair is
                                // unmappable
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;

                        da[dp++] = c;
                    }
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }
375 
        /**
         * Buffer-based decode loop for the shift-state (SO/SI) encoding;
         * it applies the same rules as the array-based loop through
         * relative get/put calls.
         *
         * The local "mark" counts only the bytes of completely handled
         * shift bytes and sequences; the finally clause sets the source
         * position to it, un-reading bytes consumed before an early
         * return.
         *
         * @param src source of encoded bytes
         * @param dst destination for decoded chars
         * @return UNDERFLOW when the input is exhausted or ends in the
         *         middle of a pair, OVERFLOW when a char cannot be
         *         stored, otherwise a malformed or unmappable result
         */
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            // DBCS state: a complete pair is required
                            if (src.remaining() < 1)
                                return CoderResult.UNDERFLOW;
                            int b2 = src.get()&0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }

                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;

                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }
423 
        /**
         * Decodes len bytes of src, starting at index sp, into dst
         * starting at index 0. The shift state is reset to SBCS on entry.
         * Every error (a shift byte in the wrong state, an unmapped byte
         * or pair, a pair cut off at the end of the input) yields the
         * first char of replacement().
         *
         * @param src encoded bytes
         * @param sp  index of the first byte to decode
         * @param len number of bytes to decode
         * @param dst receives the decoded chars
         * @return the number of chars written to dst
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            currentState = SBCS;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                if (b1 == SO) {  // Shift out
                    if (currentState != SBCS)
                        dst[dp++] = repl;
                    else
                        currentState = DBCS;
                } else if (b1 == SI) {
                    if (currentState != DBCS)
                        dst[dp++] = repl;
                    else
                        currentState = SBCS;
                } else {
                    char c =  UNMAPPABLE_DECODING;
                    if (currentState == SBCS) {
                        c = b2cSB[b1];
                        if (c == UNMAPPABLE_DECODING)
                            c = repl;
                    } else {
                        // DBCS state with no second byte left
                        if (sl == sp) {
                            c = repl;
                        } else {
                            int b2 = src[sp++] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                c = repl;
                            }
                        }
                    }
                    dst[dp++] = c;
                }
            }
            return dp;
        }
464     }
465 
466     // DBCS_ONLY
467     public static class Decoder_DBCSONLY extends Decoder {
468         static final char[] b2cSB_UNMAPPABLE;
469         static {
470             b2cSB_UNMAPPABLE = new char[0x100];
Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING)471             Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
472         }
473 
474         // always returns unmappableForLenth(2) for doublebyte_only
475         @Override
crMalformedOrUnmappable(int b1, int b2)476         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
477             return CoderResult.unmappableForLength(2);
478         }
479 
Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)480         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
481                                 boolean isASCIICompatible) {
482             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
483         }
484 
Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max)485         public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
486             super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
487         }
488     }
489 
490     // EUC_SIMPLE
491     // The only thing we need to "override" is to check SS2/SS3 and
492     // return "malformed" if found
493     public static class Decoder_EUC_SIM extends Decoder {
494         private final int SS2 =  0x8E;
495         private final int SS3 =  0x8F;
496 
Decoder_EUC_SIM(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max, boolean isASCIICompatible)497         public Decoder_EUC_SIM(Charset cs,
498                                char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
499                                boolean isASCIICompatible) {
500             super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
501         }
502 
503         // No support provided for G2/G3 for SimpleEUC
crMalformedOrUnderFlow(int b)504         protected CoderResult crMalformedOrUnderFlow(int b) {
505             if (b == SS2 || b == SS3 )
506                 return CoderResult.malformedForLength(1);
507             return CoderResult.UNDERFLOW;
508         }
509 
crMalformedOrUnmappable(int b1, int b2)510         protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
511             if (b1 == SS2 || b1 == SS3 )
512                 return CoderResult.malformedForLength(1);
513             return CoderResult.unmappableForLength(2);
514         }
515 
516         @Override
decode(byte[] src, int sp, int len, char[] dst)517         public int decode(byte[] src, int sp, int len, char[] dst) {
518             int dp = 0;
519             int sl = sp + len;
520             char repl = replacement().charAt(0);
521             while (sp < sl) {
522                 int b1 = src[sp++] & 0xff;
523                 char c = b2cSB[b1];
524                 if (c == UNMAPPABLE_DECODING) {
525                     if (sp < sl) {
526                         int b2 = src[sp++] & 0xff;
527                         if (b2 < b2Min || b2 > b2Max ||
528                             (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
529                             if (b1 == SS2 || b1 == SS3) {
530                                 sp--;
531                             }
532                             c = repl;
533                         }
534                     } else {
535                         c = repl;
536                     }
537                 }
538                 dst[dp++] = c;
539             }
540             return dp;
541         }
542     }
543 
544     public static class Encoder extends CharsetEncoder
545                                 implements ArrayEncoder
546     {
547         protected final int MAX_SINGLEBYTE = 0xff;
548         private final char[] c2b;
549         private final char[] c2bIndex;
550         protected Surrogate.Parser sgp;
551         final boolean isASCIICompatible;
552 
        /**
         * Creates an encoder that reports itself as not ASCII compatible;
         * delegates to the four-argument constructor.
         */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            this(cs, c2b, c2bIndex, false);
        }
556 
        /**
         * Creates an encoder with an average and a maximum of 2.0 bytes
         * per char. The table arrays are stored by reference, not copied.
         *
         * @param cs       the charset this encoder belongs to
         * @param c2b      char-to-bytes table read by encodeChar
         * @param c2bIndex index table into c2b, addressed by the high
         *                 byte of the char
         * @param isASCIICompatible value reported by isASCIICompatible()
         */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }
563 
        /**
         * Creates an encoder with explicit sizing values and replacement
         * bytes, all handed to the superclass constructor. The table
         * arrays are stored by reference, not copied.
         *
         * @param cs       the charset this encoder belongs to
         * @param avg      average bytes-per-char value
         * @param max      maximum bytes-per-char value
         * @param repl     initial replacement bytes
         * @param c2b      char-to-bytes table read by encodeChar
         * @param c2bIndex index table into c2b, addressed by the high
         *                 byte of the char
         * @param isASCIICompatible value reported by isASCIICompatible()
         */
        public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
                       boolean isASCIICompatible) {
            super(cs, avg, max, repl);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }
571 
        /**
         * @param c the char to test
         * @return true when encodeChar maps c to anything other than
         *         UNMAPPABLE_ENCODING
         */
        public boolean canEncode(char c) {
            return encodeChar(c) != UNMAPPABLE_ENCODING;
        }
575 
sgp()576         protected Surrogate.Parser sgp() {
577             if (sgp == null)
578                 sgp = new Surrogate.Parser();
579             return sgp;
580         }
581 
encodeArrayLoop(CharBuffer src, ByteBuffer dst)582         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
583             char[] sa = src.array();
584             int sp = src.arrayOffset() + src.position();
585             int sl = src.arrayOffset() + src.limit();
586 
587             byte[] da = dst.array();
588             int dp = dst.arrayOffset() + dst.position();
589             int dl = dst.arrayOffset() + dst.limit();
590 
591             try {
592                 while (sp < sl) {
593                     char c = sa[sp];
594                     int bb = encodeChar(c);
595                     if (bb == UNMAPPABLE_ENCODING) {
596                         if (Character.isSurrogate(c)) {
597                             if (sgp().parse(c, sa, sp, sl) < 0)
598                                 return sgp.error();
599                             return sgp.unmappableResult();
600                         }
601                         return CoderResult.unmappableForLength(1);
602                     }
603 
604                     if (bb > MAX_SINGLEBYTE) {    // DoubleByte
605                         if (dl - dp < 2)
606                             return CoderResult.OVERFLOW;
607                         da[dp++] = (byte)(bb >> 8);
608                         da[dp++] = (byte)bb;
609                     } else {                      // SingleByte
610                         if (dl - dp < 1)
611                             return CoderResult.OVERFLOW;
612                         da[dp++] = (byte)bb;
613                     }
614 
615                     sp++;
616                 }
617                 return CoderResult.UNDERFLOW;
618             } finally {
619                 src.position(sp - src.arrayOffset());
620                 dst.position(dp - dst.arrayOffset());
621             }
622         }
623 
encodeBufferLoop(CharBuffer src, ByteBuffer dst)624         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
625             int mark = src.position();
626             try {
627                 while (src.hasRemaining()) {
628                     char c = src.get();
629                     int bb = encodeChar(c);
630                     if (bb == UNMAPPABLE_ENCODING) {
631                         if (Character.isSurrogate(c)) {
632                             if (sgp().parse(c, src) < 0)
633                                 return sgp.error();
634                             return sgp.unmappableResult();
635                         }
636                         return CoderResult.unmappableForLength(1);
637                     }
638                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
639                         if (dst.remaining() < 2)
640                             return CoderResult.OVERFLOW;
641                         dst.put((byte)(bb >> 8));
642                         dst.put((byte)(bb));
643                     } else {
644                         if (dst.remaining() < 1)
645                         return CoderResult.OVERFLOW;
646                         dst.put((byte)bb);
647                     }
648                     mark++;
649                 }
650                 return CoderResult.UNDERFLOW;
651             } finally {
652                 src.position(mark);
653             }
654         }
655 
encodeLoop(CharBuffer src, ByteBuffer dst)656         protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
657             if (src.hasArray() && dst.hasArray())
658                 return encodeArrayLoop(src, dst);
659             else
660                 return encodeBufferLoop(src, dst);
661         }
662 
        // Copy of the replacement bytes, read directly by the array-based
        // encode methods; initialized from replacement() when the
        // instance is created.
        protected byte[] repl = replacement();

        /**
         * Stores the new replacement bytes in the repl field.
         *
         * @param newReplacement the replacement bytes to use from now on
         */
        protected void implReplaceWith(byte[] newReplacement) {
            repl = newReplacement;
        }
667 
668         @Override
encode(char[] src, int sp, int len, byte[] dst)669         public int encode(char[] src, int sp, int len, byte[] dst) {
670             int dp = 0;
671             int sl = sp + len;
672             int dl = dst.length;
673             while (sp < sl) {
674                 char c = src[sp++];
675                 int bb = encodeChar(c);
676                 if (bb == UNMAPPABLE_ENCODING) {
677                     if (Character.isHighSurrogate(c) && sp < sl &&
678                         Character.isLowSurrogate(src[sp])) {
679                         sp++;
680                     }
681                     dst[dp++] = repl[0];
682                     if (repl.length > 1)
683                         dst[dp++] = repl[1];
684                     continue;
685                 } //else
686                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
687                     dst[dp++] = (byte)(bb >> 8);
688                     dst[dp++] = (byte)bb;
689                 } else {                          // SingleByte
690                     dst[dp++] = (byte)bb;
691                 }
692             }
693             return dp;
694         }
695 
696         @Override
encodeFromLatin1(byte[] src, int sp, int len, byte[] dst)697         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
698             int dp = 0;
699             int sl = sp + len;
700             while (sp < sl) {
701                 char c = (char)(src[sp++] & 0xff);
702                 int bb = encodeChar(c);
703                 if (bb == UNMAPPABLE_ENCODING) {
704                     // no surrogate pair in latin1 string
705                     dst[dp++] = repl[0];
706                     if (repl.length > 1) {
707                         dst[dp++] = repl[1];
708                     }
709                     continue;
710                 } //else
711                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
712                     dst[dp++] = (byte)(bb >> 8);
713                     dst[dp++] = (byte)bb;
714                 } else {                   // SingleByte
715                     dst[dp++] = (byte)bb;
716                 }
717 
718             }
719             return dp;
720         }
721 
722         @Override
encodeFromUTF16(byte[] src, int sp, int len, byte[] dst)723         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
724             int dp = 0;
725             int sl = sp + len;
726             while (sp < sl) {
727                 char c = StringUTF16.getChar(src, sp++);
728                 int bb = encodeChar(c);
729                 if (bb == UNMAPPABLE_ENCODING) {
730                     if (Character.isHighSurrogate(c) && sp < sl &&
731                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
732                         sp++;
733                     }
734                     dst[dp++] = repl[0];
735                     if (repl.length > 1) {
736                         dst[dp++] = repl[1];
737                     }
738                     continue;
739                 } //else
740                 if (bb > MAX_SINGLEBYTE) { // DoubleByte
741                     dst[dp++] = (byte)(bb >> 8);
742                     dst[dp++] = (byte)bb;
743                 } else {                   // SingleByte
744                     dst[dp++] = (byte)bb;
745                 }
746             }
747             return dp;
748         }
749 
750         @Override
isASCIICompatible()751         public boolean isASCIICompatible() {
752             return isASCIICompatible;
753         }
754 
encodeChar(char ch)755         public int encodeChar(char ch) {
756             return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
757         }
758 
759         // init the c2b and c2bIndex tables from b2c.
initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR, int b2Min, int b2Max, char[] c2b, char[] c2bIndex)760         public static void initC2B(String[] b2c, String b2cSB, String b2cNR,  String c2bNR,
761                             int b2Min, int b2Max,
762                             char[] c2b, char[] c2bIndex)
763         {
764             Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
765             int off = 0x100;
766 
767             char[][] b2c_ca = new char[b2c.length][];
768             char[] b2cSB_ca = null;
769             if (b2cSB != null)
770                 b2cSB_ca = b2cSB.toCharArray();
771 
772             for (int i = 0; i < b2c.length; i++) {
773                 if (b2c[i] == null)
774                     continue;
775                 b2c_ca[i] = b2c[i].toCharArray();
776             }
777 
778             if (b2cNR != null) {
779                 int j = 0;
780                 while (j < b2cNR.length()) {
781                     char b  = b2cNR.charAt(j++);
782                     char c  = b2cNR.charAt(j++);
783                     if (b < 0x100 && b2cSB_ca != null) {
784                         if (b2cSB_ca[b] == c)
785                             b2cSB_ca[b] = UNMAPPABLE_DECODING;
786                     } else {
787                         if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
788                             b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
789                     }
790                 }
791             }
792 
793             if (b2cSB_ca != null) {      // SingleByte
794                 for (int b = 0; b < b2cSB_ca.length; b++) {
795                     char c = b2cSB_ca[b];
796                     if (c == UNMAPPABLE_DECODING)
797                         continue;
798                     int index = c2bIndex[c >> 8];
799                     if (index == 0) {
800                         index = off;
801                         off += 0x100;
802                         c2bIndex[c >> 8] = (char)index;
803                     }
804                     c2b[index + (c & 0xff)] = (char)b;
805                 }
806             }
807 
808             for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
809                 char[] db = b2c_ca[b1];
810                 if (db == null)
811                     continue;
812                 for (int b2 = b2Min; b2 <= b2Max; b2++) {
813                     char c = db[b2 - b2Min];
814                     if (c == UNMAPPABLE_DECODING)
815                         continue;
816                     int index = c2bIndex[c >> 8];
817                     if (index == 0) {
818                         index = off;
819                         off += 0x100;
820                         c2bIndex[c >> 8] = (char)index;
821                     }
822                     c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
823                 }
824             }
825 
826             if (c2bNR != null) {
827                 // add c->b only nr entries
828                 for (int i = 0; i < c2bNR.length(); i += 2) {
829                     char b = c2bNR.charAt(i);
830                     char c = c2bNR.charAt(i + 1);
831                     int index = (c >> 8);
832                     if (c2bIndex[index] == 0) {
833                         c2bIndex[index] = (char)off;
834                         off += 0x100;
835                     }
836                     index = c2bIndex[index] + (c & 0xff);
837                     c2b[index] = b;
838                 }
839             }
840         }
841     }
842 
843     public static class Encoder_DBCSONLY extends Encoder {
844 
Encoder_DBCSONLY(Charset cs, byte[] repl, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)845         public Encoder_DBCSONLY(Charset cs, byte[] repl,
846                                 char[] c2b, char[] c2bIndex,
847                                 boolean isASCIICompatible) {
848             super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
849         }
850 
encodeChar(char ch)851         public int encodeChar(char ch) {
852             int bb = super.encodeChar(ch);
853             if (bb <= MAX_SINGLEBYTE)
854                 return UNMAPPABLE_ENCODING;
855             return bb;
856         }
857     }
858 
859     public static class Encoder_EBCDIC extends Encoder {
860         static final int SBCS = 0;
861         static final int DBCS = 1;
862         static final byte SO = 0x0e;
863         static final byte SI = 0x0f;
864 
865         protected int  currentState = SBCS;
866 
Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)867         public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
868                               boolean isASCIICompatible) {
869             super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
870         }
871 
implReset()872         protected void implReset() {
873             currentState = SBCS;
874         }
875 
implFlush(ByteBuffer out)876         protected CoderResult implFlush(ByteBuffer out) {
877             if (currentState == DBCS) {
878                 if (out.remaining() < 1)
879                     return CoderResult.OVERFLOW;
880                 out.put(SI);
881             }
882             implReset();
883             return CoderResult.UNDERFLOW;
884         }
885 
encodeArrayLoop(CharBuffer src, ByteBuffer dst)886         protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
887             char[] sa = src.array();
888             int sp = src.arrayOffset() + src.position();
889             int sl = src.arrayOffset() + src.limit();
890             byte[] da = dst.array();
891             int dp = dst.arrayOffset() + dst.position();
892             int dl = dst.arrayOffset() + dst.limit();
893 
894             try {
895                 while (sp < sl) {
896                     char c = sa[sp];
897                     int bb = encodeChar(c);
898                     if (bb == UNMAPPABLE_ENCODING) {
899                         if (Character.isSurrogate(c)) {
900                             if (sgp().parse(c, sa, sp, sl) < 0)
901                                 return sgp.error();
902                             return sgp.unmappableResult();
903                         }
904                         return CoderResult.unmappableForLength(1);
905                     }
906                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
907                         if (currentState == SBCS) {
908                             if (dl - dp < 1)
909                                 return CoderResult.OVERFLOW;
910                             currentState = DBCS;
911                             da[dp++] = SO;
912                         }
913                         if (dl - dp < 2)
914                             return CoderResult.OVERFLOW;
915                         da[dp++] = (byte)(bb >> 8);
916                         da[dp++] = (byte)bb;
917                     } else {                    // SingleByte
918                         if (currentState == DBCS) {
919                             if (dl - dp < 1)
920                                 return CoderResult.OVERFLOW;
921                             currentState = SBCS;
922                             da[dp++] = SI;
923                         }
924                         if (dl - dp < 1)
925                             return CoderResult.OVERFLOW;
926                         da[dp++] = (byte)bb;
927 
928                     }
929                     sp++;
930                 }
931                 return CoderResult.UNDERFLOW;
932             } finally {
933                 src.position(sp - src.arrayOffset());
934                 dst.position(dp - dst.arrayOffset());
935             }
936         }
937 
encodeBufferLoop(CharBuffer src, ByteBuffer dst)938         protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
939             int mark = src.position();
940             try {
941                 while (src.hasRemaining()) {
942                     char c = src.get();
943                     int bb = encodeChar(c);
944                     if (bb == UNMAPPABLE_ENCODING) {
945                         if (Character.isSurrogate(c)) {
946                             if (sgp().parse(c, src) < 0)
947                                 return sgp.error();
948                             return sgp.unmappableResult();
949                         }
950                         return CoderResult.unmappableForLength(1);
951                     }
952                     if (bb > MAX_SINGLEBYTE) {  // DoubleByte
953                         if (currentState == SBCS) {
954                             if (dst.remaining() < 1)
955                                 return CoderResult.OVERFLOW;
956                             currentState = DBCS;
957                             dst.put(SO);
958                         }
959                         if (dst.remaining() < 2)
960                             return CoderResult.OVERFLOW;
961                         dst.put((byte)(bb >> 8));
962                         dst.put((byte)(bb));
963                     } else {                  // Single-byte
964                         if (currentState == DBCS) {
965                             if (dst.remaining() < 1)
966                                 return CoderResult.OVERFLOW;
967                             currentState = SBCS;
968                             dst.put(SI);
969                         }
970                         if (dst.remaining() < 1)
971                             return CoderResult.OVERFLOW;
972                         dst.put((byte)bb);
973                     }
974                     mark++;
975                 }
976                 return CoderResult.UNDERFLOW;
977             } finally {
978                 src.position(mark);
979             }
980         }
981 
982         @Override
encode(char[] src, int sp, int len, byte[] dst)983         public int encode(char[] src, int sp, int len, byte[] dst) {
984             int dp = 0;
985             int sl = sp + len;
986             while (sp < sl) {
987                 char c = src[sp++];
988                 int bb = encodeChar(c);
989 
990                 if (bb == UNMAPPABLE_ENCODING) {
991                     if (Character.isHighSurrogate(c) && sp < sl &&
992                         Character.isLowSurrogate(src[sp])) {
993                         sp++;
994                     }
995                     dst[dp++] = repl[0];
996                     if (repl.length > 1)
997                         dst[dp++] = repl[1];
998                     continue;
999                 } //else
1000                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1001                     if (currentState == SBCS) {
1002                         currentState = DBCS;
1003                         dst[dp++] = SO;
1004                     }
1005                     dst[dp++] = (byte)(bb >> 8);
1006                     dst[dp++] = (byte)bb;
1007                 } else {                             // SingleByte
1008                     if (currentState == DBCS) {
1009                          currentState = SBCS;
1010                          dst[dp++] = SI;
1011                     }
1012                     dst[dp++] = (byte)bb;
1013                 }
1014             }
1015 
1016             if (currentState == DBCS) {
1017                  currentState = SBCS;
1018                  dst[dp++] = SI;
1019             }
1020             return dp;
1021         }
1022 
1023         @Override
encodeFromLatin1(byte[] src, int sp, int len, byte[] dst)1024         public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
1025             int dp = 0;
1026             int sl = sp + len;
1027             while (sp < sl) {
1028                 char c = (char)(src[sp++] & 0xff);
1029                 int bb = encodeChar(c);
1030                 if (bb == UNMAPPABLE_ENCODING) {
1031                     // no surrogate pair in latin1 string
1032                     dst[dp++] = repl[0];
1033                     if (repl.length > 1)
1034                         dst[dp++] = repl[1];
1035                     continue;
1036                 } //else
1037                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1038                     if (currentState == SBCS) {
1039                         currentState = DBCS;
1040                         dst[dp++] = SO;
1041                     }
1042                     dst[dp++] = (byte)(bb >> 8);
1043                     dst[dp++] = (byte)bb;
1044                 } else {                             // SingleByte
1045                     if (currentState == DBCS) {
1046                          currentState = SBCS;
1047                          dst[dp++] = SI;
1048                     }
1049                     dst[dp++] = (byte)bb;
1050                 }
1051             }
1052             if (currentState == DBCS) {
1053                  currentState = SBCS;
1054                  dst[dp++] = SI;
1055             }
1056             return dp;
1057         }
1058 
1059         @Override
encodeFromUTF16(byte[] src, int sp, int len, byte[] dst)1060         public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
1061             int dp = 0;
1062             int sl = sp + len;
1063             while (sp < sl) {
1064                 char c = StringUTF16.getChar(src, sp++);
1065                 int bb = encodeChar(c);
1066                 if (bb == UNMAPPABLE_ENCODING) {
1067                     if (Character.isHighSurrogate(c) && sp < sl &&
1068                         Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
1069                         sp++;
1070                     }
1071                     dst[dp++] = repl[0];
1072                     if (repl.length > 1)
1073                         dst[dp++] = repl[1];
1074                     continue;
1075                 } //else
1076                 if (bb > MAX_SINGLEBYTE) {           // DoubleByte
1077                     if (currentState == SBCS) {
1078                         currentState = DBCS;
1079                         dst[dp++] = SO;
1080                     }
1081                     dst[dp++] = (byte)(bb >> 8);
1082                     dst[dp++] = (byte)bb;
1083                 } else {                             // SingleByte
1084                     if (currentState == DBCS) {
1085                          currentState = SBCS;
1086                          dst[dp++] = SI;
1087                     }
1088                     dst[dp++] = (byte)bb;
1089                 }
1090             }
1091             if (currentState == DBCS) {
1092                  currentState = SBCS;
1093                  dst[dp++] = SI;
1094             }
1095             return dp;
1096         }
1097     }
1098 
1099     // EUC_SIMPLE
1100     public static class Encoder_EUC_SIM extends Encoder {
Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible)1101         public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
1102                                boolean isASCIICompatible) {
1103             super(cs, c2b, c2bIndex, isASCIICompatible);
1104         }
1105     }
1106 
1107 }
1108