/*
 * Copyright (c) 2009, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
import static sun.nio.cs.CharsetMapping.*;

/*
 * Four types of "DoubleByte" charsets are implemented in this class
 * (1)DoubleByte
 *    The "most widely used" multibyte charset, a combination of
 *    a singlebyte character set (usually the ASCII charset) and a
 *    doublebyte character set. The codepoint values of singlebyte
 *    and doublebyte don't overlap. Microsoft's multibyte charsets
 *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
 *    948, 949 and 950 are such charsets.
 *
 * (2)DoubleByte_EBCDIC
 *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
 *    in and out between the singlebyte character set and doublebyte
 *    character set.
 *
 * (3)DoubleByte_SIMPLE_EUC
 *    It's a "simple" form of EUC encoding scheme, only the
 *    singlebyte character set G0 and one doublebyte character set
 *    G1 are defined, G2 (with SS2) and G3 (with SS3) are not used.
 *    So it is actually the same as the "typical" type (1) mentioned
 *    above, except it returns "malformed" for the SS2 and SS3 when
 *    decoding.
 *
 * (4)DoubleByte ONLY
 *    A "pure" doublebyte only character set. From implementation
 *    point of view, this is the type (1) with "decodeSingle" always
 *    returning unmappable.
 *
 * For simplicity, all implementations share the same decoding and
 * encoding data structure.
 *
 * Decoding:
 *
 *    char[][] b2c;
 *    char[] b2cSB;
 *    int b2Min, b2Max
 *
 *    public char decodeSingle(int b) {
 *        return b2cSB[b];
 *    }
 *
 *    public char decodeDouble(int b1, int b2) {
 *        if (b2 < b2Min || b2 > b2Max)
 *            return UNMAPPABLE_DECODING;
 *         return b2c[b1][b2 - b2Min];
 *    }
 *
 *    (1)b2Min, b2Max are the corresponding min and max value of the
 *       low-half of the double-byte.
 *    (2)The high 8-bit/b1 of the double-byte is used to index into
 *       the b2c array.
 *
 * Encoding:
 *
 *    char[] c2b;
 *    char[] c2bIndex;
 *
 *    public int encodeChar(char ch) {
 *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
 *    }
 *
 */

public class DoubleByte {

    /**
     * Shared 256-entry row in which every slot is UNMAPPABLE_DECODING.
     * The decoder compares rows of its b2c table against this array by
     * identity to decide whether a byte is a lead byte.
     */
    public static final char[] B2C_UNMAPPABLE;
    static {
        B2C_UNMAPPABLE = new char[0x100];
        Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
    }

    /**
     * Decoder for the "typical" mixed single/double byte charset (type 1
     * in the file header). A byte is first looked up in the single-byte
     * table; only if that yields UNMAPPABLE_DECODING is it treated as the
     * lead byte of a two-byte sequence.
     */
    public static class Decoder extends CharsetDecoder
                                implements DelegatableDecoder, ArrayDecoder
    {
        final char[][] b2c;      // [lead byte][trail byte - b2Min] -> char
        final char[] b2cSB;      // single byte -> char
        final int b2Min;         // smallest valid trail byte
        final int b2Max;         // largest valid trail byte
        final boolean isASCIICompatible;

        /**
         * Result to report when a lead byte is seen but no second byte is
         * available. The base implementation always asks for more input;
         * SimpleEUC overrides this.
         *
         * @param b the lead byte (0..255)
         */
        // for SimpleEUC override
        protected CoderResult crMalformedOrUnderFlow(int b) {
            return CoderResult.UNDERFLOW;
        }

        /**
         * Classifies a byte pair that failed to decode.
         *
         * @param b1 first byte (0..255)
         * @param b2 second byte (0..255)
         * @return malformed-for-length-1 if b1 is not a lead byte, or b2 is
         *         itself a lead byte or a decodable single byte; otherwise
         *         unmappable-for-length-2
         */
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
                b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
                decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
                return CoderResult.malformedForLength(1);
            }
            return CoderResult.unmappableForLength(2);
        }

        /**
         * Creates a decoder with explicit chars-per-byte ratios.
         *
         * @param cs                the charset that owns this decoder
         * @param avgcpb            average chars produced per input byte
         * @param maxcpb            maximum chars produced per input byte
         * @param b2c               double-byte decoding table
         * @param b2cSB             single-byte decoding table
         * @param b2Min             smallest valid second byte
         * @param b2Max             largest valid second byte
         * @param isASCIICompatible value reported by {@link #isASCIICompatible()}
         */
        public Decoder(Charset cs, float avgcpb, float maxcpb,
                       char[][] b2c, char[] b2cSB,
                       int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            super(cs, avgcpb, maxcpb);
            this.b2c = b2c;
            this.b2cSB = b2cSB;
            this.b2Min = b2Min;
            this.b2Max = b2Max;
            this.isASCIICompatible = isASCIICompatible;
        }

        /** Creates a decoder using 0.5 average and 1.0 maximum chars per byte. */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        /** As above, with {@code isASCIICompatible} set to {@code false}. */
        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
        }

        /**
         * Decode loop for array-backed buffers. Buffer positions are
         * written back in the finally block, so they reflect only fully
         * consumed input and fully written output on every exit path.
         */
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl && dp < dl) {
                    // inline the decodeSingle/Double() for better performance
                    int inSize = 1;
                    int b1 = sa[sp] & 0xff;
                    char c = b2cSB[b1];
                    if (c == UNMAPPABLE_DECODING) {
                        if (sl - sp < 2)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = sa[sp + 1] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            return crMalformedOrUnmappable(b1, b2);
                        }
                        inSize++;
                    }
                    da[dp++] = c;
                    sp += inSize;
                }
                return (sp >= sl) ? CoderResult.UNDERFLOW
                                  : CoderResult.OVERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Decode loop for buffers without accessible backing arrays.
         * {@code mark} tracks the position after the last fully decoded
         * character; the source is rewound to it on exit.
         */
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {

                while (src.hasRemaining() && dst.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    char c = b2cSB[b1];
                    int inSize = 1;
                    if (c == UNMAPPABLE_DECODING) {
                        if (src.remaining() < 1)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = src.get() & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
                            return crMalformedOrUnmappable(b1, b2);
                        inSize++;
                    }
                    dst.put(c);
                    mark += inSize;
                }
                return src.hasRemaining() ? CoderResult.OVERFLOW
                                          : CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Dispatches to the array or buffer loop depending on whether both
         * buffers expose a backing array.
         */
        // Make some protected methods public for use by JISAutoDetect
        @Override
        public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return decodeArrayLoop(src, dst);
            else
                return decodeBufferLoop(src, dst);
        }

        /**
         * Decodes {@code len} bytes of {@code src} starting at {@code sp}
         * into {@code dst} (written from index 0), substituting the first
         * char of the replacement string for anything undecodable.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            // malformed of length 1: only b1 is bad, so
                            // push b2 back to be decoded on its own
                            if (crMalformedOrUnmappable(b1, b2).length() == 1) {
                                sp--;
                            }
                        }
                    }
                    if (c == UNMAPPABLE_DECODING) {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }

        /** Returns the flag supplied at construction time. */
        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }

        /** Delegates to the superclass; re-declared public. */
        @Override
        public void implReset() {
            super.implReset();
        }

        /** Delegates to the superclass; re-declared public. */
        @Override
        public CoderResult implFlush(CharBuffer out) {
            return super.implFlush(out);
        }

        /**
         * Looks up a single byte in the single-byte table.
         *
         * @param b index into the single-byte table; not range-checked here
         */
        // decode loops are not using decodeSingle/Double() for performance
        // reason.
        public char decodeSingle(int b) {
            return b2cSB[b];
        }

        /**
         * Looks up a byte pair in the double-byte table.
         *
         * @param b1 lead byte
         * @param b2 trail byte
         * @return the decoded char, or UNMAPPABLE_DECODING if {@code b1} is
         *         outside the table or {@code b2} is outside b2Min..b2Max
         */
        public char decodeDouble(int b1, int b2) {
            // ">=": b1 == b2c.length is already one past the last row
            if (b1 < 0 || b1 >= b2c.length ||
                b2 < b2Min || b2 > b2Max)
                return UNMAPPABLE_DECODING;
            return b2c[b1][b2 - b2Min];
        }
    }

    /**
     * Stateful decoder for IBM EBCDIC mixed charsets: SO (0x0e) switches
     * to double-byte mode and SI (0x0f) switches back to single-byte mode.
     */
    // IBM_EBCDIC_DBCS
    public static class Decoder_EBCDIC extends Decoder {
        private static final int SBCS = 0;
        private static final int DBCS = 1;
        private static final int SO = 0x0e;
        private static final int SI = 0x0f;
        private int currentState;

        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                              boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        /** As above, with {@code isASCIICompatible} set to {@code false}. */
        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max, false);
        }

        /** Returns the shift state to single-byte mode. */
        @Override
        public void implReset() {
            currentState = SBCS;
        }

        // Check validity of dbcs ebcdic byte pair values
        //
        // First byte : 0x41 -- 0xFE
        // Second byte: 0x41 -- 0xFE
        // Doublebyte blank: 0x4040
        //
        // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
        // as
        //            if ((b1 != 0x40 || b2 != 0x40) &&
        //                (b2 < 0x41 || b2 > 0xfe)) {...}
        // is not correct/complete (range check for b1)
        //
        private static boolean isDoubleByte(int b1, int b2) {
            return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
                   || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
        }

        /**
         * Array-backed decode loop. A shift byte in the wrong state is
         * malformed (length 1). In double-byte mode an undecodable pair is
         * malformed (length 2) if it fails {@code isDoubleByte}, otherwise
         * unmappable (length 2).
         */
        @Override
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                // don't check dp/dl together here, it's possible to
                // decode a SO/SI without space in output buffer.
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (sl - sp < 2)
                                return CoderResult.UNDERFLOW;
                            int b2 = sa[sp + 1] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;

                        da[dp++] = c;
                    }
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Same logic as {@link #decodeArrayLoop} for buffers without
         * accessible backing arrays; the source is rewound to {@code mark}
         * on exit.
         */
        @Override
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (src.remaining() < 1)
                                return CoderResult.UNDERFLOW;
                            int b2 = src.get() & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }

                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;

                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array decode with replacement. Always starts in single-byte
         * mode. A shift byte in the wrong state, an undecodable byte or
         * pair, and a dangling lead byte at end of input each produce one
         * replacement char.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            currentState = SBCS;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                if (b1 == SO) {  // Shift out
                    if (currentState != SBCS)
                        dst[dp++] = repl;
                    else
                        currentState = DBCS;
                } else if (b1 == SI) {
                    if (currentState != DBCS)
                        dst[dp++] = repl;
                    else
                        currentState = SBCS;
                } else {
                    char c = UNMAPPABLE_DECODING;
                    if (currentState == SBCS) {
                        c = b2cSB[b1];
                        if (c == UNMAPPABLE_DECODING)
                            c = repl;
                    } else {
                        if (sl == sp) {
                            c = repl;
                        } else {
                            int b2 = src[sp++] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                c = repl;
                            }
                        }
                    }
                    dst[dp++] = c;
                }
            }
            return dp;
        }
    }

    /**
     * Decoder for a pure double-byte charset. The single-byte table is
     * replaced by one that maps every byte to UNMAPPABLE_DECODING, so every
     * byte is treated as a lead byte.
     */
    // DBCS_ONLY
    public static class Decoder_DBCSONLY extends Decoder {
        static final char[] b2cSB_UNMAPPABLE;
        static {
            b2cSB_UNMAPPABLE = new char[0x100];
            Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
        }

        // always returns unmappableForLength(2) for doublebyte_only
        @Override
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            return CoderResult.unmappableForLength(2);
        }

        /**
         * @param b2cSB ignored; the all-unmappable table is passed to the
         *              superclass instead
         */
        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                                boolean isASCIICompatible) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
        }

        /**
         * As above, with {@code isASCIICompatible} set to {@code false}.
         *
         * @param b2cSB ignored
         */
        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
        }
    }

    // EUC_SIMPLE
    // The only thing we need to "override" is to check SS2/SS3 and
    // return "malformed" if found
    public static class Decoder_EUC_SIM extends Decoder {
        // compile-time constants; no need for a copy per instance
        private static final int SS2 = 0x8E;
        private static final int SS3 = 0x8F;

        public Decoder_EUC_SIM(Charset cs,
                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                               boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        /**
         * A trailing SS2/SS3 is malformed (length 1); any other lone lead
         * byte is an underflow.
         */
        // No support provided for G2/G3 for SimpleEUC
        @Override
        protected CoderResult crMalformedOrUnderFlow(int b) {
            if (b == SS2 || b == SS3)
                return CoderResult.malformedForLength(1);
            return CoderResult.UNDERFLOW;
        }

        /**
         * A pair led by SS2/SS3 is malformed (length 1); any other
         * undecodable pair is unmappable (length 2).
         */
        @Override
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            if (b1 == SS2 || b1 == SS3)
                return CoderResult.malformedForLength(1);
            return CoderResult.unmappableForLength(2);
        }

        /**
         * Array decode with replacement. When the failed lead byte is
         * SS2/SS3 only that byte is consumed, so the following byte is
         * decoded again on its own.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            if (b1 == SS2 || b1 == SS3) {
                                sp--;
                            }
                            c = repl;
                        }
                    } else {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }
    }

    /**
     * Encoder for the "typical" mixed single/double byte charset. An
     * encoded value above MAX_SINGLEBYTE is written as two bytes (high
     * byte first); anything else is written as one byte.
     */
    public static class Encoder extends CharsetEncoder
                                implements ArrayEncoder
    {
        protected final int MAX_SINGLEBYTE = 0xff;
        private final char[] c2b;
        private final char[] c2bIndex;
        protected Surrogate.Parser sgp;   // created lazily by sgp()
        final boolean isASCIICompatible;

        /** Creates an encoder with {@code isASCIICompatible} set to {@code false}. */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            this(cs, c2b, c2bIndex, false);
        }

        /** Creates an encoder using 2.0 average and maximum bytes per char. */
        public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }

        /**
         * Creates an encoder with explicit ratios and replacement bytes.
         *
         * @param avg  average bytes produced per input char
         * @param max  maximum bytes produced per input char
         * @param repl initial replacement byte sequence
         */
        public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
                       boolean isASCIICompatible) {
            super(cs, avg, max, repl);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }

        /** A char is encodable iff {@link #encodeChar} does not return UNMAPPABLE_ENCODING. */
        @Override
        public boolean canEncode(char c) {
            return encodeChar(c) != UNMAPPABLE_ENCODING;
        }

        /** Returns the surrogate parser, creating it on first use. */
        protected Surrogate.Parser sgp() {
            if (sgp == null)
                sgp = new Surrogate.Parser();
            return sgp;
        }

        /**
         * Encode loop for array-backed buffers. For an unencodable
         * surrogate the result comes from the surrogate parser (its error,
         * or its unmappable result for a well-formed pair); any other
         * unencodable char is unmappable (length 1).
         */
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }

                    if (bb > MAX_SINGLEBYTE) {    // DoubleByte
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                      // SingleByte
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;
                    }

                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Same logic as {@link #encodeArrayLoop} for buffers without
         * accessible backing arrays; the source is rewound to {@code mark}
         * on exit.
         */
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Dispatches to the array or buffer loop depending on whether both
         * buffers expose a backing array.
         */
        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return encodeArrayLoop(src, dst);
            else
                return encodeBufferLoop(src, dst);
        }

        // Local copy of the replacement bytes used by the array-based
        // encode methods; kept in sync through implReplaceWith().
        protected byte[] repl = replacement();

        @Override
        protected void implReplaceWith(byte[] newReplacement) {
            repl = newReplacement;
        }

        /**
         * Encodes {@code len} chars of {@code src} starting at {@code sp}
         * into {@code dst} (written from index 0). An unencodable char, or
         * a high/low surrogate pair taken together, is replaced by the
         * first one or two bytes of {@code repl}. No bounds check is made
         * on {@code dst}.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                          // SingleByte
                    dst[dp++] = (byte)bb;
                }
            }
            return dp;
        }

        /**
         * Encodes {@code len} bytes of {@code src}, each taken as one char
         * in the range 0..255, starting at {@code sp}.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = (char)(src[sp++] & 0xff);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    // no surrogate pair in latin1 string
                    dst[dp++] = repl[0];
                    if (repl.length > 1) {
                        dst[dp++] = repl[1];
                    }
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                   // SingleByte
                    dst[dp++] = (byte)bb;
                }

            }
            return dp;
        }

        /**
         * Encodes chars read from {@code src} with
         * {@code StringUTF16.getChar}; {@code sp} and {@code len} are
         * passed to it as char indices.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = StringUTF16.getChar(src, sp++);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1) {
                        dst[dp++] = repl[1];
                    }
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                   // SingleByte
                    dst[dp++] = (byte)bb;
                }
            }
            return dp;
        }

        /** Returns the flag supplied at construction time. */
        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }

        /**
         * Two-level table lookup: the high byte of {@code ch} selects a
         * 256-entry page through c2bIndex, the low byte selects the slot.
         *
         * @return the encoded value, or UNMAPPABLE_ENCODING
         */
        public int encodeChar(char ch) {
            return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
        }

        /**
         * Fills {@code c2b} and {@code c2bIndex} from the decoding tables.
         *
         * @param b2c      double-byte rows, indexed by lead byte; null rows
         *                 are skipped
         * @param b2cSB    single-byte table, or null
         * @param b2cNR    (byte value, char) pairs for decode-only mappings,
         *                 left out of the encoding table; may be null
         * @param c2bNR    (byte value, char) pairs for encode-only mappings,
         *                 added last; may be null
         * @param b2Min    smallest valid second byte
         * @param b2Max    largest valid second byte
         * @param c2b      output table; pages are handed out in 0x100 steps
         *                 starting at offset 0x100
         * @param c2bIndex output page index; a 0 entry means "no page yet"
         *                 (presumably zero-filled by the caller)
         */
        // init the c2b and c2bIndex tables from b2c.
        public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR,
                                   int b2Min, int b2Max,
                                   char[] c2b, char[] c2bIndex)
        {
            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            int off = 0x100;

            char[][] b2c_ca = new char[b2c.length][];
            char[] b2cSB_ca = null;
            if (b2cSB != null)
                b2cSB_ca = b2cSB.toCharArray();

            for (int i = 0; i < b2c.length; i++) {
                if (b2c[i] == null)
                    continue;
                b2c_ca[i] = b2c[i].toCharArray();
            }

            if (b2cNR != null) {
                // blank out decode-only entries in the working copies
                int j = 0;
                while (j < b2cNR.length()) {
                    char b = b2cNR.charAt(j++);
                    char c = b2cNR.charAt(j++);
                    if (b < 0x100 && b2cSB_ca != null) {
                        if (b2cSB_ca[b] == c)
                            b2cSB_ca[b] = UNMAPPABLE_DECODING;
                    } else {
                        if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
                            b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
                    }
                }
            }

            if (b2cSB_ca != null) {      // SingleByte
                for (int b = 0; b < b2cSB_ca.length; b++) {
                    char c = b2cSB_ca[b];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)b;
                }
            }

            for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
                char[] db = b2c_ca[b1];
                if (db == null)
                    continue;
                for (int b2 = b2Min; b2 <= b2Max; b2++) {
                    char c = db[b2 - b2Min];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
                }
            }

            if (c2bNR != null) {
                // add c->b only nr entries
                for (int i = 0; i < c2bNR.length(); i += 2) {
                    char b = c2bNR.charAt(i);
                    char c = c2bNR.charAt(i + 1);
                    int index = (c >> 8);
                    if (c2bIndex[index] == 0) {
                        c2bIndex[index] = (char)off;
                        off += 0x100;
                    }
                    index = c2bIndex[index] + (c & 0xff);
                    c2b[index] = b;
                }
            }
        }
    }

    /**
     * Encoder for a pure double-byte charset: any char whose table entry
     * is a single-byte value is reported as unmappable.
     */
    public static class Encoder_DBCSONLY extends Encoder {

        public Encoder_DBCSONLY(Charset cs, byte[] repl,
                                char[] c2b, char[] c2bIndex,
                                boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
        }

        @Override
        public int encodeChar(char ch) {
            int bb = super.encodeChar(ch);
            if (bb <= MAX_SINGLEBYTE)
                return UNMAPPABLE_ENCODING;
            return bb;
        }
    }

    /**
     * Stateful encoder for IBM EBCDIC mixed charsets: emits SO before
     * switching to double-byte output and SI before switching back to
     * single-byte output.
     */
    public static class Encoder_EBCDIC extends Encoder {
        static final int SBCS = 0;
        static final int DBCS = 1;
        static final byte SO = 0x0e;
        static final byte SI = 0x0f;

        protected int currentState = SBCS;

        /**
         * Uses 4.0 average and 5.0 maximum bytes per char, and the single
         * byte 0x6f as the replacement.
         */
        public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
                              boolean isASCIICompatible) {
            super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
        }

        /** Returns the shift state to single-byte mode. */
        @Override
        protected void implReset() {
            currentState = SBCS;
        }

        /**
         * Writes a closing SI if the encoder is still in double-byte mode,
         * then resets the state.
         *
         * @return OVERFLOW if there is no room for the SI byte, otherwise
         *         UNDERFLOW
         */
        @Override
        protected CoderResult implFlush(ByteBuffer out) {
            if (currentState == DBCS) {
                if (out.remaining() < 1)
                    return CoderResult.OVERFLOW;
                out.put(SI);
            }
            implReset();
            return CoderResult.UNDERFLOW;
        }

        /**
         * Array-backed encode loop. The shift byte is written, and the
         * state changed, before the space check for the character itself,
         * so an OVERFLOW after a shift leaves the shift in the output and
         * the character unconsumed.
         */
        @Override
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            da[dp++] = SO;
                        }
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                    // SingleByte
                        if (currentState == DBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            da[dp++] = SI;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;

                    }
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Same logic as {@link #encodeArrayLoop} for buffers without
         * accessible backing arrays; the source is rewound to {@code mark}
         * on exit.
         */
        @Override
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            dst.put(SO);
                        }
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {                  // Single-byte
                        if (currentState == DBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            dst.put(SI);
                        }
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array encode with replacement. SO/SI are inserted on each mode
         * change and a final SI is appended if the output ends in
         * double-byte mode. Replacement bytes are written without any
         * shift handling. No bounds check is made on {@code dst}.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);

                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }

            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }

        /**
         * As {@link #encode(char[], int, int, byte[])}, with each source
         * byte taken as one char in the range 0..255.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = (char)(src[sp++] & 0xff);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    // no surrogate pair in latin1 string
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }
            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }

        /**
         * As {@link #encode(char[], int, int, byte[])}, with chars read
         * from {@code src} using {@code StringUTF16.getChar}.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = StringUTF16.getChar(src, sp++);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }
            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }
    }

    /**
     * Encoder for the simple EUC form; adds nothing to the base
     * {@link Encoder}.
     */
    // EUC_SIMPLE
    public static class Encoder_EUC_SIM extends Encoder {
        public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
                               boolean isASCIICompatible) {
            super(cs, c2b, c2bIndex, isASCIICompatible);
        }
    }

}