/*
 * Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

package sun.nio.cs;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;

import jdk.internal.access.JavaLangAccess;
import jdk.internal.access.SharedSecrets;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ArrayDecoder;
import sun.nio.cs.ArrayEncoder;
import static sun.nio.cs.CharsetMapping.*;

/*
 * Four types of "DoubleByte" charsets are implemented in this class
 * (1)DoubleByte
 *    The "most widely used" multibyte charset, a combination of
 *    a singlebyte character set (usually the ASCII charset) and a
 *    doublebyte character set. The codepoint values of singlebyte
 *    and doublebyte don't overlap. Microsoft's multibyte charsets
 *    and IBM's "DBCS_ASCII" charsets, such as IBM1381, 942, 943,
 *    948, 949 and 950 are such charsets.
 *
 * (2)DoubleByte_EBCDIC
 *    IBM EBCDIC Mix multibyte charset. Use SO and SI to shift (switch)
 *    in and out between the singlebyte character set and doublebyte
 *    character set.
 *
 * (3)DoubleByte_SIMPLE_EUC
 *    It's a "simple" form of EUC encoding scheme, only the
 *    singlebyte character set G0 and one doublebyte character set
 *    G1 are defined, G2 (with SS2) and G3 (with SS3) are not used.
 *    So it is actually the same as the "typical" type (1) mentioned
 *    above, except it returns "malformed" for the SS2 and SS3 when
 *    decoding.
 *
 * (4)DoubleByte ONLY
 *    A "pure" doublebyte only character set. From implementation
 *    point of view, this is the type (1) with "decodeSingle" always
 *    returning unmappable.
 *
 * For simplicity, all implementations share the same decoding and
 * encoding data structure.
 *
 * Decoding:
 *
 *    char[][] b2c;
 *    char[] b2cSB;
 *    int b2Min, b2Max
 *
 *    public char decodeSingle(int b) {
 *        return b2cSB[b];
 *    }
 *
 *    public char decodeDouble(int b1, int b2) {
 *        if (b2 < b2Min || b2 > b2Max)
 *            return UNMAPPABLE_DECODING;
 *         return b2c[b1][b2 - b2Min];
 *    }
 *
 *    (1)b2Min, b2Max are the corresponding min and max value of the
 *       low-half of the double-byte.
 *    (2)The high 8-bit/b1 of the double-byte is used to index into
 *       the b2c array.
 *
 * Encoding:
 *
 *    char[] c2b;
 *    char[] c2bIndex;
 *
 *    public int encodeChar(char ch) {
 *        return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
 *    }
 *
 */

public class DoubleByte {

    /**
     * Shared 256-entry row in which every slot is {@code UNMAPPABLE_DECODING}.
     * {@link Decoder#crMalformedOrUnmappable(int, int)} compares {@code b2c}
     * rows against this array by identity to tell whether a byte is a
     * leading byte.
     */
    public static final char[] B2C_UNMAPPABLE;
    static {
        B2C_UNMAPPABLE = new char[0x100];
        Arrays.fill(B2C_UNMAPPABLE, UNMAPPABLE_DECODING);
    }

    /**
     * Decoder for the "typical" type (1) charset: a byte is first looked up
     * in the single-byte table; if that yields {@code UNMAPPABLE_DECODING}
     * it is treated as the first byte of a double-byte sequence.
     */
    public static class Decoder extends CharsetDecoder
                                implements DelegatableDecoder, ArrayDecoder
    {
        private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();

        // double-byte table: b2c[b1][b2 - b2Min]
        final char[][] b2c;
        // single-byte table, indexed by the unsigned byte value
        final char[] b2cSB;
        // inclusive range of valid second-byte values
        final int b2Min;
        final int b2Max;
        final boolean isASCIICompatible;

        // for SimpleEUC override
        /**
         * Result to report when the input ends after the single byte
         * {@code b} that is not mapped in the single-byte table.
         * This implementation always reports underflow.
         */
        protected CoderResult crMalformedOrUnderFlow(int b) {
            return CoderResult.UNDERFLOW;
        }

        /**
         * Classifies a byte pair that could not be decoded as either
         * malformed (length 1) or unmappable (length 2).
         */
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            if (b2c[b1] == B2C_UNMAPPABLE ||                // isNotLeadingByte(b1)
                b2c[b2] != B2C_UNMAPPABLE ||                // isLeadingByte(b2)
                decodeSingle(b2) != UNMAPPABLE_DECODING) {  // isSingle(b2)
                return CoderResult.malformedForLength(1);
            }
            return CoderResult.unmappableForLength(2);
        }

        public Decoder(Charset cs, float avgcpb, float maxcpb,
                       char[][] b2c, char[] b2cSB,
                       int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            super(cs, avgcpb, maxcpb);
            this.b2c = b2c;
            this.b2cSB = b2cSB;
            this.b2Min = b2Min;
            this.b2Max = b2Max;
            this.isASCIICompatible = isASCIICompatible;
        }

        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                       boolean isASCIICompatible) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        public Decoder(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            this(cs, 0.5f, 1.0f, b2c, b2cSB, b2Min, b2Max, false);
        }

        /**
         * Decode loop used when both buffers are array-backed. Buffer
         * positions are written back in the {@code finally} clause, so they
         * reflect only fully decoded input on every exit path.
         */
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int soff = src.arrayOffset();
            int sp = soff + src.position();
            int sl = soff + src.limit();

            char[] da = dst.array();
            int doff = dst.arrayOffset();
            int dp = doff + dst.position();
            int dl = doff + dst.limit();

            try {
                if (isASCIICompatible) {
                    // let JLA handle the leading run; source and destination
                    // both advance by the count it returns
                    int n = JLA.decodeASCII(sa, sp, da, dp, Math.min(dl - dp, sl - sp));
                    dp += n;
                    sp += n;
                }
                while (sp < sl && dp < dl) {
                    // inline the decodeSingle/Double() for better performance
                    int inSize = 1;
                    int b1 = sa[sp] & 0xff;
                    char c = b2cSB[b1];
                    if (c == UNMAPPABLE_DECODING) {
                        if (sl - sp < 2)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = sa[sp + 1] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            return crMalformedOrUnmappable(b1, b2);
                        }
                        inSize++;
                    }
                    da[dp++] = c;
                    sp += inSize;
                }
                return (sp >= sl) ? CoderResult.UNDERFLOW
                                  : CoderResult.OVERFLOW;
            } finally {
                src.position(sp - soff);
                dst.position(dp - doff);
            }
        }

        /**
         * Decode loop used when at least one buffer is not array-backed.
         * {@code mark} tracks the position after the last fully decoded
         * character; the source is rewound to it on exit.
         */
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {

                while (src.hasRemaining() && dst.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    char c = b2cSB[b1];
                    int inSize = 1;
                    if (c == UNMAPPABLE_DECODING) {
                        if (src.remaining() < 1)
                            return crMalformedOrUnderFlow(b1);
                        int b2 = src.get() & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING)
                            return crMalformedOrUnmappable(b1, b2);
                        inSize++;
                    }
                    dst.put(c);
                    mark += inSize;
                }
                return src.hasRemaining() ? CoderResult.OVERFLOW
                                          : CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        // Make some protected methods public for use by JISAutoDetect
        @Override
        public CoderResult decodeLoop(ByteBuffer src, CharBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return decodeArrayLoop(src, dst);
            else
                return decodeBufferLoop(src, dst);
        }

        /**
         * Decodes {@code len} bytes of {@code src} starting at {@code sp}
         * into {@code dst} (written from index 0), substituting the first
         * char of the replacement string for anything that cannot be decoded.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            // malformed length 1: only b1 is bad, so b2 is
                            // pushed back and re-examined in the next round
                            if (crMalformedOrUnmappable(b1, b2).length() == 1) {
                                sp--;
                            }
                        }
                    }
                    if (c == UNMAPPABLE_DECODING) {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }

        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }

        @Override
        public void implReset() {
            super.implReset();
        }

        @Override
        public CoderResult implFlush(CharBuffer out) {
            return super.implFlush(out);
        }

        // decode loops are not using decodeSingle/Double() for performance
        // reason.
        /**
         * Looks up {@code b} in the single-byte table. No range check is
         * performed on {@code b}.
         */
        public char decodeSingle(int b) {
            return b2cSB[b];
        }

        /**
         * Looks up the pair {@code (b1, b2)} in the double-byte table.
         *
         * @return the mapped char, or {@code UNMAPPABLE_DECODING} when
         *         {@code b1} is not a valid index into {@code b2c} or
         *         {@code b2} is outside {@code [b2Min, b2Max]}
         */
        public char decodeDouble(int b1, int b2) {
            // b1 == b2c.length is already out of range, hence ">="
            if (b1 < 0 || b1 >= b2c.length ||
                b2 < b2Min || b2 > b2Max)
                return UNMAPPABLE_DECODING;
            return b2c[b1][b2 - b2Min];
        }
    }

    // IBM_EBCDIC_DBCS
    /**
     * Stateful decoder: SO (0x0e) switches to double-byte mode and
     * SI (0x0f) switches back to single-byte mode.
     */
    public static class Decoder_EBCDIC extends Decoder {
        private static final int SBCS = 0;
        private static final int DBCS = 1;
        private static final int SO = 0x0e;
        private static final int SI = 0x0f;
        private int currentState;

        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                              boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        public Decoder_EBCDIC(Charset cs,
                              char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, b2c, b2cSB, b2Min, b2Max, false);
        }

        @Override
        public void implReset() {
            currentState = SBCS;
        }

        // Check validity of dbcs ebcdic byte pair values
        //
        // First byte : 0x41 -- 0xFE
        // Second byte: 0x41 -- 0xFE
        // Doublebyte blank: 0x4040
        //
        // The validation implementation in "old" DBCS_IBM_EBCDIC and sun.io
        // as
        //            if ((b1 != 0x40 || b2 != 0x40) &&
        //                (b2 < 0x41 || b2 > 0xfe)) {...}
        // is not correct/complete (range check for b1)
        //
        private static boolean isDoubleByte(int b1, int b2) {
            return (0x41 <= b1 && b1 <= 0xfe && 0x41 <= b2 && b2 <= 0xfe)
                   || (b1 == 0x40 && b2 == 0x40); // DBCS-HOST SPACE
        }

        @Override
        protected CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
            byte[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            char[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                // don't check dp/dl together here, it's possible to
                // decode a SO/SI without space in output buffer.
                while (sp < sl) {
                    int b1 = sa[sp] & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (sl - sp < 2)
                                return CoderResult.UNDERFLOW;
                            int b2 = sa[sp + 1] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;

                        da[dp++] = c;
                    }
                    sp += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        @Override
        protected CoderResult decodeBufferLoop(ByteBuffer src, CharBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    int b1 = src.get() & 0xff;
                    int inSize = 1;
                    if (b1 == SO) {  // Shift out
                        if (currentState != SBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = DBCS;
                    } else if (b1 == SI) {
                        if (currentState != DBCS)
                            return CoderResult.malformedForLength(1);
                        else
                            currentState = SBCS;
                    } else {
                        char c = UNMAPPABLE_DECODING;
                        if (currentState == SBCS) {
                            c = b2cSB[b1];
                            if (c == UNMAPPABLE_DECODING)
                                return CoderResult.unmappableForLength(1);
                        } else {
                            if (src.remaining() < 1)
                                return CoderResult.UNDERFLOW;
                            int b2 = src.get() & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                if (!isDoubleByte(b1, b2))
                                    return CoderResult.malformedForLength(2);
                                return CoderResult.unmappableForLength(2);
                            }
                            inSize++;
                        }

                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;

                        dst.put(c);
                    }
                    mark += inSize;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array decode with replacement. Always starts in single-byte mode.
         * A misplaced SO/SI, an unmapped byte or pair, and a trailing lone
         * byte in double-byte mode each produce one replacement char.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            currentState = SBCS;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                if (b1 == SO) {  // Shift out
                    if (currentState != SBCS)
                        dst[dp++] = repl;
                    else
                        currentState = DBCS;
                } else if (b1 == SI) {
                    if (currentState != DBCS)
                        dst[dp++] = repl;
                    else
                        currentState = SBCS;
                } else {
                    char c = UNMAPPABLE_DECODING;
                    if (currentState == SBCS) {
                        c = b2cSB[b1];
                        if (c == UNMAPPABLE_DECODING)
                            c = repl;
                    } else {
                        if (sl == sp) {
                            c = repl;
                        } else {
                            int b2 = src[sp++] & 0xff;
                            if (b2 < b2Min || b2 > b2Max ||
                                (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                                c = repl;
                            }
                        }
                    }
                    dst[dp++] = c;
                }
            }
            return dp;
        }
    }

    // DBCS_ONLY
    /**
     * Double-byte-only decoder: the single-byte table passed to the
     * constructors is ignored and replaced by an all-unmappable table.
     */
    public static class Decoder_DBCSONLY extends Decoder {
        static final char[] b2cSB_UNMAPPABLE;
        static {
            b2cSB_UNMAPPABLE = new char[0x100];
            Arrays.fill(b2cSB_UNMAPPABLE, UNMAPPABLE_DECODING);
        }

        // always returns unmappableForLength(2) for doublebyte_only
        @Override
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            return CoderResult.unmappableForLength(2);
        }

        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                                boolean isASCIICompatible) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, isASCIICompatible);
        }

        public Decoder_DBCSONLY(Charset cs, char[][] b2c, char[] b2cSB, int b2Min, int b2Max) {
            super(cs, 0.5f, 1.0f, b2c, b2cSB_UNMAPPABLE, b2Min, b2Max, false);
        }
    }

    // EUC_SIMPLE
    // The only thing we need to "override" is to check SS2/SS3 and
    // return "malformed" if found
    public static class Decoder_EUC_SIM extends Decoder {
        // constants, so there is no need for a copy per decoder instance
        private static final int SS2 = 0x8E;
        private static final int SS3 = 0x8F;

        public Decoder_EUC_SIM(Charset cs,
                               char[][] b2c, char[] b2cSB, int b2Min, int b2Max,
                               boolean isASCIICompatible) {
            super(cs, b2c, b2cSB, b2Min, b2Max, isASCIICompatible);
        }

        // No support provided for G2/G3 for SimpleEUC
        @Override
        protected CoderResult crMalformedOrUnderFlow(int b) {
            if (b == SS2 || b == SS3)
                return CoderResult.malformedForLength(1);
            return CoderResult.UNDERFLOW;
        }

        @Override
        protected CoderResult crMalformedOrUnmappable(int b1, int b2) {
            if (b1 == SS2 || b1 == SS3)
                return CoderResult.malformedForLength(1);
            return CoderResult.unmappableForLength(2);
        }

        /**
         * Array decode with replacement. After a failed pair whose first
         * byte is SS2 or SS3, only that byte is consumed; the second byte
         * is re-examined in the next round.
         *
         * @return the number of chars written to {@code dst}
         */
        @Override
        public int decode(byte[] src, int sp, int len, char[] dst) {
            int dp = 0;
            int sl = sp + len;
            char repl = replacement().charAt(0);
            while (sp < sl) {
                int b1 = src[sp++] & 0xff;
                char c = b2cSB[b1];
                if (c == UNMAPPABLE_DECODING) {
                    if (sp < sl) {
                        int b2 = src[sp++] & 0xff;
                        if (b2 < b2Min || b2 > b2Max ||
                            (c = b2c[b1][b2 - b2Min]) == UNMAPPABLE_DECODING) {
                            if (b1 == SS2 || b1 == SS3) {
                                sp--;
                            }
                            c = repl;
                        }
                    } else {
                        c = repl;
                    }
                }
                dst[dp++] = c;
            }
            return dp;
        }
    }

    /**
     * Encoder for the "typical" type (1) charset. {@link #encodeChar(char)}
     * yields a value that is written as one byte when it is at most
     * {@code MAX_SINGLEBYTE} and as two bytes (high byte first) otherwise.
     */
    public static class Encoder extends CharsetEncoder
                                implements ArrayEncoder
    {
        protected final int MAX_SINGLEBYTE = 0xff;
        private final char[] c2b;
        private final char[] c2bIndex;
        protected Surrogate.Parser sgp;
        final boolean isASCIICompatible;

        public Encoder(Charset cs, char[] c2b, char[] c2bIndex) {
            this(cs, c2b, c2bIndex, false);
        }

        public Encoder(Charset cs, char[] c2b, char[] c2bIndex, boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }

        public Encoder(Charset cs, float avg, float max, byte[] repl, char[] c2b, char[] c2bIndex,
                       boolean isASCIICompatible) {
            super(cs, avg, max, repl);
            this.c2b = c2b;
            this.c2bIndex = c2bIndex;
            this.isASCIICompatible = isASCIICompatible;
        }

        @Override
        public boolean canEncode(char c) {
            return encodeChar(c) != UNMAPPABLE_ENCODING;
        }

        /** Returns the surrogate parser, creating it on first use. */
        protected Surrogate.Parser sgp() {
            if (sgp == null)
                sgp = new Surrogate.Parser();
            return sgp;
        }

        /**
         * Encode loop used when both buffers are array-backed. Buffer
         * positions are written back in the {@code finally} clause.
         */
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();

            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }

                    if (bb > MAX_SINGLEBYTE) {    // DoubleByte
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                      // SingleByte
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;
                    }

                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        /**
         * Encode loop used when at least one buffer is not array-backed.
         * The source is rewound to the last fully encoded char on exit.
         */
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        @Override
        protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) {
            if (src.hasArray() && dst.hasArray())
                return encodeArrayLoop(src, dst);
            else
                return encodeBufferLoop(src, dst);
        }

        // replacement bytes used by the array-based encode methods below;
        // kept in sync through implReplaceWith()
        protected byte[] repl = replacement();

        @Override
        protected void implReplaceWith(byte[] newReplacement) {
            repl = newReplacement;
        }

        /**
         * Encodes {@code len} chars of {@code src} starting at {@code sp}
         * into {@code dst} (written from index 0). Unmappable chars are
         * replaced by the first one or two bytes of {@code repl}; a
         * high/low surrogate pair is replaced as a single unit.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                          // SingleByte
                    dst[dp++] = (byte)bb;
                }
            }
            return dp;
        }

        /**
         * Same as {@link #encode(char[], int, int, byte[])} but each source
         * byte is taken as one char in the range 0x00-0xff.
         */
        @Override
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = (char)(src[sp++] & 0xff);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    // no surrogate pair in latin1 string
                    dst[dp++] = repl[0];
                    if (repl.length > 1) {
                        dst[dp++] = repl[1];
                    }
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                   // SingleByte
                    dst[dp++] = (byte)bb;
                }

            }
            return dp;
        }

        /**
         * Same as {@link #encode(char[], int, int, byte[])} but the source
         * chars are read from {@code src} through {@code StringUTF16.getChar};
         * {@code sp} and {@code len} count chars.
         */
        @Override
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = StringUTF16.getChar(src, sp++);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1) {
                        dst[dp++] = repl[1];
                    }
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) { // DoubleByte
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                   // SingleByte
                    dst[dp++] = (byte)bb;
                }
            }
            return dp;
        }

        @Override
        public boolean isASCIICompatible() {
            return isASCIICompatible;
        }

        /**
         * Two-level table lookup: {@code c2bIndex[ch >> 8]} selects a
         * 256-entry page in {@code c2b}, {@code ch & 0xff} the slot in it.
         */
        public int encodeChar(char ch) {
            return c2b[c2bIndex[ch >> 8] + (ch & 0xff)];
        }

        // init the c2b and c2bIndex tables from b2c.
        /**
         * Fills {@code c2b} / {@code c2bIndex} by inverting the decoding
         * tables.
         *
         * @param b2c      double-byte rows, one string per first byte;
         *                 {@code null} rows are skipped
         * @param b2cSB    single-byte table, may be {@code null}
         * @param b2cNR    (byte value, char) pairs that are removed from the
         *                 decoding tables before inversion, may be {@code null}
         * @param c2bNR    (byte value, char) pairs added to the encoding
         *                 table only, may be {@code null}
         * @param b2Min    smallest valid second byte
         * @param b2Max    largest valid second byte
         * @param c2b      output: encoding table, overwritten
         * @param c2bIndex output: page index; a zero entry is treated as
         *                 "page not yet allocated"
         */
        public static void initC2B(String[] b2c, String b2cSB, String b2cNR, String c2bNR,
                                   int b2Min, int b2Max,
                                   char[] c2b, char[] c2bIndex)
        {
            Arrays.fill(c2b, (char)UNMAPPABLE_ENCODING);
            // next free page in c2b; page 0 stays all-unmappable
            int off = 0x100;

            char[][] b2c_ca = new char[b2c.length][];
            char[] b2cSB_ca = null;
            if (b2cSB != null)
                b2cSB_ca = b2cSB.toCharArray();

            for (int i = 0; i < b2c.length; i++) {
                if (b2c[i] == null)
                    continue;
                b2c_ca[i] = b2c[i].toCharArray();
            }

            if (b2cNR != null) {
                int j = 0;
                while (j < b2cNR.length()) {
                    char b = b2cNR.charAt(j++);
                    char c = b2cNR.charAt(j++);
                    if (b < 0x100 && b2cSB_ca != null) {
                        if (b2cSB_ca[b] == c)
                            b2cSB_ca[b] = UNMAPPABLE_DECODING;
                    } else {
                        if (b2c_ca[b >> 8][(b & 0xff) - b2Min] == c)
                            b2c_ca[b >> 8][(b & 0xff) - b2Min] = UNMAPPABLE_DECODING;
                    }
                }
            }

            if (b2cSB_ca != null) {      // SingleByte
                for (int b = 0; b < b2cSB_ca.length; b++) {
                    char c = b2cSB_ca[b];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)b;
                }
            }

            for (int b1 = 0; b1 < b2c.length; b1++) {  // DoubleByte
                char[] db = b2c_ca[b1];
                if (db == null)
                    continue;
                for (int b2 = b2Min; b2 <= b2Max; b2++) {
                    char c = db[b2 - b2Min];
                    if (c == UNMAPPABLE_DECODING)
                        continue;
                    int index = c2bIndex[c >> 8];
                    if (index == 0) {
                        index = off;
                        off += 0x100;
                        c2bIndex[c >> 8] = (char)index;
                    }
                    c2b[index + (c & 0xff)] = (char)((b1 << 8) | b2);
                }
            }

            if (c2bNR != null) {
                // add c->b only nr entries
                for (int i = 0; i < c2bNR.length(); i += 2) {
                    char b = c2bNR.charAt(i);
                    char c = c2bNR.charAt(i + 1);
                    int index = (c >> 8);
                    if (c2bIndex[index] == 0) {
                        c2bIndex[index] = (char)off;
                        off += 0x100;
                    }
                    index = c2bIndex[index] + (c & 0xff);
                    c2b[index] = b;
                }
            }
        }
    }

    /**
     * Double-byte-only encoder: any char whose table entry fits in a single
     * byte is reported as unmappable.
     */
    public static class Encoder_DBCSONLY extends Encoder {

        public Encoder_DBCSONLY(Charset cs, byte[] repl,
                                char[] c2b, char[] c2bIndex,
                                boolean isASCIICompatible) {
            super(cs, 2.0f, 2.0f, repl, c2b, c2bIndex, isASCIICompatible);
        }

        @Override
        public int encodeChar(char ch) {
            int bb = super.encodeChar(ch);
            if (bb <= MAX_SINGLEBYTE)
                return UNMAPPABLE_ENCODING;
            return bb;
        }
    }

    /**
     * Stateful encoder: emits SO before the first double-byte char of a run
     * and SI before the next single-byte char (or at flush / end of the
     * array-based encode methods).
     */
    public static class Encoder_EBCDIC extends Encoder {
        static final int SBCS = 0;
        static final int DBCS = 1;
        static final byte SO = 0x0e;
        static final byte SI = 0x0f;

        protected int currentState = SBCS;

        public Encoder_EBCDIC(Charset cs, char[] c2b, char[] c2bIndex,
                              boolean isASCIICompatible) {
            super(cs, 4.0f, 5.0f, new byte[] {(byte)0x6f}, c2b, c2bIndex, isASCIICompatible);
        }

        @Override
        protected void implReset() {
            currentState = SBCS;
        }

        @Override
        protected CoderResult implFlush(ByteBuffer out) {
            if (currentState == DBCS) {
                if (out.remaining() < 1)
                    return CoderResult.OVERFLOW;
                out.put(SI);
            }
            implReset();
            return CoderResult.UNDERFLOW;
        }

        @Override
        protected CoderResult encodeArrayLoop(CharBuffer src, ByteBuffer dst) {
            char[] sa = src.array();
            int sp = src.arrayOffset() + src.position();
            int sl = src.arrayOffset() + src.limit();
            byte[] da = dst.array();
            int dp = dst.arrayOffset() + dst.position();
            int dl = dst.arrayOffset() + dst.limit();

            try {
                while (sp < sl) {
                    char c = sa[sp];
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, sa, sp, sl) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            da[dp++] = SO;
                        }
                        if (dl - dp < 2)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)(bb >> 8);
                        da[dp++] = (byte)bb;
                    } else {                    // SingleByte
                        if (currentState == DBCS) {
                            if (dl - dp < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            da[dp++] = SI;
                        }
                        if (dl - dp < 1)
                            return CoderResult.OVERFLOW;
                        da[dp++] = (byte)bb;

                    }
                    sp++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(sp - src.arrayOffset());
                dst.position(dp - dst.arrayOffset());
            }
        }

        @Override
        protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) {
            int mark = src.position();
            try {
                while (src.hasRemaining()) {
                    char c = src.get();
                    int bb = encodeChar(c);
                    if (bb == UNMAPPABLE_ENCODING) {
                        if (Character.isSurrogate(c)) {
                            if (sgp().parse(c, src) < 0)
                                return sgp.error();
                            return sgp.unmappableResult();
                        }
                        return CoderResult.unmappableForLength(1);
                    }
                    if (bb > MAX_SINGLEBYTE) {  // DoubleByte
                        if (currentState == SBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = DBCS;
                            dst.put(SO);
                        }
                        if (dst.remaining() < 2)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)(bb >> 8));
                        dst.put((byte)(bb));
                    } else {                  // Single-byte
                        if (currentState == DBCS) {
                            if (dst.remaining() < 1)
                                return CoderResult.OVERFLOW;
                            currentState = SBCS;
                            dst.put(SI);
                        }
                        if (dst.remaining() < 1)
                            return CoderResult.OVERFLOW;
                        dst.put((byte)bb);
                    }
                    mark++;
                }
                return CoderResult.UNDERFLOW;
            } finally {
                src.position(mark);
            }
        }

        /**
         * Array encode with replacement and SO/SI shifting. If the output
         * ends in double-byte mode a closing SI is appended and the state
         * returns to single-byte mode.
         *
         * @return the number of bytes written to {@code dst}
         */
        @Override
        public int encode(char[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = src[sp++];
                int bb = encodeChar(c);

                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(src[sp])) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }

            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }

        @Override
        public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = (char)(src[sp++] & 0xff);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    // no surrogate pair in latin1 string
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }
            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }

        @Override
        public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) {
            int dp = 0;
            int sl = sp + len;
            while (sp < sl) {
                char c = StringUTF16.getChar(src, sp++);
                int bb = encodeChar(c);
                if (bb == UNMAPPABLE_ENCODING) {
                    if (Character.isHighSurrogate(c) && sp < sl &&
                        Character.isLowSurrogate(StringUTF16.getChar(src, sp))) {
                        sp++;
                    }
                    dst[dp++] = repl[0];
                    if (repl.length > 1)
                        dst[dp++] = repl[1];
                    continue;
                } //else
                if (bb > MAX_SINGLEBYTE) {           // DoubleByte
                    if (currentState == SBCS) {
                        currentState = DBCS;
                        dst[dp++] = SO;
                    }
                    dst[dp++] = (byte)(bb >> 8);
                    dst[dp++] = (byte)bb;
                } else {                             // SingleByte
                    if (currentState == DBCS) {
                        currentState = SBCS;
                        dst[dp++] = SI;
                    }
                    dst[dp++] = (byte)bb;
                }
            }
            if (currentState == DBCS) {
                currentState = SBCS;
                dst[dp++] = SI;
            }
            return dp;
        }
    }

    // EUC_SIMPLE
    public static class Encoder_EUC_SIM extends Encoder {
        public Encoder_EUC_SIM(Charset cs, char[] c2b, char[] c2bIndex,
                               boolean isASCIICompatible) {
            super(cs, c2b, c2bIndex, isASCIICompatible);
        }
    }

}