//
// I18N.CJK.CP949
//
// Author:
//   Hye-Shik Chang (perky@FreeBSD.org)
//   Atsushi Enomoto  <atsushi@ximian.com>
//

using System;
using System.Text;
using I18N.Common;

namespace I18N.CJK
{
    // Korean (UHC) encoding, code page 949. Uses the shared Korean
    // conversion logic with the UHC extension areas enabled.
    [Serializable]
    internal class CP949 : KoreanEncoding
    {
        // Magic number used by Windows for the UHC code page.
        private const int UHC_CODE_PAGE = 949;

        // Constructor.
        public CP949 () : base (UHC_CODE_PAGE, true)
        {
        }

        // Get the mail body name for this encoding.
        public override String BodyName
        {
            get { return "ks_c_5601-1987"; }
        }

        // Get the human-readable name for this encoding.
        public override String EncodingName
        {
            get { return "Korean (UHC)"; }
        }

        // Get the mail agent header name for this encoding.
        public override String HeaderName
        {
            get { return "ks_c_5601-1987"; }
        }

        // Get the IANA-preferred Web name for this encoding.
        public override String WebName
        {
            get { return "ks_c_5601-1987"; }
        }

        /*
        // Get the Windows code page represented by this object.
        public override int WindowsCodePage
        {
            get { return UHC_PAGE; }
        }
        */
    }

    // Korean (EUC) encoding, code page 51949. Uses the shared Korean
    // conversion logic with the UHC extension areas disabled.
    [Serializable]
    internal class CP51949 : KoreanEncoding
    {
        // Magic number used by Windows for the euc-kr code page.
        private const int EUCKR_CODE_PAGE = 51949;

        // Constructor.
        public CP51949 () : base (EUCKR_CODE_PAGE, false)
        {
        }

        // Get the mail body name for this encoding.
        public override String BodyName
        {
            get { return "euc-kr"; }
        }

        // Get the human-readable name for this encoding.
        public override String EncodingName
        {
            get { return "Korean (EUC)"; }
        }

        // Get the mail agent header name for this encoding.
        public override String HeaderName
        {
            get { return "euc-kr"; }
        }

        // Get the IANA-preferred Web name for this encoding.
        public override String WebName
        {
            get { return "euc-kr"; }
        }

        /*
        // Get the Windows code page represented by this object.
        public override int WindowsCodePage
        {
            get { return UHC_PAGE; }
        }
        */

    }

    // Common implementation for the Korean double-byte encodings.
    // Encoding goes through the DbcsConvert.KS u2n table; decoding is
    // delegated to KoreanDecoder, which optionally understands the UHC
    // extension areas.
    [Serializable]
    internal class KoreanEncoding : DbcsEncoding
    {
        // True when the UHC extension areas are accepted while decoding.
        bool useUHC;

        // Constructor.
        //   codepage - code page number reported by this encoding.
        //   useUHC   - whether decoding accepts the UHC extension areas.
        public KoreanEncoding (int codepage, bool useUHC)
            : base (codepage, 949) {
            this.useUHC = useUHC;
        }

        // Conversion tables shared by every Korean encoding.
        internal override DbcsConvert GetConvert ()
        {
            return DbcsConvert.KS;
        }

#if !DISABLE_UNSAFE
        // Get the number of bytes needed to encode a character buffer.
        // Characters <= 0x80 and 0xFF take one byte; characters with a
        // u2n table entry take two; unmapped characters are counted as
        // one byte.
        public unsafe override int GetByteCountImpl (char* chars, int count)
        {
            int length = 0;
            DbcsConvert convert = GetConvert ();

            // 00 00 - FF FF
            for (int i = 0; i < count; i++) {
                char c = chars[i];
                if (c <= 0x80 || c == 0xFF) { // ASCII
                    length++;
                    continue;
                }
                byte b1 = convert.u2n[((int)c) * 2];
                byte b2 = convert.u2n[((int)c) * 2 + 1];
                if (b1 == 0 && b2 == 0) {
                    // FIXME: handle fallback for GetByteCountImpl().
                    length++;
                }
                else
                    length += 2;
            }
            return length;
        }

        // Get the bytes that result from encoding a character buffer.
        // Returns the number of bytes written to "bytes".
        public unsafe override int GetBytesImpl (char* chars, int charCount,
                             byte* bytes, int byteCount)
        {
            int charIndex = 0;
            int byteIndex = 0;
            int end = charCount;
            DbcsConvert convert = GetConvert ();
            EncoderFallbackBuffer buffer = null;

            // 00 00 - FF FF
            int origIndex = byteIndex;
            // charCount is decremented alongside i because it is handed
            // to HandleFallback by reference.
            for (int i = charIndex; i < end; i++, charCount--) {
                char c = chars[i];
                if (c <= 0x80 || c == 0xFF) { // ASCII
                    bytes[byteIndex++] = (byte)c;
                    continue;
                }
                byte b1 = convert.u2n[((int)c) * 2];
                byte b2 = convert.u2n[((int)c) * 2 + 1];
                if (b1 == 0 && b2 == 0) {
                    // No table entry: let the encoder fallback deal with it.
                    HandleFallback (ref buffer, chars, ref i, ref charCount,
                        bytes, ref byteIndex, ref byteCount, null);
                } else {
                    bytes[byteIndex++] = b1;
                    bytes[byteIndex++] = b2;
                }
            }
            return byteIndex - origIndex;
        }
#else
        // Get the number of bytes needed to encode a character buffer.
        // Characters <= 0x80 and 0xFF take one byte; characters with a
        // u2n table entry take two; unmapped characters are counted as
        // one byte.
        public override int GetByteCount(char[] chars, int index, int count)
        {
            int length = 0;
            DbcsConvert convert = GetConvert();

            // 00 00 - FF FF
            while (count-- > 0)
            {
                char c = chars[index++];
                if (c <= 0x80 || c == 0xFF)
                { // ASCII
                    length++;
                    continue;
                }
                byte b1 = convert.u2n[((int)c) * 2];
                byte b2 = convert.u2n[((int)c) * 2 + 1];
                if (b1 == 0 && b2 == 0)
                {
                    // FIXME: handle fallback for GetByteCountImpl().
                    length++;
                }
                else
                    length += 2;
            }
            return length;
        }

        // Get the bytes that result from encoding a character buffer.
        // Returns the number of bytes written to "bytes".
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            // NOTE(review): this is the full array length, not the space
            // left after byteIndex - confirm what HandleFallback expects.
            int byteCount = bytes.Length;
            int end = charIndex + charCount;

            DbcsConvert convert = GetConvert();
            EncoderFallbackBuffer buffer = null;

            // 00 00 - FF FF
            int origIndex = byteIndex;
            // charCount is decremented alongside i because it is handed
            // to HandleFallback by reference.
            for (int i = charIndex; i < end; i++, charCount--)
            {
                char c = chars[i];
                if (c <= 0x80 || c == 0xFF)
                { // ASCII
                    bytes[byteIndex++] = (byte)c;
                    continue;
                }
                byte b1 = convert.u2n[((int)c) * 2];
                byte b2 = convert.u2n[((int)c) * 2 + 1];
                if (b1 == 0 && b2 == 0)
                {
                    // No table entry: let the encoder fallback deal with it.
                    HandleFallback (ref buffer, chars, ref i, ref charCount,
                        bytes, ref byteIndex, ref byteCount, null);
                }
                else
                {
                    bytes[byteIndex++] = b1;
                    bytes[byteIndex++] = b2;
                }
            }
            return byteIndex - origIndex;
        }
#endif
        // Get the number of characters that result from decoding a byte buffer.
        public override int GetCharCount (byte[] bytes, int index, int count)
        {
            return GetDecoder ().GetCharCount (bytes, index, count);
        }

        // Get the characters that result from decoding a byte buffer.
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                         char[] chars, int charIndex)
        {
            return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
        }

        // Get a decoder that handles a rolling UHC state.
        public override Decoder GetDecoder()
        {
            return new KoreanDecoder (GetConvert (), useUHC);
        }

        // Decoder that handles a rolling UHC state: a lead byte seen at
        // the end of one call is remembered and paired with the first
        // byte of the next call.
        private sealed class KoreanDecoder : DbcsDecoder
        {
            // True when the UHC extension areas are accepted.
            bool useUHC;

            // Pending lead byte (0 = none), tracked separately for the
            // counting path and the converting path.
            int last_byte_count, last_byte_conv;

            // Constructor.
            public KoreanDecoder (DbcsConvert convert, bool useUHC)
                : base(convert)
            {
                this.useUHC = useUHC;
            }

            // Read the UTF-16 code unit stored at table position "ord"
            // of n2u (two bytes per entry, low byte first). Returns '\0'
            // when ord is negative or either byte of the entry would lie
            // outside the table.
            char Lookup (int ord)
            {
                int offset = ord * 2;
                if (ord < 0 || offset + 1 >= convert.n2u.Length)
                    return '\0';
                return (char)(convert.n2u[offset] +
                          convert.n2u[offset + 1] * 256);
            }

            // Compute the table position for a UHC trail byte within the
            // row starting at "rowBase". Trail bytes are laid out as
            // 0x41-0x5A (26 cells), 0x61-0x7A (26 cells), then 0x81 up
            // to "lastHighTrail". Returns -1 for any other trail byte.
            static int UhcOrdinal (int rowBase, int trail, int lastHighTrail)
            {
                if (trail >= 0x41 && trail <= 0x5A)
                    return rowBase + trail - 0x41;
                if (trail >= 0x61 && trail <= 0x7A)
                    return rowBase + trail - 0x61 + 26;
                if (trail >= 0x81 && trail <= lastHighTrail)
                    return rowBase + trail - 0x81 + 52;
                return -1;
            }

            // Decode one lead/trail byte pair. Returns '\0' when the
            // pair is not valid or has no table entry.
            char DecodePair (int lead, int trail)
            {
                if (useUHC && lead < 0xA1) {
                    // UHC Level 1: 178 cells per lead byte (26 + 26 + 126),
                    // placed after the first 8836 (94 * 94) table entries.
                    return Lookup (UhcOrdinal (8836 + (lead - 0x81) * 178, trail, 0xFE));
                }
                if (useUHC && lead <= 0xC6 && trail < 0xA1) {
                    // UHC Level 2: 84 cells per lead byte (26 + 26 + 32),
                    // placed after the 32 Level 1 rows (8836 + 32 * 178 = 14532).
                    return Lookup (UhcOrdinal (14532 + (lead - 0xA1) * 84, trail, 0xA0));
                }
                if (trail >= 0xA1 && trail <= 0xFE) {
                    // KS X 1001: 94 cells per lead byte. A lead byte below
                    // 0xA1 gives a negative position, which Lookup rejects.
                    return Lookup ((lead - 0xA1) * 94 + trail - 0xA1);
                }
                return '\0';
            }

            public override int GetCharCount (byte[] bytes, int index, int count)
            {
                return GetCharCount (bytes, index, count, false);
            }

            // Count the characters that decoding would produce. When
            // "refresh" is true a dangling lead byte is counted as one
            // character and the pending state is cleared; otherwise it
            // is carried over to the next call.
            public override int GetCharCount (byte [] bytes, int index, int count, bool refresh)
            {
                CheckRange (bytes, index, count);

                int lastByte = last_byte_count;
                last_byte_count = 0;
                int length = 0;
                while (count-- > 0) {
                    int b = bytes[index++];
                    if (lastByte == 0) {
                        if (b <= 0x80 || b == 0xFF) // ASCII
                            length++;
                        else
                            lastByte = b; // lead byte, wait for its trail byte
                        continue;
                    }

                    // A completed pair always yields exactly one char:
                    // either the decoded character or the '?' that
                    // GetChars emits for an invalid pair, so no table
                    // lookup is needed here.
                    // FIXME: fallback
                    length++;
                    lastByte = 0;
                }

                if (lastByte != 0) {
                    if (refresh) {
                        // FIXME: fallback
                        length++;
                        last_byte_count = 0;
                    }
                    else
                        last_byte_count = lastByte;
                }
                return length;
            }

            public override int GetChars(byte[] bytes, int byteIndex,
                         int byteCount, char[] chars, int charIndex)
            {
                return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
            }

            // Decode bytes into chars and return the number of chars
            // written. Invalid or unmapped pairs become '?'. When
            // "refresh" is true a dangling lead byte also becomes '?'
            // and the pending state is cleared; otherwise it is carried
            // over to the next call.
            public override int GetChars(byte[] bytes, int byteIndex,
                         int byteCount, char[] chars, int charIndex, bool refresh)
            {
                CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
                int origIndex = charIndex;
                int lastByte = last_byte_conv;
                last_byte_conv = 0;
                while (byteCount-- > 0) {
                    int b = bytes[byteIndex++];
                    if (lastByte == 0) {
                        if (b <= 0x80 || b == 0xFF) // ASCII
                            chars[charIndex++] = (char)b;
                        else
                            lastByte = b; // lead byte, wait for its trail byte
                        continue;
                    }

                    char c1 = DecodePair (lastByte, b);
                    if (c1 == 0)
                        chars[charIndex++] = '?';
                    else
                        chars[charIndex++] = c1;
                    lastByte = 0;
                }

                if (lastByte != 0) {
                    if (refresh) {
                        chars[charIndex++] = '?';
                        last_byte_conv = 0;
                    }
                    else
                        last_byte_conv = lastByte;
                }
                return charIndex - origIndex;
            }
        }
    }

    // Alias class deriving from CP949; it adds no members of its own.
    [Serializable]
    internal class ENCuhc : CP949
    {
        public ENCuhc() {}
    }

    // Alias class deriving from CP51949; it adds no members of its own.
    [Serializable]
    internal class ENCeuc_kr: CP51949
    {
        public ENCeuc_kr() {}
    }
}

// ex: ts=8 sts=4 et