//------------------------------------------------------------------------------
// <copyright file="XmlEncoding.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
// <owner current="true" primary="true">Microsoft</owner>
//------------------------------------------------------------------------------

using System.Text;
using System.Diagnostics;

namespace System.Xml {

    // Stateful decoder that turns a stream of 16-bit code units (big- or little-endian,
    // chosen at construction) into chars. A trailing odd byte is remembered in lastByte
    // and combined with the first byte of the next call.
    internal class UTF16Decoder : System.Text.Decoder {
        private bool bigEndian;
        // Pending first byte of a code unit split across calls, or -1 when nothing is pending.
        private int lastByte;
        private const int CharSize = 2;

        public UTF16Decoder( bool bigEndian ) {
            this.lastByte = -1;
            this.bigEndian = bigEndian;
        }

        // Number of chars the next GetChars call would produce for 'count' bytes (no flush).
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return GetCharCount( bytes, index, count, false );
        }

        // Counts the pending byte (if any) together with 'count'. With flush set, an odd
        // total is an error because the last code unit would be incomplete.
        public override int GetCharCount( byte[] bytes, int index, int count, bool flush ) {
            int byteCount = count + ( ( lastByte >= 0 ) ? 1 : 0 );
            if ( flush && ( byteCount % CharSize != 0 ) ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { -1 } ), (string)null );
            }
            return byteCount / CharSize;
        }

        // Decodes byteCount bytes starting at byteIndex into chars starting at charIndex and
        // returns the number of chars written. An odd trailing byte is saved for the next call.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            // Must be computed before lastByte is consumed below, since it depends on it.
            int charCount = GetCharCount( bytes, byteIndex, byteCount );

            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    return charCount;
                }
                // complete the code unit that was split across calls
                int nextByte = bytes[byteIndex++];
                byteCount--;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                lastByte = -1;
            }

            // keep an odd trailing byte for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + --byteCount];
            }

            // use the fast BlockCopy if possible (input byte order equals the platform's);
            // otherwise assemble each char by hand
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int byteEnd = byteIndex + byteCount;
                if ( bigEndian ) {
                    while ( byteIndex < byteEnd ) {
                        int hi = bytes[byteIndex++];
                        int lo = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( byteIndex < byteEnd ) {
                        int lo = bytes[byteIndex++];
                        int hi = bytes[byteIndex++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, byteCount );
            }
            return charCount;
        }

        // Decodes as many bytes as fit into charCount chars. bytesUsed/charsUsed report the
        // amounts consumed/produced; completed is false when the input had to be truncated
        // to fit charCount. The flush argument is not consulted.
        // NOTE(review): when a byte is pending, the completed char is stored without first
        // checking that charCount > 0 - callers presumably always pass a non-empty buffer.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            charsUsed = 0;
            bytesUsed = 0;

            if ( lastByte >= 0 ) {
                if ( byteCount == 0 ) {
                    completed = true;
                    return;
                }
                // complete the code unit that was split across calls
                int nextByte = bytes[byteIndex++];
                byteCount--;
                bytesUsed++;

                chars[charIndex++] = bigEndian
                    ? (char)( lastByte << 8 | nextByte )
                    : (char)( nextByte << 8 | lastByte );
                charCount--;
                charsUsed++;
                lastByte = -1;
            }

            // never consume more bytes than the remaining char capacity can hold
            if ( charCount * CharSize < byteCount ) {
                byteCount = charCount * CharSize;
                completed = false;
            }
            else {
                completed = true;
            }

            // only whole byte pairs are decoded here; ( byteCount & ~0x1 ) drops an odd trailing byte
            if ( bigEndian == BitConverter.IsLittleEndian ) {
                int i = byteIndex;
                int byteEnd = i + ( byteCount & ~0x1 );
                if ( bigEndian ) {
                    while ( i < byteEnd ) {
                        int hi = bytes[i++];
                        int lo = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
                else {
                    while ( i < byteEnd ) {
                        int lo = bytes[i++];
                        int hi = bytes[i++];
                        chars[charIndex++] = (char)( hi << 8 | lo );
                    }
                }
            }
            else {
                Buffer.BlockCopy( bytes, byteIndex, chars, charIndex * CharSize, (int)(byteCount & ~0x1) );
            }
            charsUsed += byteCount / CharSize;
            bytesUsed += byteCount;

            // the odd trailing byte counts as used and is kept for the next call
            if ( ( byteCount & 1 ) != 0 ) {
                lastByte = bytes[byteIndex + byteCount - 1];
            }
        }
    }

    // Stateless decoder that maps every input byte to the char with the same numeric value.
    internal class SafeAsciiDecoder : Decoder {

        public SafeAsciiDecoder() {
        }

        // One char per byte.
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return count;
        }

        // Copies byteCount bytes into chars, widening each byte to a char; returns byteCount.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int i = byteIndex;
            int j = charIndex;
            while ( i < byteIndex + byteCount ) {
                chars[j++] = (char)bytes[i++];
            }
            return byteCount;
        }

        // Like GetChars, but limited to charCount chars; completed is false when the input
        // had to be truncated. The flush argument is not consulted.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            if ( charCount < byteCount ) {
                byteCount = charCount;
                completed = false;
            }
            else {
                completed = true;
            }

            int i = byteIndex;
            int j = charIndex;
            int byteEndIndex = byteIndex + byteCount;

            while ( i < byteEndIndex ) {
                chars[j++] = (char)bytes[i++];
            }

            charsUsed = byteCount;
            bytesUsed = byteCount;
        }
    }

#if !SILVERLIGHT
    // Base class of the decode-only UCS-4 encodings. All decoding is delegated to the
    // Ucs4Decoder instance that each subclass stores in ucs4Decoder; the encoding members
    // (GetBytes, GetMaxByteCount, GetEncoder) are stubs returning null / 0.
    internal class Ucs4Encoding : Encoding {
        internal Ucs4Decoder ucs4Decoder;

        public override string WebName {
            get {
                return this.EncodingName;
            }
        }

        // Returns the shared decoder instance of this encoding object (not a fresh one).
        public override Decoder GetDecoder() {
            return ucs4Decoder;
        }

        public override int GetByteCount( char[] chars, int index, int count ) {
            return checked( count * 4 );
        }

        public override int GetByteCount( char[] chars ) {
            // checked for consistency with the ( chars, index, count ) overload
            return checked( chars.Length * 4 );
        }

        // Encoding is not implemented.
        public override byte[] GetBytes( string s ) {
            return null; //ucs4Decoder.GetByteCount(chars, index, count);
        }

        // Encoding is not implemented.
        public override int GetBytes( char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex ) {
            return 0;
        }

        // Encoding is not implemented.
        public override int GetMaxByteCount( int charCount ) {
            return 0;
        }

        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ucs4Decoder.GetCharCount( bytes, index, count );
        }

        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            return ucs4Decoder.GetChars( bytes, byteIndex, byteCount, chars, charIndex );
        }

        // Number of byte quadruplets, rounded up.
        public override int GetMaxCharCount( int byteCount ) {
            return ( byteCount + 3 ) / 4;
        }

        public override int CodePage {
            get {
                return 0;
            }
        }

        // Number of complete byte quadruplets.
        public override int GetCharCount( byte[] bytes ) {
            return bytes.Length / 4;
        }

        // Encoding is not implemented.
        public override Encoder GetEncoder() {
            return null;
        }

        // Each property below creates a new encoding object on every access.

        internal static Encoding UCS4_Littleendian {
            get {
                return new Ucs4Encoding4321();
            }
        }

        internal static Encoding UCS4_Bigendian {
            get {
                return new Ucs4Encoding1234();
            }
        }

        internal static Encoding UCS4_2143 {
            get {
                return new Ucs4Encoding2143();
            }
        }

        internal static Encoding UCS4_3412 {
            get {
                return new Ucs4Encoding3412();
            }
        }
    }

    // UCS-4 with the most significant byte first.
    internal class Ucs4Encoding1234 : Ucs4Encoding {

        public Ucs4Encoding1234() {
            ucs4Decoder = new Ucs4Decoder1234();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (Bigendian)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0x00, 0x00, 0xfe, 0xff };
        }
    }

    // UCS-4 with the least significant byte first.
    internal class Ucs4Encoding4321 : Ucs4Encoding {

        public Ucs4Encoding4321() {
            ucs4Decoder = new Ucs4Decoder4321();
        }

        public override string EncodingName {
            get {
                return "ucs-4";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0xff, 0xfe, 0x00, 0x00 };
        }
    }

    // UCS-4 in the 2143 byte order (bytes swapped within each 16-bit half).
    internal class Ucs4Encoding2143 : Ucs4Encoding {

        public Ucs4Encoding2143() {
            ucs4Decoder = new Ucs4Decoder2143();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (order 2143)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0x00, 0x00, 0xff, 0xfe };
        }
    }

    // UCS-4 in the 3412 byte order (16-bit halves swapped).
    internal class Ucs4Encoding3412 : Ucs4Encoding {

        public Ucs4Encoding3412() {
            ucs4Decoder = new Ucs4Decoder3412();
        }

        public override string EncodingName {
            get {
                return "ucs-4 (order 3412)";
            }
        }

        public override byte[] GetPreamble() {
            return new byte[4] { 0xfe, 0xff, 0x00, 0x00 };
        }
    }

    // Stateful base decoder for the four UCS-4 byte orders. Subclasses only assemble the
    // 32-bit value from four bytes (GetFullChars); this class handles quadruplets that are
    // split across calls by caching the leftover 1-3 bytes in lastBytes.
    internal abstract class Ucs4Decoder : Decoder {

        internal byte [] lastBytes = new byte[4];
        internal int lastBytesCount = 0;

        // Number of complete quadruplets available, counting cached bytes.
        // NOTE(review): a code point above 0xFFFF is written as two chars by GetFullChars,
        // so this value can be smaller than what GetChars actually writes.
        public override int GetCharCount( byte[] bytes, int index, int count ) {
            return ( count + lastBytesCount ) / 4;
        }

        // Decodes every complete quadruplet in bytes[byteIndex .. byteIndex + byteCount) into
        // chars starting at charIndex and returns the number of chars written. Trailing bytes
        // that do not form a quadruplet are ignored.
        internal abstract int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex );

        // Decodes byteCount bytes into chars and returns the number of chars written.
        // Leftover bytes (fewer than 4) are cached for the next call.
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            // finish a character from the bytes that were cached last time
            int i = 0;
            if ( lastBytesCount > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lastBytesCount < 4 && byteCount > 0; lastBytesCount++ ) {
                    lastBytes[lastBytesCount] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                }
                // still not enough bytes -> return
                if ( lastBytesCount < 4 ) {
                    return 0;
                }
                // decode 1 code point from the byte cache (1 char, or 2 for a surrogate pair)
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 || i == 2 );
                charIndex += i;
                lastBytesCount = 0;
            }

            // decode block of byte quadruplets
            i = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache remaining bytes that do not make up a character
            int bytesLeft = ( byteCount & 0x3 );
            for ( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;
            return i;
        }

        // Like GetChars, but consumes at most charCount quadruplets. bytesUsed/charsUsed
        // report the amounts consumed/produced; completed is false when input was left over.
        // The flush argument is not consulted.
        // NOTE(review): the limit is expressed in quadruplets (charCount * 4 bytes); code
        // points above 0xFFFF produce two chars each, so more than charCount chars can be
        // written - confirm callers leave headroom in 'chars'.
        public override void Convert( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed ) {
            bytesUsed = 0;
            charsUsed = 0;
            // finish a character from the bytes that were cached last time
            int i = 0;
            int lbc = lastBytesCount;
            if ( lbc > 0 ) {
                // copy remaining bytes into the cache
                for ( ; lbc < 4 && byteCount > 0; lbc++ ) {
                    lastBytes[lbc] = bytes[byteIndex];
                    byteIndex++;
                    byteCount--;
                    bytesUsed++;
                }
                // still not enough bytes -> return
                if ( lbc < 4 ) {
                    lastBytesCount = lbc;
                    completed = true;
                    return;
                }
                // decode 1 code point from the byte cache (1 char, or 2 for a surrogate pair)
                i = GetFullChars( lastBytes, 0 , 4, chars, charIndex );
                Debug.Assert( i == 1 || i == 2 );
                charIndex += i;
                charCount -= i;
                charsUsed = i;

                lastBytesCount = 0;

                // if that's all that was requested -> return
                // ( <= rather than ==: a surrogate pair can take charCount below zero, which
                // would otherwise turn into a negative byteCount below )
                if ( charCount <= 0 ) {
                    completed = ( byteCount == 0 );
                    return;
                }
            }

            // modify the byte count for GetFullChars depending on how many characters were requested
            if ( charCount * 4 < byteCount ) {
                byteCount = charCount * 4;
                completed = false;
            }
            else {
                completed = true;
            }
            bytesUsed += byteCount;

            // decode block of byte quadruplets
            charsUsed = GetFullChars( bytes, byteIndex, byteCount, chars, charIndex ) + i;

            // cache remaining bytes that do not make up a character
            int bytesLeft = ( byteCount & 0x3 );
            for ( int j = 0; j < bytesLeft; j++ ) {
                lastBytes[j] = bytes[byteIndex + byteCount - bytesLeft + j];
            }
            lastBytesCount = bytesLeft;
        }

        // Writes the UTF-16 surrogate pair for 'code' to chars[charIndex] and chars[charIndex + 1].
        // Expects 0x10000 <= code <= 0x10FFFF (StoreCodePoint guarantees this).
        internal void Ucs4ToUTF16( uint code, char[] chars, int charIndex ) {
            // UTF-16: subtract 0x10000, then the upper 10 bits of the 20-bit remainder go into
            // the high surrogate and the lower 10 bits into the low surrogate. The former code
            // added the plane number without shifting it by 6 bits, which produced a wrong
            // high surrogate for every code point at or above 0x20000.
            uint offset = code - 0x10000;
            chars[charIndex] = (char)( XmlCharType.SurHighStart + (int)( offset >> 10 ) );
            chars[charIndex + 1] = (char)( XmlCharType.SurLowStart + (int)( offset & 0x3FF ) );
        }

        // Validates one assembled UCS-4 value and stores it at chars[charIndex] as one char,
        // or as a surrogate pair when it is above 0xFFFF. Returns the number of chars written.
        // byteOffset is the index of the quadruplet in the input and is only used for the
        // error message. Throws ArgumentException for values above 0x10FFFF and XmlException
        // for values for which XmlCharType.IsSurrogate returns true.
        internal int StoreCodePoint( uint code, int byteOffset, char[] chars, int charIndex ) {
            if ( code > 0x10FFFF ) {
                throw new ArgumentException( Res.GetString( Res.Enc_InvalidByteInEncoding, new object[1] { byteOffset } ), (string)null );
            }
            if ( code > 0xFFFF ) {
                Ucs4ToUTF16( code, chars, charIndex );
                return 2;
            }
            if ( XmlCharType.IsSurrogate( (int)code ) ) {
                throw new XmlException( Res.Xml_InvalidCharInThisEncoding, string.Empty );
            }
            chars[charIndex] = (char)code;
            return 1;
        }
    }

    // Least significant byte first (little-endian).
    internal class Ucs4Decoder4321 : Ucs4Decoder {

        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int byteEnd = byteIndex + byteCount;
            int j = charIndex;

            for ( int i = byteIndex; i + 3 < byteEnd; i += 4 ) {
                uint code = (uint)( ( bytes[i+3] << 24 ) | ( bytes[i+2] << 16 ) | ( bytes[i+1] << 8 ) | bytes[i] );
                j += StoreCodePoint( code, i, chars, j );
            }
            return j - charIndex;
        }
    }

    // Most significant byte first (big-endian).
    internal class Ucs4Decoder1234 : Ucs4Decoder {

        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int byteEnd = byteIndex + byteCount;
            int j = charIndex;

            for ( int i = byteIndex; i + 3 < byteEnd; i += 4 ) {
                uint code = (uint)( ( bytes[i] << 24 ) | ( bytes[i+1] << 16 ) | ( bytes[i+2] << 8 ) | bytes[i+3] );
                j += StoreCodePoint( code, i, chars, j );
            }
            return j - charIndex;
        }
    }

    // Byte order 2143: big-endian with the two bytes of each 16-bit half swapped.
    internal class Ucs4Decoder2143 : Ucs4Decoder {

        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int byteEnd = byteIndex + byteCount;
            int j = charIndex;

            for ( int i = byteIndex; i + 3 < byteEnd; i += 4 ) {
                uint code = (uint)( ( bytes[i+1] << 24 ) | ( bytes[i] << 16 ) | ( bytes[i+3] << 8 ) | bytes[i+2] );
                j += StoreCodePoint( code, i, chars, j );
            }
            return j - charIndex;
        }
    }

    // Byte order 3412: big-endian with the two 16-bit halves swapped.
    internal class Ucs4Decoder3412 : Ucs4Decoder {

        internal override int GetFullChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex ) {
            int byteEnd = byteIndex + byteCount;
            int j = charIndex;

            for ( int i = byteIndex; i + 3 < byteEnd; i += 4 ) {
                uint code = (uint)( ( bytes[i+2] << 24 ) | ( bytes[i+3] << 16 ) | ( bytes[i] << 8 ) | bytes[i+1] );
                j += StoreCodePoint( code, i, chars, j );
            }
            return j - charIndex;
        }
    }
#endif
}