1 // Licensed to the .NET Foundation under one or more agreements. 2 // The .NET Foundation licenses this file to you under the MIT license. 3 // See the LICENSE file in the project root for more information. 4 5 using System.Diagnostics; 6 using System.Globalization; 7 using System.Threading; 8 using System.Runtime.InteropServices; 9 using System.Runtime.Serialization; 10 using System.Diagnostics.CodeAnalysis; 11 12 namespace System.Text 13 { 14 // This abstract base class represents a character encoding. The class provides 15 // methods to convert arrays and strings of Unicode characters to and from 16 // arrays of bytes. A number of Encoding implementations are provided in 17 // the System.Text package, including: 18 // 19 // ASCIIEncoding, which encodes Unicode characters as single 7-bit 20 // ASCII characters. This encoding only supports character values between 0x00 21 // and 0x7F. 22 // BaseCodePageEncoding, which encapsulates a Windows code page. Any 23 // installed code page can be accessed through this encoding, and conversions 24 // are performed using the WideCharToMultiByte and 25 // MultiByteToWideChar Windows API functions. 26 // UnicodeEncoding, which encodes each Unicode character as two 27 // consecutive bytes. Both little-endian (code page 1200) and big-endian (code 28 // page 1201) encodings are recognized. 29 // UTF7Encoding, which encodes Unicode characters using the UTF-7 30 // encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This 31 // encoding supports all Unicode character values, and can also be accessed 32 // as code page 65000. 33 // UTF8Encoding, which encodes Unicode characters using the UTF-8 34 // encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This 35 // encoding supports all Unicode character values, and can also be accessed 36 // as code page 65001. 
37 // UTF32Encoding, both 12000 (little endian) & 12001 (big endian) 38 // 39 // In addition to directly instantiating Encoding objects, an 40 // application can use the ForCodePage, GetASCII, 41 // GetDefault, GetUnicode, GetUTF7, and GetUTF8 42 // methods in this class to obtain encodings. 43 // 44 // Through an encoding, the GetBytes method is used to convert arrays 45 // of characters to arrays of bytes, and the GetChars method is used to 46 // convert arrays of bytes to arrays of characters. The GetBytes and 47 // GetChars methods maintain no state between conversions, and are 48 // generally intended for conversions of complete blocks of bytes and 49 // characters in one operation. When the data to be converted is only available 50 // in sequential blocks (such as data read from a stream) or when the amount of 51 // data is so large that it needs to be divided into smaller blocks, an 52 // application may choose to use a Decoder or an Encoder to 53 // perform the conversion. Decoders and encoders allow sequential blocks of 54 // data to be converted and they maintain the state required to support 55 // conversions of data that spans adjacent blocks. Decoders and encoders are 56 // obtained using the GetDecoder and GetEncoder methods. 57 // 58 // The core GetBytes and GetChars methods require the caller 59 // to provide the destination buffer and ensure that the buffer is large enough 60 // to hold the entire result of the conversion. When using these methods, 61 // either directly on an Encoding object or on an associated 62 // Decoder or Encoder, an application can use one of two methods 63 // to allocate destination buffers. 64 // 65 // The GetByteCount and GetCharCount methods can be used to 66 // compute the exact size of the result of a particular conversion, and an 67 // appropriately sized buffer for that conversion can then be allocated. 
// The GetMaxByteCount and GetMaxCharCount methods can be used to compute
// the maximum possible size of a conversion of a given number of bytes or
// characters, and a buffer of that size can then be reused for multiple
// conversions.
//
// The first method generally uses less memory, whereas the second method
// generally executes faster.
//

public abstract class Encoding : ICloneable
{
    // Netcore has no ANSI code page available, so the default encoding is a
    // sealed UTF-8 instance created with encoderShouldEmitUTF8Identifier: false.
    private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding =
        new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);

    // The default encoding; always the UTF-8 instance held in s_defaultEncoding.
    public static Encoding Default => s_defaultEncoding;

    //
    // Flag values taken from mlang.idl; keep them in sync with that file.
    //
    internal const int MIMECONTF_MAILNEWS = 0x00000001;
    internal const int MIMECONTF_BROWSER = 0x00000002;
    internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;
    internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;

    // Special-case code pages
    private const int CodePageDefault = 0;
    private const int CodePageNoOEM = 1;            // OEM code page, not supported
    private const int CodePageNoMac = 2;            // MAC code page, not supported
    private const int CodePageNoThread = 3;         // Thread code page, not supported
    private const int CodePageNoSymbol = 42;        // Symbol code page, not supported
    private const int CodePageUnicode = 1200;       // Unicode
    private const int CodePageBigEndian = 1201;     // Big-endian Unicode
    private const int CodePageWindows1252 = 1252;   // Windows 1252 code page

    // 20936 has the same code page as 10008, so it is special-cased
    private const int CodePageMacGB2312 = 10008;
    private const int CodePageGB2312 = 20936;
    private const int CodePageMacKorean = 10003;
    private const int CodePageDLLKorean = 20949;

    // ISO 2022 code pages
    private const int ISO2022JP = 50220;
    private const int ISO2022JPESC = 50221;
    private const int ISO2022JPSISO = 50222;
    private const int ISOKorean = 50225;
    private const int ISOSimplifiedCN = 50227;
    private const int EUCJP = 51932;
    private const int ChineseHZ = 52936;            // HZ has ~}~{~~ sequences

    // 51936 is the same as 936
    private const int DuplicateEUCCN = 51936;
    private const int EUCCN = 936;

    private const int EUCKR = 51949;

    // Latin 1 and ASCII code pages
    internal const int CodePageASCII = 20127;       // ASCII
    internal const int ISO_8859_1 = 28591;          // Latin1

    // ISCII (listed in numeric order)
    private const int ISCIIDevanagari = 57002;
    private const int ISCIIBengali = 57003;
    private const int ISCIITamil = 57004;
    private const int ISCIITelugu = 57005;
    private const int ISCIIAssemese = 57006;
    private const int ISCIIOriya = 57007;
    private const int ISCIIKannada = 57008;
    private const int ISCIIMalayalam = 57009;
    private const int ISCIIGujarathi = 57010;
    private const int ISCIIPanjabi = 57011;

    // GB18030
    private const int GB18030 = 54936;

    // Other
    private const int ISO_8859_8I = 38598;
    private const int ISO_8859_8_Visual = 28598;

    // 50229, "Chinese Traditional (ISO-2022)", is currently unsupported
    private const int ENC50229 = 50229;

    // UTF code pages
    private const int CodePageUTF7 = 65000;
    private const int CodePageUTF8 = 65001;
    private const int CodePageUTF32 = 12000;
    private const int CodePageUTF32BE = 12001;

    // Code page number this instance was constructed with.
    internal int _codePage;

    // Code page table entry; filled in on demand by the metadata properties.
    internal CodePageDataItem _dataItem;

    // Instances start out read-only; Clone() produces a copy with this cleared.
    [OptionalField(VersionAdded = 2)]
    private bool _isReadOnly = true;

    // Fallbacks exposed through the EncoderFallback / DecoderFallback properties.
    internal EncoderFallback encoderFallback;
    internal DecoderFallback decoderFallback;

    // Creates an encoding for code page 0.
    protected Encoding()
        : this(0)
    {
    }

    // Creates an encoding for the given code page and installs the default
    // fallbacks. Throws ArgumentOutOfRangeException for a negative code page.
    protected Encoding(int codePage)
    {
        if (codePage < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(codePage));
        }

        _codePage = codePage;

        // Virtual call: derived encodings may install different defaults.
        SetDefaultFallbacks();
    }

    // Lets a subclass supply its fallback objects at construction time. This is
    // needed because an encoding is created read-only, and the fallback setters
    // refuse to modify a read-only instance. A null fallback argument is replaced
    // by the corresponding internal best-fit fallback.
    protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
    {
        if (codePage < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(codePage));
        }

        _codePage = codePage;

        this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
        this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
    }

    // Installs the default fallbacks. The base implementation uses the internal
    // best-fit fallbacks; the method is virtual so that derived encodings can
    // install something else.
    internal virtual void SetDefaultFallbacks()
    {
        encoderFallback = new InternalEncoderBestFitFallback(this);
        decoderFallback = new InternalDecoderBestFitFallback(this);
    }

    // Converts a byte array from one encoding to another. The bytes in the
    // bytes array are converted from srcEncoding to
    // dstEncoding, and the returned value is a new byte array
    // containing the result of the conversion.
//
public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
    byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    // Whole-array conversion is the ranged conversion over [0, bytes.Length).
    return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
}

// Converts a range of bytes in a byte array from one encoding to another:
// count bytes of the bytes array, starting at index, are decoded with
// srcEncoding and re-encoded with dstEncoding. The result is returned as a
// new byte array. Throws ArgumentNullException when either encoding or the
// byte array is null.
//
public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
    byte[] bytes, int index, int count)
{
    // The source encoding is reported first when both encodings are null.
    if (srcEncoding == null)
    {
        throw new ArgumentNullException(nameof(srcEncoding), SR.ArgumentNull_Array);
    }

    if (dstEncoding == null)
    {
        throw new ArgumentNullException(nameof(dstEncoding), SR.ArgumentNull_Array);
    }

    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    char[] decoded = srcEncoding.GetChars(bytes, index, count);
    return dstEncoding.GetBytes(decoded);
}

// Registers an encoding provider. The argument is validated by EncodingProvider.
public static void RegisterProvider(EncodingProvider provider)
{
    EncodingProvider.AddProvider(provider);
}

// Returns the encoding for a code page number. Registered providers are asked
// first. Otherwise the code page must lie in [0, 65535]
// (ArgumentOutOfRangeException), must not be one of the rejected Win32 special
// values (ArgumentException), and must be known to EncodingTable
// (NotSupportedException).
public static Encoding GetEncoding(int codepage)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage);
    if (fromProvider != null)
    {
        return fromProvider;
    }

    //
    // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
    // add the corresponding item in EncodingTable.
    // Otherwise, the code below will throw exception when trying to call
    // EncodingTable.GetDataItem().
    //
    if (codepage < 0 || codepage > 65535)
    {
        throw new ArgumentOutOfRangeException(
            nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
    }

    switch (codepage)
    {
        // Special code page values that Win32 allows but this method rejects.
        case CodePageNoOEM:         // 1 CP_OEMCP
        case CodePageNoMac:         // 2 CP_MACCP
        case CodePageNoThread:      // 3 CP_THREAD_ACP
        case CodePageNoSymbol:      // 42 CP_SYMBOL
            throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));

        // Code pages served by the static encoding properties.
        case CodePageDefault:       // 0
            return Default;
        case CodePageUnicode:       // 1200
            return Unicode;
        case CodePageBigEndian:     // 1201
            return BigEndianUnicode;
        case CodePageUTF32:         // 12000
            return UTF32;
        case CodePageUTF32BE:       // 12001
            return BigEndianUTF32;
        case CodePageASCII:         // 20127
            return ASCII;
        case ISO_8859_1:            // 28591
            return Latin1;
        case CodePageUTF7:          // 65000
            return UTF7;
        case CodePageUTF8:          // 65001
            return UTF8;
    }

    // Any remaining code page that EncodingTable has data for yields the UTF8
    // instance; one it has no data for is unsupported.
    if (EncodingTable.GetCodePageDataItem(codepage) != null)
    {
        return UTF8;
    }

    throw new NotSupportedException(
        SR.Format(SR.NotSupported_NoCodepageData, codepage));
}

// Returns the encoding for a code page number, configured with the given
// fallbacks. A provider-supplied encoding is returned as-is. Otherwise the
// encoding from GetEncoding(codepage) is cloned and the fallbacks are assigned
// on the clone through the EncoderFallback and DecoderFallback setters.
public static Encoding GetEncoding(int codepage,
    EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
    if (fromProvider != null)
    {
        return fromProvider;
    }

    // Work on a clone so the instance returned by GetEncoding(codepage) is left untouched.
    Encoding configured = (Encoding)GetEncoding(codepage).Clone();
    configured.EncoderFallback = encoderFallback;
    configured.DecoderFallback = decoderFallback;

    return configured;
}

// Returns the encoding registered under the given name. Providers are asked
// first; otherwise the name is mapped to a code page through EncodingTable.
//
public static Encoding GetEncoding(String name)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name);
    if (fromProvider != null)
    {
        return fromProvider;
    }

    //
    // NOTE: If you add a new encoding that can be requested by name, be sure to
    // add the corresponding item in EncodingTable.
    // Otherwise, the code below will throw exception when trying to call
    // EncodingTable.GetCodePageFromName().
    //
    int codePage = EncodingTable.GetCodePageFromName(name);
    return GetEncoding(codePage);
}

// Returns an Encoding object for a given name or a given code page value.
//
// Providers are asked first; otherwise the name is mapped to a code page through
// EncodingTable and the code-page overload taking fallbacks does the rest.
public static Encoding GetEncoding(String name,
    EncoderFallback encoderFallback, DecoderFallback decoderFallback)
{
    Encoding fromProvider = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
    if (fromProvider != null)
    {
        return fromProvider;
    }

    //
    // NOTE: If you add a new encoding that can be requested by name, be sure to
    // add the corresponding item in EncodingTable.
    // Otherwise, the code below will throw exception when trying to call
    // EncodingTable.GetCodePageFromName().
    //
    int codePage = EncodingTable.GetCodePageFromName(name);
    return GetEncoding(codePage, encoderFallback, decoderFallback);
}

// Returns the EncodingInfo objects reported by EncodingTable.
public static EncodingInfo[] GetEncodings() => EncodingTable.GetEncodings();

// Base implementation: no preamble, i.e. an empty byte array.
public virtual byte[] GetPreamble() => Array.Empty<byte>();

// Span view over whatever GetPreamble() returns.
public virtual ReadOnlySpan<byte> Preamble => GetPreamble();

// Loads _dataItem for this instance's code page if it has not been loaded yet.
// Throws NotSupportedException when EncodingTable has no entry for the code page.
private void GetDataItem()
{
    if (_dataItem != null)
    {
        return;
    }

    _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
    if (_dataItem == null)
    {
        throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
    }
}

// Returns the name for this encoding that can be used with mail agent body tags.
// If the encoding may not be used, the string is empty.

public virtual String BodyName
{
    get
    {
        // No-op when the data item is already loaded.
        GetDataItem();
        return _dataItem.BodyName;
    }
}

// Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
#if PROJECTN
public virtual String EncodingName
{
    get
    {
        string localizedName = GetLocalizedEncodingNameResource(this.CodePage);
        if (localizedName == null)
        {
            throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
        }

        if (!localizedName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
        {
            // A real resource string was found.
            return localizedName;
        }

        // On ProjectN, retail builds strip resource strings and substitute their
        // identifier names. This property is meant to return a localized string, but
        // ProjectN is not localized, so fall back to the English name of the encoding
        // from the static data table.
        string englishName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
        if (englishName == null)
        {
            throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
        }

        return englishName;
    }
}

// Maps a code page to its Globalization_cp_* resource; null for any code page
// not listed here.
private static string GetLocalizedEncodingNameResource(int codePage)
{
    switch (codePage)
    {
        case 1200:
            return SR.Globalization_cp_1200;
        case 1201:
            return SR.Globalization_cp_1201;
        case 12000:
            return SR.Globalization_cp_12000;
        case 12001:
            return SR.Globalization_cp_12001;
        case 20127:
            return SR.Globalization_cp_20127;
        case 28591:
            return SR.Globalization_cp_28591;
        case 65000:
            return SR.Globalization_cp_65000;
        case 65001:
            return SR.Globalization_cp_65001;
        default:
            return null;
    }
}
#else
// Looks up the resource named "Globalization_cp_" followed by the code page number.
public virtual String EncodingName => SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
#endif
// Returns the name for this encoding that can be used with mail agent header
// tags. If the encoding may not be used, the string is empty.
public virtual String HeaderName
{
    get
    {
        // GetDataItem() is a no-op once _dataItem has been loaded.
        GetDataItem();
        return _dataItem.HeaderName;
    }
}

// Returns the IANA preferred name for this encoding.
public virtual String WebName
{
    get
    {
        GetDataItem();
        return _dataItem.WebName;
    }
}

// Returns the windows code page that most closely corresponds to this encoding.

public virtual int WindowsCodePage
{
    get
    {
        GetDataItem();
        return _dataItem.UIFamilyCodePage;
    }
}

// True if and only if the encoding is used for display by browsers clients
// (MIMECONTF_BROWSER is set in the data item's flags).

public virtual bool IsBrowserDisplay
{
    get
    {
        GetDataItem();
        return (_dataItem.Flags & MIMECONTF_BROWSER) != 0;
    }
}

// True if and only if the encoding is used for saving by browsers clients
// (MIMECONTF_SAVABLE_BROWSER is set in the data item's flags).

public virtual bool IsBrowserSave
{
    get
    {
        GetDataItem();
        return (_dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0;
    }
}

// True if and only if the encoding is used for display by mail and news clients
// (MIMECONTF_MAILNEWS is set in the data item's flags).

public virtual bool IsMailNewsDisplay
{
    get
    {
        GetDataItem();
        return (_dataItem.Flags & MIMECONTF_MAILNEWS) != 0;
    }
}

// True if and only if the encoding is used for saving documents by mail and
// news clients (MIMECONTF_SAVABLE_MAILNEWS is set in the data item's flags).

public virtual bool IsMailNewsSave
{
    get
    {
        GetDataItem();
        return (_dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0;
    }
}

// True if and only if the encoding only uses single byte code points. (Ie, ASCII, 1252, etc)
// The base implementation always answers false.

public virtual bool IsSingleByte => false;

// Encoder fallback of this instance. Setting it throws InvalidOperationException
// on a read-only instance, and ArgumentNullException for a null value; the
// read-only check comes first.
public EncoderFallback EncoderFallback
{
    get => encoderFallback;

    set
    {
        if (this.IsReadOnly)
        {
            throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
        }

        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        encoderFallback = value;
    }
}

// Decoder fallback of this instance. Same setter rules as EncoderFallback.
public DecoderFallback DecoderFallback
{
    get => decoderFallback;

    set
    {
        if (this.IsReadOnly)
        {
            throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
        }

        if (value == null)
        {
            throw new ArgumentNullException(nameof(value));
        }

        decoderFallback = value;
    }
}

// Returns a memberwise copy of this encoding whose read-only flag is cleared,
// so that its fallbacks can be changed.
public virtual Object Clone()
{
    Encoding copy = (Encoding)this.MemberwiseClone();
    copy._isReadOnly = false;
    return copy;
}

// Whether the fallback setters reject changes on this instance.
public bool IsReadOnly => _isReadOnly;

// Returns an encoding for the ASCII character set. The returned encoding
// will be an instance of the ASCIIEncoding class.

public static Encoding ASCII => ASCIIEncoding.s_default;

// Returns an encoding for the Latin1 character set. The returned encoding
// will be an instance of the Latin1Encoding class.
//
// This is for our optimizations
private static Encoding Latin1 => Latin1Encoding.s_default;

// Returns the number of bytes required to encode the given character
// array.
//
public virtual int GetByteCount(char[] chars)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    // Whole-array count is the ranged count over [0, chars.Length).
    return GetByteCount(chars, 0, chars.Length);
}

// Returns the number of bytes required to encode the given string. The string
// is copied into a char array and counted through the abstract ranged overload.
public virtual int GetByteCount(String s)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    char[] copy = s.ToCharArray();
    return GetByteCount(copy, 0, copy.Length);
}

// Returns the number of bytes required to encode a range of characters in
// a character array.
//
public abstract int GetByteCount(char[] chars, int index, int count);

// Returns the number of bytes required to encode a string range.
// Throws ArgumentNullException for a null string and
// ArgumentOutOfRangeException when index or count is negative or the range
// does not fit inside the string.
//
public int GetByteCount(string s, int index, int count)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Written as a subtraction so that index + count cannot overflow.
    if (index > s.Length - count)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
    }

    unsafe
    {
        // Pin the string and hand the requested window to the pointer overload.
        fixed (char* start = s)
        {
            return GetByteCount(start + index, count);
        }
    }
}

// We expect this to be the workhorse for NLS encodings
// unfortunately for existing overrides, it has to call the [] version,
// which is really slow, so this method should be avoided if you're calling
// a 3rd party encoding.
[CLSCompliant(false)]
public virtual unsafe int GetByteCount(char* chars, int count)
{
    // Validate input parameters
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Copy the input into a managed array so the abstract array-based overload can count it.
    char[] arrChar = new char[count];
    for (int index = 0; index < count; index++)
    {
        arrChar[index] = chars[index];
    }

    return GetByteCount(arrChar, 0, count);
}

// Returns the number of bytes required to encode the characters in the span,
// by pinning the span and calling the pointer overload.
public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
{
    if (chars.IsEmpty)
    {
        // MemoryMarshal.GetReference may return a null reference for an empty span.
        // Pinning that would pass a null pointer to the pointer overload, which
        // rejects null with ArgumentNullException even though an empty span is valid
        // input. Pass the address of a local instead; the base pointer overload reads
        // no elements when the count is 0.
        char placeholder = '\0';
        return GetByteCount(&placeholder, 0);
    }

    fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
    {
        return GetByteCount(charsPtr, chars.Length);
    }
}

// For NLS Encodings, workhorse takes an encoder (may be null)
// Always validate parameters before calling internal version, which will only assert.
// The base implementation ignores the encoder and calls the public pointer overload.
internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
{
    Debug.Assert(chars != null);
    Debug.Assert(count >= 0);

    return GetByteCount(chars, count);
}

// Returns a byte array containing the encoded representation of the given
// character array. Throws ArgumentNullException for a null array.
//
public virtual byte[] GetBytes(char[] chars)
{
    if (chars == null)
    {
        throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
    }

    return GetBytes(chars, 0, chars.Length);
}

// Returns a byte array containing the encoded representation of a range
// of characters in a character array.
//
public virtual byte[] GetBytes(char[] chars, int index, int count)
{
    // Size the result exactly, then let the abstract overload fill it from offset 0.
    int byteCount = GetByteCount(chars, index, count);
    byte[] encoded = new byte[byteCount];
    GetBytes(chars, index, count, encoded, 0);
    return encoded;
}

// Encodes a range of characters in a character array into a range of bytes
// in a byte array. An exception occurs if the byte array is not large
// enough to hold the complete encoding of the characters. The
// GetByteCount method can be used to determine the exact number of
// bytes that will be produced for a given range of characters.
// Alternatively, the GetMaxByteCount method can be used to
// determine the maximum number of bytes that will be produced for a given
// number of characters, regardless of the actual character values.
//
public abstract int GetBytes(char[] chars, int charIndex, int charCount,
    byte[] bytes, int byteIndex);

// Returns a byte array containing the encoded representation of the given
// string. Throws ArgumentNullException for a null string.
//
public virtual byte[] GetBytes(String s)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
    }

    int expected = GetByteCount(s);
    byte[] encoded = new byte[expected];

    // The number of bytes written is expected to match the count computed above.
    int written = GetBytes(s, 0, s.Length, encoded, 0);
    Debug.Assert(expected == written);

    return encoded;
}

// Returns a byte array containing the encoded representation of the given
// string range.
//
// Throws ArgumentNullException for a null string and
// ArgumentOutOfRangeException when index or count is negative or the range
// does not fit inside the string.
public byte[] GetBytes(string s, int index, int count)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s), SR.ArgumentNull_String);
    }

    if (index < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Written as a subtraction so that index + count cannot overflow.
    if (index > s.Length - count)
    {
        throw new ArgumentOutOfRangeException(nameof(index), SR.ArgumentOutOfRange_IndexCount);
    }

    unsafe
    {
        fixed (char* pChar = s)
        {
            int byteCount = GetByteCount(pChar + index, count);
            if (byteCount == 0)
            {
                // Nothing to encode; also avoids taking &bytes[0] on an empty array below.
                return Array.Empty<byte>();
            }

            byte[] bytes = new byte[byteCount];
            fixed (byte* pBytes = &bytes[0])
            {
                int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
                Debug.Assert(byteCount == bytesReceived);
            }

            return bytes;
        }
    }
}

// Encodes a range of a string into a byte array. The whole string is copied
// to a char array and the work is forwarded to the abstract char[] overload.
public virtual int GetBytes(String s, int charIndex, int charCount,
    byte[] bytes, int byteIndex)
{
    if (s == null)
    {
        throw new ArgumentNullException(nameof(s));
    }

    return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
}

// This is our internal workhorse
// Always validate parameters before calling internal version, which will only assert.
// The base implementation ignores the encoder and calls the public pointer overload.
internal virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount, EncoderNLS encoder)
{
    return GetBytes(chars, charCount, bytes, byteCount);
}

// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation)
//
// WARNING WARNING WARNING
//
// WARNING: If this breaks it could be a security threat. Obviously we
// call this internally, so you need to make sure that your pointers, counts
// and indexes are correct when you call this method.
//
// In addition, we have internal code, which will be marked as "safe" calling
// this code. However this code is dependent upon the implementation of an
// external GetBytes() method, which could be overridden by a third party and
// the results of which cannot be guaranteed. We use that result to copy
// the byte[] to our byte* output buffer. If the result count was wrong, we
// could easily overflow our output buffer. Therefore we do an extra test
// when we copy the buffer so that we don't overflow byteCount either.

[CLSCompliant(false)]
public virtual unsafe int GetBytes(char* chars, int charCount,
    byte* bytes, int byteCount)
{
    // Validate input parameters
    if (bytes == null || chars == null)
    {
        throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
            SR.ArgumentNull_Array);
    }

    if (charCount < 0 || byteCount < 0)
    {
        throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
            SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Get the char array to convert
    char[] arrChar = new char[charCount];

    int index;
    for (index = 0; index < charCount; index++)
    {
        arrChar[index] = chars[index];
    }

    // Get the byte array to fill
    byte[] arrByte = new byte[byteCount];

    // Do the work
    int result = GetBytes(arrChar, 0, charCount, arrByte, 0);

    Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");

    // Copy the byte array
    // WARNING: We MUST make sure that we don't copy too many bytes. We can't
    // rely on result because it could be a 3rd party implementation. We need
    // to make sure we never copy more than byteCount bytes no matter the value
    // of result
    if (result < byteCount)
    {
        byteCount = result;
    }

    // Copy the data, don't overrun our array!
    for (index = 0; index < byteCount; index++)
    {
        bytes[index] = arrByte[index];
    }

    return byteCount;
}

// Encodes the characters in the source span into the destination span by
// pinning both and calling the pointer overload; returns that overload's result.
public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
{
    // MemoryMarshal.GetReference may return a null reference for an empty span.
    // Pinning that would pass a null pointer to the pointer overload, which rejects
    // null with ArgumentNullException even though an empty source or destination
    // span is valid input. For an empty span, pass the address of a local instead;
    // the base pointer overload touches no elements of a buffer whose count is 0.
    char emptyChars = '\0';
    byte emptyBytes = 0;

    fixed (char* pinnedChars = &MemoryMarshal.GetReference(chars))
    fixed (byte* pinnedBytes = &MemoryMarshal.GetReference(bytes))
    {
        char* charsPtr = chars.IsEmpty ? &emptyChars : pinnedChars;
        byte* bytesPtr = bytes.IsEmpty ? &emptyBytes : pinnedBytes;

        return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
    }
}

// Returns the number of characters produced by decoding the given byte
// array. Throws ArgumentNullException for a null array.
//
public virtual int GetCharCount(byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    return GetCharCount(bytes, 0, bytes.Length);
}

// Returns the number of characters produced by decoding a range of bytes
// in a byte array.
//
public abstract int GetCharCount(byte[] bytes, int index, int count);

// We expect this to be the workhorse for NLS Encodings, but for existing
// ones we need a working (if slow) default implementation)
[CLSCompliant(false)]
public virtual unsafe int GetCharCount(byte* bytes, int count)
{
    // Validate input parameters
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    if (count < 0)
    {
        throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);
    }

    // Copy the input into a managed array so the abstract array-based overload can count it.
    byte[] arrbyte = new byte[count];
    for (int index = 0; index < count; index++)
    {
        arrbyte[index] = bytes[index];
    }

    return GetCharCount(arrbyte, 0, count);
}

// Returns the number of characters produced by decoding the bytes in the span,
// by pinning the span and calling the pointer overload.
public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
{
    if (bytes.IsEmpty)
    {
        // MemoryMarshal.GetReference may return a null reference for an empty span.
        // Pinning that would pass a null pointer to the pointer overload, which
        // rejects null with ArgumentNullException even though an empty span is valid
        // input. Pass the address of a local instead; the base pointer overload reads
        // no elements when the count is 0.
        byte placeholder = 0;
        return GetCharCount(&placeholder, 0);
    }

    fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
    {
        return GetCharCount(bytesPtr, bytes.Length);
    }
}

// This is our internal workhorse
// Always validate parameters before calling internal version, which will only assert.
// The base implementation ignores the decoder and calls the public pointer overload.
internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
{
    return GetCharCount(bytes, count);
}

// Returns a character array containing the decoded representation of a
// given byte array. Throws ArgumentNullException for a null array.
//
public virtual char[] GetChars(byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
    }

    return GetChars(bytes, 0, bytes.Length);
}

// Returns a character array containing the decoded representation of a
// range of bytes in a byte array.
//
        // Sizes the result with GetCharCount for the same range, then decodes
        // into it with the five-argument GetChars.
        public virtual char[] GetChars(byte[] bytes, int index, int count)
        {
            int decodedLength = GetCharCount(bytes, index, count);
            char[] decoded = new char[decodedLength];

            GetChars(bytes, index, count, decoded, 0);
            return decoded;
        }

        // Decodes a range of bytes in a byte array into a range of characters in a
        // character array. An exception occurs if the character array is not large
        // enough to hold the complete decoding of the bytes. The
        // GetCharCount method can be used to determine the exact number of
        // characters that will be produced for a given range of bytes.
        // Alternatively, the GetMaxCharCount method can be used to
        // determine the maximum number of characters that will be produced for a
        // given number of bytes, regardless of the actual byte values.
        //
        public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                     char[] chars, int charIndex);


        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation.
        //
        // WARNING WARNING WARNING
        //
        // WARNING: If this breaks it could be a security threat.  Obviously we
        // call this internally, so you need to make sure that your pointers, counts
        // and indexes are correct when you call this method.
        //
        // In addition, we have internal code, which will be marked as "safe" calling
        // this code.  However this code is dependent upon the implementation of an
        // external GetChars() method, which could be overridden by a third party and
        // the results of which cannot be guaranteed.  We use that result to copy
        // the char[] to our char* output buffer.  If the result count was wrong, we
        // could easily overflow our output buffer.  Therefore we do an extra test
        // when we copy the buffer so that we don't overflow charCount either.

        [CLSCompliant(false)]
        public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                           char* chars, int charCount)
        {
            // Validate input parameters. The order matters for which parameter
            // name is reported: chars before bytes, byteCount before charCount.
            if (chars == null)
            {
                throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);
            }

            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Get the byte array to convert
            byte[] arrByte = new byte[byteCount];
            for (int i = 0; i < byteCount; i++)
            {
                arrByte[i] = bytes[i];
            }

            // Get the char array to fill
            char[] arrChar = new char[charCount];

            // Do the work
            int result = GetChars(arrByte, 0, byteCount, arrChar, 0);

            Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");

            // Copy the char array
            // WARNING: We MUST make sure that we don't copy too many chars.  We can't
            // rely on result because it could be a 3rd party implementation.  We need
            // to make sure we never copy more than charCount chars no matter the value
            // of result
            int charsToCopy = result < charCount ? result : charCount;

            // Copy the data, don't overrun our array!
            for (int i = 0; i < charsToCopy; i++)
            {
                chars[i] = arrChar[i];
            }

            return charsToCopy;
        }

        // Decodes the bytes in a span into a span of characters by pinning both
        // buffers and forwarding to the pointer-based overload above.
        //
        // A default (or otherwise empty) span may pin to a null pointer, which the
        // pointer-based overload rejects with ArgumentNullException even though an
        // empty span is a valid zero-length buffer. An empty span is therefore
        // pointed at a local placeholder; with a length of zero the placeholder is
        // never read or written.
        public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
        {
            byte emptyBytes = 0;
            char emptyChars = '\0';

            fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
            fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
            {
                byte* source = bytes.IsEmpty ? &emptyBytes : bytesPtr;
                char* destination = chars.IsEmpty ? &emptyChars : charsPtr;

                return GetChars(source, bytes.Length, destination, chars.Length);
            }
        }

        // This is our internal workhorse
        // Always validate parameters before calling internal version, which will only assert.
        // This base implementation does not use the decoder argument; it simply
        // forwards to the public pointer-based overload.
        internal virtual unsafe int GetChars(byte* bytes, int byteCount,
                                             char* chars, int charCount, DecoderNLS decoder)
        {
            return GetChars(bytes, byteCount, chars, charCount);
        }


        // Decodes byteCount bytes starting at the given pointer into a string.
        // Throws ArgumentNullException for a null pointer and
        // ArgumentOutOfRangeException for a negative count; the string itself is
        // built by String.CreateStringFromEncoding.
        [CLSCompliant(false)]
        public unsafe string GetString(byte* bytes, int byteCount)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
            }

            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            return String.CreateStringFromEncoding(bytes, byteCount, this);
        }

        // Decodes the bytes in a span into a string by pinning the span and
        // forwarding to the pointer-based overload above.
        //
        // A default (or otherwise empty) span may pin to a null pointer, which the
        // pointer-based overload rejects with ArgumentNullException. An empty span
        // is therefore pointed at a local placeholder and forwarded with a length
        // of zero, so the placeholder is never read.
        public unsafe string GetString(ReadOnlySpan<byte> bytes)
        {
            byte emptyBytes = 0;

            fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
            {
                byte* source = bytes.IsEmpty ? &emptyBytes : bytesPtr;
                return GetString(source, bytes.Length);
            }
        }


        // Returns the code page identifier of this encoding. The returned value is
        // an integer between 0 and 65535 if the encoding has a code page
        // identifier, or -1 if the encoding does not represent a code page.
//

        public virtual int CodePage => _codePage;

        // IsAlwaysNormalized
        // Convenience overload: asks the form-specific overload about
        // NormalizationForm.FormC.
        public bool IsAlwaysNormalized() => this.IsAlwaysNormalized(NormalizationForm.FormC);

        // Returns true if the encoding is always normalized for the specified
        // normalization form. The base implementation answers false; an encoding
        // that knows better overrides this.
        public virtual bool IsAlwaysNormalized(NormalizationForm form) => false;

        // Returns a Decoder object for this encoding. The returned object
        // can be used to decode a sequence of bytes into a sequence of characters.
        // Contrary to the GetChars family of methods, a Decoder can
        // convert partial sequences of bytes into partial sequences of characters
        // by maintaining the appropriate state between the conversions.
        //
        // This default implementation returns a Decoder that simply
        // forwards calls to the GetCharCount and GetChars methods to
        // the corresponding methods of this encoding. Encodings that require state
        // to be maintained between successive conversions should override this
        // method and return an instance of an appropriate Decoder
        // implementation.
        //
        public virtual Decoder GetDecoder() => new DefaultDecoder(this);

        // Returns an Encoder object for this encoding. The returned object
        // can be used to encode a sequence of characters into a sequence of bytes.
        // Contrary to the GetBytes family of methods, an Encoder can
        // convert partial sequences of characters into partial sequences of bytes
        // by maintaining the appropriate state between the conversions.
        //
        // This default implementation returns an Encoder that simply
        // forwards calls to the GetByteCount and GetBytes methods to
        // the corresponding methods of this encoding. Encodings that require state
        // to be maintained between successive conversions should override this
        // method and return an instance of an appropriate Encoder
        // implementation.
        //
        public virtual Encoder GetEncoder() => new DefaultEncoder(this);

        // Returns the maximum number of bytes required to encode a given number of
        // characters. This method can be used to determine an appropriate buffer
        // size for byte arrays passed to the GetBytes method of this
        // encoding or the GetBytes method of an Encoder for this
        // encoding. All encodings must guarantee that no buffer overflow
        // exceptions will occur if buffers are sized according to the results of
        // this method.
        //
        // WARNING: If you're using something besides the default replacement encoder fallback,
        // then you could have more bytes than this returned from an actual call to GetBytes().
        //
        public abstract int GetMaxByteCount(int charCount);

        // Returns the maximum number of characters produced by decoding a given
        // number of bytes. This method can be used to determine an appropriate
        // buffer size for character arrays passed to the GetChars method of
        // this encoding or the GetChars method of a Decoder for this
        // encoding. All encodings must guarantee that no buffer overflow
        // exceptions will occur if buffers are sized according to the results of
        // this method.
        //
        public abstract int GetMaxCharCount(int byteCount);

        // Returns a string containing the decoded representation of a given byte
        // array.
//
        // Throws ArgumentNullException when the array is null; otherwise the whole
        // array is handed to the range-based overload.
        public virtual String GetString(byte[] bytes)
        {
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
            }

            int wholeLength = bytes.Length;
            return GetString(bytes, 0, wholeLength);
        }

        // Returns a string containing the decoded representation of a range of
        // bytes in a byte array. This base implementation decodes the range with
        // GetChars and wraps the resulting character array in a new string.
        //
        // Internally we override this for performance
        //
        public virtual String GetString(byte[] bytes, int index, int count)
        {
            char[] decoded = GetChars(bytes, index, count);
            return new String(decoded);
        }

        // Returns an encoding for Unicode format. The returned encoding will be
        // an instance of the UnicodeEncoding class.
        //
        // It will use little endian byte order, but will detect
        // input in big endian if it finds a byte order mark per Unicode 2.0.

        public static Encoding Unicode => UnicodeEncoding.s_littleEndianDefault;

        // Returns an encoding for Unicode format. The returned encoding will be
        // an instance of the UnicodeEncoding class.
        //
        // It will use big endian byte order, but will detect
        // input in little endian if it finds a byte order mark per Unicode 2.0.

        public static Encoding BigEndianUnicode => UnicodeEncoding.s_bigEndianDefault;

        // Returns an encoding for the UTF-7 format. The returned encoding will be
        // an instance of the UTF7Encoding class.

        public static Encoding UTF7 => UTF7Encoding.s_default;

        // Returns an encoding for the UTF-8 format. The returned encoding will be
        // an instance of the UTF8Encoding class.

        public static Encoding UTF8 => UTF8Encoding.s_default;

        // Returns an encoding for the UTF-32 format. The returned encoding will be
        // an instance of the UTF32Encoding class.

        public static Encoding UTF32 => UTF32Encoding.s_default;

        // Returns an encoding for the UTF-32 format.
// The returned encoding will be an instance of the UTF32Encoding class.
        //
        // It will use big endian byte order.

        private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;

        // Two encodings are equal when the other object is an Encoding with the
        // same code page and both its encoder fallback and decoder fallback
        // compare equal to ours. Anything that is not an Encoding is unequal.
        public override bool Equals(Object value)
        {
            Encoding other = value as Encoding;
            if (other == null)
            {
                return false;
            }

            if (_codePage != other._codePage)
            {
                return false;
            }

            if (!EncoderFallback.Equals(other.EncoderFallback))
            {
                return false;
            }

            return DecoderFallback.Equals(other.DecoderFallback);
        }


        // Combines the same three ingredients that Equals compares: the code page
        // plus the hash codes of the encoder and decoder fallbacks.
        public override int GetHashCode()
        {
            int encoderFallbackHash = this.EncoderFallback.GetHashCode();
            int decoderFallbackHash = this.DecoderFallback.GetHashCode();

            return _codePage + encoderFallbackHash + decoderFallbackHash;
        }

        // Normally we don't have any best fit data, so the base class hands back
        // an empty array.
        internal virtual char[] GetBestFitUnicodeToBytesData() => Array.Empty<char>();

        // Normally we don't have any best fit data, so the base class hands back
        // an empty array.
        internal virtual char[] GetBestFitBytesToUnicodeData() => Array.Empty<char>();

        // Always throws ArgumentException for the "bytes" parameter.
        internal void ThrowBytesOverflow()
        {
            // Special message to include fallback type in case fallback's GetMaxCharCount is broken
            // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
            string message = SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType());
            throw new ArgumentException(message, "bytes");
        }

        // Throws the bytes-overflow exception when there is no encoder, when the
        // encoder is flagged to throw on overflow, or when nothing was encoded.
        // Otherwise nothing is thrown and the encoder's must-flush state is cleared.
        internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
        {
            bool canContinue = encoder != null && !encoder._throwOnOverflow && !nothingEncoded;
            if (canContinue)
            {
                // If we didn't throw, we are in convert and have to remember our flushing
                encoder.ClearMustFlush();
                return;
            }

            // Reset the encoder's fallback buffer (if it has one) before throwing.
            if (encoder != null && encoder.InternalHasFallbackBuffer)
            {
                encoder.FallbackBuffer.InternalReset();
            }

            // Special message to include fallback type in case fallback's GetMaxCharCount is broken
            // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
            ThrowBytesOverflow();
        }

        // Always throws ArgumentException for the "chars" parameter.
        internal void ThrowCharsOverflow()
        {
            // Special message to include fallback type in case fallback's GetMaxCharCount is broken
            // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
            string message = SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType());
            throw new ArgumentException(message, "chars");
        }

        // Throws the chars-overflow exception when there is no decoder, when the
        // decoder is flagged to throw on overflow, or when nothing was decoded.
        // Otherwise nothing is thrown and the decoder's must-flush state is cleared.
        internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
        {
            bool canContinue = decoder != null && !decoder._throwOnOverflow && !nothingDecoded;
            if (canContinue)
            {
                // If we didn't throw, we are in convert and have to remember our flushing
                decoder.ClearMustFlush();
                return;
            }

            // Reset the decoder's fallback buffer (if it has one) before throwing.
            if (decoder != null && decoder.InternalHasFallbackBuffer)
            {
                decoder.FallbackBuffer.InternalReset();
            }

            // Special message to include fallback type in case fallback's GetMaxCharCount is broken
            // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
            ThrowCharsOverflow();
        }

        internal sealed class DefaultEncoder : Encoder, IObjectReference
        {
            // The encoding every call is forwarded to.
            private Encoding _encoding;

            public DefaultEncoder(Encoding encoding)
            {
                _encoding = encoding;
            }

            // Not supported here: always throws PlatformNotSupportedException.
            public Object GetRealObject(StreamingContext context)
            {
                throw new PlatformNotSupportedException();
            }

            // Returns the number of bytes the next call to GetBytes will
            // produce if presented with the given range of characters and the given
            // value of the flush parameter. The returned value takes into
            // account the state in which the encoder was left following the last call
            // to GetBytes. The state of the encoder is not affected by a call
            // to this method.
            //
            // This implementation forwards to the encoding's GetByteCount and does
            // not consult the flush argument.
            //
            public override int GetByteCount(char[] chars, int index, int count, bool flush)
                => _encoding.GetByteCount(chars, index, count);

            // Pointer-based variant; likewise forwards and ignores flush.
            public unsafe override int GetByteCount(char* chars, int count, bool flush)
                => _encoding.GetByteCount(chars, count);

            // Encodes a range of characters in a character array into a range of bytes
            // in a byte array. The method encodes charCount characters from
            // chars starting at index charIndex, storing the resulting
            // bytes in bytes starting at index byteIndex. The encoding
            // takes into account the state in which the encoder was left following the
            // last call to this method.
// The flush parameter indicates whether
            // the encoder should flush any shift-states and partial characters at the
            // end of the conversion. To ensure correct termination of a sequence of
            // blocks of encoded bytes, the last call to GetBytes should specify
            // a value of true for the flush parameter.
            //
            // An exception occurs if the byte array is not large enough to hold the
            // complete encoding of the characters. The GetByteCount method can
            // be used to determine the exact number of bytes that will be produced for
            // a given range of characters. Alternatively, the GetMaxByteCount
            // method of the Encoding that produced this encoder can be used to
            // determine the maximum number of bytes that will be produced for a given
            // number of characters, regardless of the actual character values.
            //
            // This implementation forwards to the encoding's GetBytes and does not
            // consult the flush argument.
            //
            public override int GetBytes(char[] chars, int charIndex, int charCount,
                                         byte[] bytes, int byteIndex, bool flush)
                => _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);

            // Pointer-based variant; likewise forwards and ignores flush.
            public unsafe override int GetBytes(char* chars, int charCount,
                                                byte* bytes, int byteCount, bool flush)
                => _encoding.GetBytes(chars, charCount, bytes, byteCount);
        }

        internal sealed class DefaultDecoder : Decoder, IObjectReference
        {
            // The encoding every call is forwarded to.
            private Encoding _encoding;

            public DefaultDecoder(Encoding encoding)
            {
                _encoding = encoding;
            }

            // Not supported here: always throws PlatformNotSupportedException.
            public Object GetRealObject(StreamingContext context)
            {
                throw new PlatformNotSupportedException();
            }

            // Returns the number of characters the next call to GetChars will
            // produce if presented with the given range of bytes. The returned value
            // takes into account the state in which the decoder was left following the
            // last call to GetChars. The state of the decoder is not affected
            // by a call to this method.
            //
            // This overload delegates to the flush-taking overload with flush = false.
            //
            public override int GetCharCount(byte[] bytes, int index, int count)
                => GetCharCount(bytes, index, count, false);

            // Forwards to the encoding's GetCharCount; the flush argument is not consulted.
            public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
                => _encoding.GetCharCount(bytes, index, count);

            // By default just call the encoding version, no flush by default
            public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
                => _encoding.GetCharCount(bytes, count);

            // Decodes a range of bytes in a byte array into a range of characters
            // in a character array. The method decodes byteCount bytes from
            // bytes starting at index byteIndex, storing the resulting
            // characters in chars starting at index charIndex. The
            // decoding takes into account the state in which the decoder was left
            // following the last call to this method.
            //
            // An exception occurs if the character array is not large enough to
            // hold the complete decoding of the bytes. The GetCharCount method
            // can be used to determine the exact number of characters that will be
            // produced for a given range of bytes. Alternatively, the
            // GetMaxCharCount method of the Encoding that produced this
            // decoder can be used to determine the maximum number of characters that
            // will be produced for a given number of bytes, regardless of the actual
            // byte values.
//
            // This overload delegates to the flush-taking overload with flush = false.
            public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                         char[] chars, int charIndex)
                => GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);

            // Forwards to the encoding's GetChars; the flush argument is not consulted.
            public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                         char[] chars, int charIndex, bool flush)
                => _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);

            // By default just call the encoding's version
            public unsafe override int GetChars(byte* bytes, int byteCount,
                                                char* chars, int charCount, bool flush)
                => _encoding.GetChars(bytes, byteCount, chars, charCount);
        }

        // Cursor pair used while decoding: a read cursor over the input bytes and
        // a write cursor over the output chars, plus a running count of the chars
        // produced. When the char cursor is null, chars are only counted and
        // nothing is stored.
        internal class EncodingCharBuffer
        {
            private unsafe char* _chars;        // current write position (null => count only)
            private unsafe char* _charStart;    // first char of the output buffer
            private unsafe char* _charEnd;      // one past the last char of the output buffer
            private int _charCountResult = 0;   // number of chars produced so far
            private Encoding _enc;
            private DecoderNLS _decoder;
            private unsafe byte* _byteStart;    // first input byte
            private unsafe byte* _byteEnd;      // one past the last input byte
            private unsafe byte* _bytes;        // current read position
            private DecoderFallbackBuffer _fallbackBuffer;

            internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
                                               byte* byteStart, int byteCount)
            {
                _enc = enc;
                _decoder = decoder;

                _charStart = charStart;
                _chars = charStart;
                _charEnd = charStart + charCount;

                _byteStart = byteStart;
                _bytes = byteStart;
                _byteEnd = byteStart + byteCount;

                // Without a decoder we need a fresh fallback buffer from the
                // encoding; with one we share the decoder's buffer.
                _fallbackBuffer = _decoder == null
                    ? enc.DecoderFallback.CreateFallbackBuffer()
                    : _decoder.FallbackBuffer;

                // If we're getting chars or getting char count we don't expect to have
                // to remember fallbacks between calls (so it should be empty)
                Debug.Assert(_fallbackBuffer.Remaining == 0,
                    "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
                _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
            }

            // Records one output char that was produced from numBytes input bytes.
            // In storing mode a full output buffer backs the byte cursor up by
            // numBytes, lets the encoding decide whether to throw, and returns
            // false. Otherwise the char is stored (when storing) and counted.
            internal unsafe bool AddChar(char ch, int numBytes)
            {
                bool storing = _chars != null;
                if (storing)
                {
                    if (_chars >= _charEnd)
                    {
                        // Throw maybe
                        _bytes -= numBytes;                                        // Didn't encode these bytes
                        _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
                        return false;                                               // No throw, but no store either
                    }

                    *_chars = ch;
                    _chars++;
                }

                _charCountResult++;
                return true;
            }

            // Single-byte convenience overload.
            internal unsafe bool AddChar(char ch) => AddChar(ch, 1);


            // Records two output chars produced from numBytes input bytes, checking
            // up front that both fit.
            internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
            {
                // Need room for 2 chars
                if (_chars >= _charEnd - 1)
                {
                    // Throw maybe
                    _bytes -= numBytes;                                        // Didn't encode these bytes
                    _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
                    return false;                                               // No throw, but no store either
                }

                if (!AddChar(ch1, numBytes))
                {
                    return false;
                }

                return AddChar(ch2, numBytes);
            }

            // Moves the byte read cursor by count (no bounds check is made here).
            internal unsafe void AdjustBytes(int count)
            {
                _bytes += count;
            }

            // True while the byte read cursor has not reached the end of the input.
            internal unsafe bool MoreData => _bytes < _byteEnd;

            // Do we have count more bytes?
// (True when the read cursor is at least count bytes before the end of the input.)
            internal unsafe bool EvenMoreData(int count) => _bytes <= _byteEnd - count;

            // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
            // but we'll double check just to make sure.
            // Returns 0 when no input is left; otherwise returns the byte under
            // the cursor and advances past it.
            internal unsafe byte GetNextByte()
            {
                Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");

                if (_bytes >= _byteEnd)
                {
                    return 0;
                }

                byte next = *_bytes;
                _bytes++;
                return next;
            }

            // Distance of the read cursor from the start of the input.
            internal unsafe int BytesUsed => (int)(_bytes - _byteStart);

            // Falls back on a single byte.
            internal unsafe bool Fallback(byte fallbackByte)
            {
                // Build our buffer, then do the fallback and add the data.
                return Fallback(new byte[] { fallbackByte });
            }

            // Falls back on a two-byte sequence.
            internal unsafe bool Fallback(byte byte1, byte byte2)
            {
                // Build our buffer, then do the fallback and add the data.
                return Fallback(new byte[] { byte1, byte2 });
            }

            // Falls back on a four-byte sequence.
            internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
            {
                // Build our buffer, then do the fallback and add the data.
                return Fallback(new byte[] { byte1, byte2, byte3, byte4 });
            }

            // Runs the decoder fallback for the given bytes and adds whatever it
            // produces to the running char count. In count-only mode (null char
            // cursor) the fallback just reports how many chars it would produce.
            // In storing mode a failed fallback backs the byte cursor up, resets
            // the fallback buffer, lets the encoding decide whether to throw, and
            // returns false.
            internal unsafe bool Fallback(byte[] byteBuffer)
            {
                if (_chars == null)
                {
                    _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
                    return true;
                }

                char* writeStart = _chars;
                bool stored = _fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars);
                if (stored == false)
                {
                    // Throw maybe
                    _bytes -= byteBuffer.Length;                                // Didn't use how many ever bytes we're falling back
                    _fallbackBuffer.InternalReset();                            // We didn't use this fallback.
                    _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
                    return false;                                               // No throw, but no store either
                }

                _charCountResult += unchecked((int)(_chars - writeStart));
                return true;
            }

            // Number of chars produced so far.
            internal unsafe int Count => _charCountResult;
        }

        // Mirror image of the char buffer, used while encoding: a read cursor
        // over the input chars and a write cursor over the output bytes, plus a
        // running count of the bytes produced. When the byte cursor is null,
        // bytes are only counted and nothing is stored.
        internal class EncodingByteBuffer
        {
            private unsafe byte* _bytes;        // current write position (null => count only)
            private unsafe byte* _byteStart;    // first byte of the output buffer
            private unsafe byte* _byteEnd;      // one past the last byte of the output buffer
            private unsafe char* _chars;        // current read position
            private unsafe char* _charStart;    // first input char
            private unsafe char* _charEnd;      // one past the last input char
            private int _byteCountResult = 0;   // number of bytes produced so far
            private Encoding _enc;
            private EncoderNLS _encoder;
            internal EncoderFallbackBuffer fallbackBuffer;

            internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
                        byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
            {
                _enc = inEncoding;
                _encoder = inEncoder;

                _chars = inCharStart;
                _charStart = inCharStart;
                _charEnd = inCharStart + inCharCount;

                _byteStart = inByteStart;
                _bytes = inByteStart;
                _byteEnd = inByteStart + inByteCount;

                if (_encoder != null)
                {
                    this.fallbackBuffer = _encoder.FallbackBuffer;

                    // If we're not converting we must not have data in our fallback buffer
                    bool leftoverFallbackData = _encoder._throwOnOverflow &&
                                                _encoder.InternalHasFallbackBuffer &&
                                                this.fallbackBuffer.Remaining > 0;
                    if (leftoverFallbackData)
                    {
                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
                            _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
                    }
                }
                else
                {
                    this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
                }

                fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
            }

            // Records one output byte, requiring room for moreBytesExpected further
            // bytes after it. In storing mode, lack of room backs up the input via
            // MovePrevious(true) (which may throw) and returns false. Otherwise the
            // byte is stored (when storing) and counted.
            internal unsafe bool AddByte(byte b, int moreBytesExpected)
            {
                Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");

                bool storing = _bytes != null;
                if (storing)
                {
                    if (_bytes >= _byteEnd - moreBytesExpected)
                    {
                        // Throw maybe.  Check which buffer to back up (only matters if Converting)
                        this.MovePrevious(true);            // Throw if necessary
                        return false;                       // No throw, but no store either
                    }

                    *_bytes = b;
                    _bytes++;
                }

                _byteCountResult++;
                return true;
            }

            // One byte, nothing expected after it.
            internal unsafe bool AddByte(byte b1) => AddByte(b1, 0);

            // Two bytes, nothing expected after them.
            internal unsafe bool AddByte(byte b1, byte b2) => AddByte(b1, b2, 0);

            // Two bytes followed by moreBytesExpected further bytes; stops at the
            // first byte that cannot be added.
            internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
            {
                if (!AddByte(b1, 1 + moreBytesExpected))
                {
                    return false;
                }

                return AddByte(b2, moreBytesExpected);
            }

            // Three bytes, nothing expected after them.
            internal unsafe bool AddByte(byte b1, byte b2, byte b3) => AddByte(b1, b2, b3, (int)0);

            // Three bytes followed by moreBytesExpected further bytes; stops at the
            // first byte that cannot be added.
            internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
            {
                if (!AddByte(b1, 2 + moreBytesExpected))
                {
                    return false;
                }

                if (!AddByte(b2, 1 + moreBytesExpected))
                {
                    return false;
                }

                return AddByte(b3, moreBytesExpected);
            }

            // Four bytes; stops at the first byte that cannot be added.
            internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
            {
                if (!AddByte(b1, 3))
                {
                    return false;
                }

                if (!AddByte(b2, 2))
                {
                    return false;
                }

                if (!AddByte(b3, 1))
                {
                    return false;
                }

                return AddByte(b4, 0);
            }

            // Un-reads the most recent input: either the last fallback char (when
            // the fallback buffer is supplying data) or the last input char. When
            // bThrow is set the encoding is then asked to raise the bytes-overflow
            // error, which it may or may not do.
            internal unsafe void MovePrevious(bool bThrow)
            {
                if (!fallbackBuffer.bFallingBack)
                {
                    Debug.Assert(_chars > _charStart ||
                        ((bThrow == true) && (_bytes == _byteStart)),
                        "[EncodingByteBuffer.MovePrevious]expected previous data or throw");

                    if (_chars > _charStart)
                    {
                        _chars--;                                       // don't use last char
                    }
                }
                else
                {
                    fallbackBuffer.MovePrevious();                      // don't use last fallback
                }

                if (bThrow)
                {
                    _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);    // Throw? (and reset fallback if not converting)
                }
            }

            // Do the fallback for one input char; the fallback buffer may move the
            // char read cursor, which is passed by reference.
            internal unsafe bool Fallback(char charFallback)
            {
                return fallbackBuffer.InternalFallback(charFallback, ref _chars);
            }

            // See if fallbackBuffer is not empty or if there's data left in chars buffer.
            internal unsafe bool MoreData
            {
                get
                {
                    if (fallbackBuffer.Remaining > 0)
                    {
                        return true;
                    }

                    return _chars < _charEnd;
                }
            }

            // Returns the next char to encode. The fallback buffer is asked first;
            // if it yields 0 the next input char is taken instead, and if the
            // input is exhausted too the result stays 0.
            internal unsafe char GetNextChar()
            {
                // See if there's something in our fallback buffer
                char next = fallbackBuffer.InternalGetNextChar();

                // Nothing in the fallback buffer, return our normal data.
                if (next == 0 && _chars < _charEnd)
                {
                    next = *_chars;
                    _chars++;
                }

                return next;
            }

            // Distance of the char read cursor from the start of the input.
            internal unsafe int CharsUsed => (int)(_chars - _charStart);

            // Number of bytes produced so far.
            internal unsafe int Count => _byteCountResult;
        }
    }
}