1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Diagnostics;
6 using System.Globalization;
7 using System.Threading;
8 using System.Runtime.InteropServices;
9 using System.Runtime.Serialization;
10 using System.Diagnostics.CodeAnalysis;
11 
12 namespace System.Text
13 {
14     // This abstract base class represents a character encoding. The class provides
15     // methods to convert arrays and strings of Unicode characters to and from
    // arrays of bytes. A number of Encoding implementations are provided in
    // the System.Text namespace, including:
18     //
19     // ASCIIEncoding, which encodes Unicode characters as single 7-bit
20     // ASCII characters. This encoding only supports character values between 0x00
21     //     and 0x7F.
22     // BaseCodePageEncoding, which encapsulates a Windows code page. Any
23     //     installed code page can be accessed through this encoding, and conversions
24     //     are performed using the WideCharToMultiByte and
25     //     MultiByteToWideChar Windows API functions.
26     // UnicodeEncoding, which encodes each Unicode character as two
27     //    consecutive bytes. Both little-endian (code page 1200) and big-endian (code
28     //    page 1201) encodings are recognized.
29     // UTF7Encoding, which encodes Unicode characters using the UTF-7
30     //     encoding (UTF-7 stands for UCS Transformation Format, 7-bit form). This
31     //     encoding supports all Unicode character values, and can also be accessed
32     //     as code page 65000.
33     // UTF8Encoding, which encodes Unicode characters using the UTF-8
34     //     encoding (UTF-8 stands for UCS Transformation Format, 8-bit form). This
35     //     encoding supports all Unicode character values, and can also be accessed
36     //     as code page 65001.
37     // UTF32Encoding, both 12000 (little endian) & 12001 (big endian)
38     //
    // In addition to directly instantiating Encoding objects, an
    // application can use the static GetEncoding methods and the static
    // ASCII, Default, Unicode, UTF7, and UTF8 members of this class
    // to obtain encodings.
43     //
44     // Through an encoding, the GetBytes method is used to convert arrays
45     // of characters to arrays of bytes, and the GetChars method is used to
46     // convert arrays of bytes to arrays of characters. The GetBytes and
47     // GetChars methods maintain no state between conversions, and are
48     // generally intended for conversions of complete blocks of bytes and
49     // characters in one operation. When the data to be converted is only available
50     // in sequential blocks (such as data read from a stream) or when the amount of
51     // data is so large that it needs to be divided into smaller blocks, an
52     // application may choose to use a Decoder or an Encoder to
53     // perform the conversion. Decoders and encoders allow sequential blocks of
54     // data to be converted and they maintain the state required to support
55     // conversions of data that spans adjacent blocks. Decoders and encoders are
56     // obtained using the GetDecoder and GetEncoder methods.
57     //
58     // The core GetBytes and GetChars methods require the caller
59     // to provide the destination buffer and ensure that the buffer is large enough
60     // to hold the entire result of the conversion. When using these methods,
61     // either directly on an Encoding object or on an associated
62     // Decoder or Encoder, an application can use one of two methods
63     // to allocate destination buffers.
64     //
65     // The GetByteCount and GetCharCount methods can be used to
66     // compute the exact size of the result of a particular conversion, and an
67     // appropriately sized buffer for that conversion can then be allocated.
    // The GetMaxByteCount and GetMaxCharCount methods can be
    // used to compute the maximum possible size of a conversion of a given
70     // number of bytes or characters, and a buffer of that size can then be reused
71     // for multiple conversions.
72     //
73     // The first method generally uses less memory, whereas the second method
74     // generally executes faster.
75     //
76 
77     public abstract class Encoding : ICloneable
78     {
79         // For netcore we use UTF8 as default encoding since ANSI isn't available
80         private static readonly UTF8Encoding.UTF8EncodingSealed s_defaultEncoding  = new UTF8Encoding.UTF8EncodingSealed(encoderShouldEmitUTF8Identifier: false);
81 
82         // Returns an encoding for the system's current ANSI code page.
83         public static Encoding Default => s_defaultEncoding;
84 
85         //
86         // The following values are from mlang.idl.  These values
87         // should be in sync with those in mlang.idl.
88         //
        // Bit masks that are tested against CodePageDataItem.Flags by the
        // IsMailNewsDisplay, IsBrowserDisplay, IsMailNewsSave and IsBrowserSave properties.
        internal const int MIMECONTF_MAILNEWS = 0x00000001;          // tested by IsMailNewsDisplay
        internal const int MIMECONTF_BROWSER = 0x00000002;           // tested by IsBrowserDisplay
        internal const int MIMECONTF_SAVABLE_MAILNEWS = 0x00000100;  // tested by IsMailNewsSave
        internal const int MIMECONTF_SAVABLE_BROWSER = 0x00000200;   // tested by IsBrowserSave
93 
94         // Special Case Code Pages
        // GetEncoding(int) maps CodePageDefault to the Default encoding and rejects
        // the four "No*" values below with an ArgumentException.
        private const int CodePageDefault = 0;
        private const int CodePageNoOEM = 1;        // OEM Code page not supported (Win32 CP_OEMCP)
        private const int CodePageNoMac = 2;        // MAC code page not supported (Win32 CP_MACCP)
        private const int CodePageNoThread = 3;        // Thread code page not supported (Win32 CP_THREAD_ACP)
        private const int CodePageNoSymbol = 42;       // Symbol code page not supported (Win32 CP_SYMBOL)
        private const int CodePageUnicode = 1200;     // Unicode (little endian; see the class header comment)
        private const int CodePageBigEndian = 1201;     // Big Endian Unicode
        private const int CodePageWindows1252 = 1252;     // Windows 1252 code page

        // 20936 has same code page as 10008, so we'll special case it
        // NOTE(review): the constants from here down to ENC50229 are not referenced in the
        // visible part of this file - confirm whether they are still used elsewhere.
        private const int CodePageMacGB2312 = 10008;
        private const int CodePageGB2312 = 20936;
        private const int CodePageMacKorean = 10003;
        private const int CodePageDLLKorean = 20949;

        // ISO 2022 Code Pages
        private const int ISO2022JP = 50220;
        private const int ISO2022JPESC = 50221;
        private const int ISO2022JPSISO = 50222;
        private const int ISOKorean = 50225;
        private const int ISOSimplifiedCN = 50227;
        private const int EUCJP = 51932;
        private const int ChineseHZ = 52936;    // HZ has ~}~{~~ sequences

        // 51936 is the same as 936
        private const int DuplicateEUCCN = 51936;
        private const int EUCCN = 936;

        private const int EUCKR = 51949;

        // Latin 1 & ASCII Code Pages
        // These two are internal rather than private; GetEncoding(int) maps them to
        // the ASCII and Latin1 encodings respectively.
        internal const int CodePageASCII = 20127;    // ASCII
        internal const int ISO_8859_1 = 28591;    // Latin1

        // ISCII
        private const int ISCIIAssemese = 57006;
        private const int ISCIIBengali = 57003;
        private const int ISCIIDevanagari = 57002;
        private const int ISCIIGujarathi = 57010;
        private const int ISCIIKannada = 57008;
        private const int ISCIIMalayalam = 57009;
        private const int ISCIIOriya = 57007;
        private const int ISCIIPanjabi = 57011;
        private const int ISCIITamil = 57004;
        private const int ISCIITelugu = 57005;

        // GB18030
        private const int GB18030 = 54936;

        // Other
        private const int ISO_8859_8I = 38598;
        private const int ISO_8859_8_Visual = 28598;

        // 50229 is currently unsupported // "Chinese Traditional (ISO-2022)"
        private const int ENC50229 = 50229;

        // Special code pages
        // GetEncoding(int) maps these to the UTF7, UTF8, UTF32 and BigEndianUTF32 encodings.
        private const int CodePageUTF7 = 65000;
        private const int CodePageUTF8 = 65001;
        private const int CodePageUTF32 = 12000;
        private const int CodePageUTF32BE = 12001;
156 
        // Code page identifier of this encoding; assigned by the constructors after
        // they have rejected negative values.
        internal int _codePage = 0;

        // EncodingTable entry for _codePage. Starts out null and is filled in on
        // first use by GetDataItem().
        internal CodePageDataItem _dataItem = null;

        // Because of encoders we may be read only
        // Starts out true; Clone() sets it to false on the copy it returns, which is
        // what allows the EncoderFallback/DecoderFallback setters to succeed on a clone.
        [OptionalField(VersionAdded = 2)]
        private bool _isReadOnly = true;

        // Encoding (encoder) fallback
        // Backing fields for the EncoderFallback and DecoderFallback properties.
        internal EncoderFallback encoderFallback = null;
        internal DecoderFallback decoderFallback = null;
168 
Encoding()169         protected Encoding() : this(0)
170         {
171         }
172 
173 
Encoding(int codePage)174         protected Encoding(int codePage)
175         {
176             // Validate code page
177             if (codePage < 0)
178             {
179                 throw new ArgumentOutOfRangeException(nameof(codePage));
180             }
181 
182             // Remember code page
183             _codePage = codePage;
184 
185             // Use default encoder/decoder fallbacks
186             this.SetDefaultFallbacks();
187         }
188 
        // This constructor allows a subclass to provide its encoder/decoder fallback objects
        // at construction time. That is necessary because an encoding object is always created
        // read-only, and a read-only encoding does not allow its fallbacks to be set afterwards.
Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)192         protected Encoding(int codePage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)
193         {
194             // Validate code page
195             if (codePage < 0)
196             {
197                 throw new ArgumentOutOfRangeException(nameof(codePage));
198             }
199 
200             // Remember code page
201             _codePage = codePage;
202 
203             this.encoderFallback = encoderFallback ?? new InternalEncoderBestFitFallback(this);
204             this.decoderFallback = decoderFallback ?? new InternalDecoderBestFitFallback(this);
205         }
206 
207         // Default fallback that we'll use.
SetDefaultFallbacks()208         internal virtual void SetDefaultFallbacks()
209         {
210             // For UTF-X encodings, we use a replacement fallback with an "\xFFFD" string,
211             // For ASCII we use "?" replacement fallback, etc.
212             encoderFallback = new InternalEncoderBestFitFallback(this);
213             decoderFallback = new InternalDecoderBestFitFallback(this);
214         }
215 
216         // Converts a byte array from one encoding to another. The bytes in the
217         // bytes array are converted from srcEncoding to
218         // dstEncoding, and the returned value is a new byte array
219         // containing the result of the conversion.
220         //
Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes)221         public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
222             byte[] bytes)
223         {
224             if (bytes == null)
225                 throw new ArgumentNullException(nameof(bytes));
226 
227             return Convert(srcEncoding, dstEncoding, bytes, 0, bytes.Length);
228         }
229 
230         // Converts a range of bytes in a byte array from one encoding to another.
231         // This method converts count bytes from bytes starting at
232         // index index from srcEncoding to dstEncoding, and
233         // returns a new byte array containing the result of the conversion.
234         //
Convert(Encoding srcEncoding, Encoding dstEncoding, byte[] bytes, int index, int count)235         public static byte[] Convert(Encoding srcEncoding, Encoding dstEncoding,
236             byte[] bytes, int index, int count)
237         {
238             if (srcEncoding == null || dstEncoding == null)
239             {
240                 throw new ArgumentNullException((srcEncoding == null ? nameof(srcEncoding) : nameof(dstEncoding)),
241                     SR.ArgumentNull_Array);
242             }
243             if (bytes == null)
244             {
245                 throw new ArgumentNullException(nameof(bytes),
246                     SR.ArgumentNull_Array);
247             }
248 
249             return dstEncoding.GetBytes(srcEncoding.GetChars(bytes, index, count));
250         }
251 
RegisterProvider(EncodingProvider provider)252         public static void RegisterProvider(EncodingProvider provider)
253         {
254             // Parameters validated inside EncodingProvider
255             EncodingProvider.AddProvider(provider);
256         }
257 
GetEncoding(int codepage)258         public static Encoding GetEncoding(int codepage)
259         {
260             Encoding result = EncodingProvider.GetEncodingFromProvider(codepage);
261             if (result != null)
262                 return result;
263 
264             //
265             // NOTE: If you add a new encoding that can be retrieved by codepage, be sure to
266             // add the corresponding item in EncodingTable.
267             // Otherwise, the code below will throw exception when trying to call
268             // EncodingTable.GetDataItem().
269             //
270             if (codepage < 0 || codepage > 65535)
271             {
272                 throw new ArgumentOutOfRangeException(
273                     nameof(codepage), SR.Format(SR.ArgumentOutOfRange_Range, 0, 65535));
274             }
275 
276 
277             switch (codepage)
278             {
279                 case CodePageDefault: return Default;            // 0
280                 case CodePageUnicode: return Unicode;            // 1200
281                 case CodePageBigEndian: return BigEndianUnicode; // 1201
282                 case CodePageUTF32: return UTF32;                // 12000
283                 case CodePageUTF32BE: return BigEndianUTF32;     // 12001
284                 case CodePageUTF7: return UTF7;                  // 65000
285                 case CodePageUTF8: return UTF8;                  // 65001
286                 case CodePageASCII: return ASCII;                // 20127
287                 case ISO_8859_1: return Latin1;                  // 28591
288 
289                 // We don't allow the following special code page values that Win32 allows.
290                 case CodePageNoOEM:                              // 1 CP_OEMCP
291                 case CodePageNoMac:                              // 2 CP_MACCP
292                 case CodePageNoThread:                           // 3 CP_THREAD_ACP
293                 case CodePageNoSymbol:                           // 42 CP_SYMBOL
294                     throw new ArgumentException(SR.Format(SR.Argument_CodepageNotSupported, codepage), nameof(codepage));
295             }
296 
297             // Is it a valid code page?
298             if (EncodingTable.GetCodePageDataItem(codepage) == null)
299             {
300                 throw new NotSupportedException(
301                     SR.Format(SR.NotSupported_NoCodepageData, codepage));
302             }
303 
304             return UTF8;
305         }
306 
GetEncoding(int codepage, EncoderFallback encoderFallback, DecoderFallback decoderFallback)307         public static Encoding GetEncoding(int codepage,
308             EncoderFallback encoderFallback, DecoderFallback decoderFallback)
309         {
310             Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(codepage, encoderFallback, decoderFallback);
311 
312             if (baseEncoding != null)
313                 return baseEncoding;
314 
315             // Get the default encoding (which is cached and read only)
316             baseEncoding = GetEncoding(codepage);
317 
318             // Clone it and set the fallback
319             Encoding fallbackEncoding = (Encoding)baseEncoding.Clone();
320             fallbackEncoding.EncoderFallback = encoderFallback;
321             fallbackEncoding.DecoderFallback = decoderFallback;
322 
323             return fallbackEncoding;
324         }
325 
326         // Returns an Encoding object for a given name or a given code page value.
327         //
GetEncoding(String name)328         public static Encoding GetEncoding(String name)
329         {
330             Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name);
331             if (baseEncoding != null)
332                 return baseEncoding;
333 
334             //
335             // NOTE: If you add a new encoding that can be requested by name, be sure to
336             // add the corresponding item in EncodingTable.
337             // Otherwise, the code below will throw exception when trying to call
338             // EncodingTable.GetCodePageFromName().
339             //
340             return GetEncoding(EncodingTable.GetCodePageFromName(name));
341         }
342 
343         // Returns an Encoding object for a given name or a given code page value.
344         //
GetEncoding(String name, EncoderFallback encoderFallback, DecoderFallback decoderFallback)345         public static Encoding GetEncoding(String name,
346             EncoderFallback encoderFallback, DecoderFallback decoderFallback)
347         {
348             Encoding baseEncoding = EncodingProvider.GetEncodingFromProvider(name, encoderFallback, decoderFallback);
349             if (baseEncoding != null)
350                 return baseEncoding;
351 
352             //
353             // NOTE: If you add a new encoding that can be requested by name, be sure to
354             // add the corresponding item in EncodingTable.
355             // Otherwise, the code below will throw exception when trying to call
356             // EncodingTable.GetCodePageFromName().
357             //
358             return (GetEncoding(EncodingTable.GetCodePageFromName(name), encoderFallback, decoderFallback));
359         }
360 
361         // Return a list of all EncodingInfo objects describing all of our encodings
GetEncodings()362         public static EncodingInfo[] GetEncodings()
363         {
364             return EncodingTable.GetEncodings();
365         }
366 
GetPreamble()367         public virtual byte[] GetPreamble()
368         {
369             return Array.Empty<byte>();
370         }
371 
372         public virtual ReadOnlySpan<byte> Preamble => GetPreamble();
373 
GetDataItem()374         private void GetDataItem()
375         {
376             if (_dataItem == null)
377             {
378                 _dataItem = EncodingTable.GetCodePageDataItem(_codePage);
379                 if (_dataItem == null)
380                 {
381                     throw new NotSupportedException(SR.Format(SR.NotSupported_NoCodepageData, _codePage));
382                 }
383             }
384         }
385 
386         // Returns the name for this encoding that can be used with mail agent body tags.
387         // If the encoding may not be used, the string is empty.
388 
389         public virtual String BodyName
390         {
391             get
392             {
393                 if (_dataItem == null)
394                 {
395                     GetDataItem();
396                 }
397                 return (_dataItem.BodyName);
398             }
399         }
400 
401         // Returns the human-readable description of the encoding ( e.g. Hebrew (DOS)).
402 #if PROJECTN
403         public virtual String EncodingName
404         {
405             get
406             {
407                 string encodingName = GetLocalizedEncodingNameResource(this.CodePage);
408                 if (encodingName == null)
409                 {
410                     throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.CodePage));
411                 }
412 
413                 if (encodingName.StartsWith("Globalization_cp_", StringComparison.Ordinal))
414                 {
415                     // On ProjectN, resource strings are stripped from retail builds and replaced by
416                     // their identifier names. Since this property is meant to be a localized string,
417                     // but we don't localize ProjectN, we specifically need to do something reasonable
418                     // in this case. This currently returns the English name of the encoding from a
419                     // static data table.
420                     encodingName = EncodingTable.GetCodePageDataItem(this.CodePage).EnglishName;
421                     if (encodingName == null)
422                     {
423                         throw new NotSupportedException(SR.Format(SR.MissingEncodingNameResource, this.WebName, this.CodePage));
424                     }
425                 }
426                 return encodingName;
427             }
428         }
429 
GetLocalizedEncodingNameResource(int codePage)430         private static string GetLocalizedEncodingNameResource(int codePage)
431         {
432             switch (codePage)
433             {
434                 case 1200: return SR.Globalization_cp_1200;
435                 case 1201: return SR.Globalization_cp_1201;
436                 case 12000: return SR.Globalization_cp_12000;
437                 case 12001: return SR.Globalization_cp_12001;
438                 case 20127: return SR.Globalization_cp_20127;
439                 case 28591: return SR.Globalization_cp_28591;
440                 case 65000: return SR.Globalization_cp_65000;
441                 case 65001: return SR.Globalization_cp_65001;
442                 default: return null;
443             }
444         }
445 #else
446         public virtual String EncodingName
447         {
448             get
449             {
450                 return SR.GetResourceString("Globalization_cp_" + _codePage.ToString());
451             }
452         }
453 #endif
454         // Returns the name for this encoding that can be used with mail agent header
455         // tags.  If the encoding may not be used, the string is empty.
456 
457         public virtual String HeaderName
458         {
459             get
460             {
461                 if (_dataItem == null)
462                 {
463                     GetDataItem();
464                 }
465                 return (_dataItem.HeaderName);
466             }
467         }
468 
469         // Returns the IANA preferred name for this encoding.
470         public virtual String WebName
471         {
472             get
473             {
474                 if (_dataItem == null)
475                 {
476                     GetDataItem();
477                 }
478                 return (_dataItem.WebName);
479             }
480         }
481 
482         // Returns the windows code page that most closely corresponds to this encoding.
483 
484         public virtual int WindowsCodePage
485         {
486             get
487             {
488                 if (_dataItem == null)
489                 {
490                     GetDataItem();
491                 }
492                 return (_dataItem.UIFamilyCodePage);
493             }
494         }
495 
496 
        // True if and only if the encoding is used for display by browser clients.
498 
499         public virtual bool IsBrowserDisplay
500         {
501             get
502             {
503                 if (_dataItem == null)
504                 {
505                     GetDataItem();
506                 }
507                 return ((_dataItem.Flags & MIMECONTF_BROWSER) != 0);
508             }
509         }
510 
        // True if and only if the encoding is used for saving by browser clients.
512 
513         public virtual bool IsBrowserSave
514         {
515             get
516             {
517                 if (_dataItem == null)
518                 {
519                     GetDataItem();
520                 }
521                 return ((_dataItem.Flags & MIMECONTF_SAVABLE_BROWSER) != 0);
522             }
523         }
524 
525         // True if and only if the encoding is used for display by mail and news clients.
526 
527         public virtual bool IsMailNewsDisplay
528         {
529             get
530             {
531                 if (_dataItem == null)
532                 {
533                     GetDataItem();
534                 }
535                 return ((_dataItem.Flags & MIMECONTF_MAILNEWS) != 0);
536             }
537         }
538 
539 
540         // True if and only if the encoding is used for saving documents by mail and
541         // news clients
542 
543         public virtual bool IsMailNewsSave
544         {
545             get
546             {
547                 if (_dataItem == null)
548                 {
549                     GetDataItem();
550                 }
551                 return ((_dataItem.Flags & MIMECONTF_SAVABLE_MAILNEWS) != 0);
552             }
553         }
554 
        // True if and only if the encoding only uses single byte code points (e.g., ASCII, 1252, etc.).
556 
557         public virtual bool IsSingleByte
558         {
559             get
560             {
561                 return false;
562             }
563         }
564 
565 
566         public EncoderFallback EncoderFallback
567         {
568             get
569             {
570                 return encoderFallback;
571             }
572 
573             set
574             {
575                 if (this.IsReadOnly)
576                     throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
577 
578                 if (value == null)
579                     throw new ArgumentNullException(nameof(value));
580 
581                 encoderFallback = value;
582             }
583         }
584 
585 
586         public DecoderFallback DecoderFallback
587         {
588             get
589             {
590                 return decoderFallback;
591             }
592 
593             set
594             {
595                 if (this.IsReadOnly)
596                     throw new InvalidOperationException(SR.InvalidOperation_ReadOnly);
597 
598                 if (value == null)
599                     throw new ArgumentNullException(nameof(value));
600 
601                 decoderFallback = value;
602             }
603         }
604 
605 
Clone()606         public virtual Object Clone()
607         {
608             Encoding newEncoding = (Encoding)this.MemberwiseClone();
609 
610             // New one should be readable
611             newEncoding._isReadOnly = false;
612             return newEncoding;
613         }
614 
615 
616         public bool IsReadOnly
617         {
618             get
619             {
620                 return (_isReadOnly);
621             }
622         }
623 
624         // Returns an encoding for the ASCII character set. The returned encoding
625         // will be an instance of the ASCIIEncoding class.
626 
627         public static Encoding ASCII => ASCIIEncoding.s_default;
628 
629         // Returns an encoding for the Latin1 character set. The returned encoding
630         // will be an instance of the Latin1Encoding class.
631         //
632         // This is for our optimizations
633         private static Encoding Latin1 => Latin1Encoding.s_default;
634 
635         // Returns the number of bytes required to encode the given character
636         // array.
637         //
GetByteCount(char[] chars)638         public virtual int GetByteCount(char[] chars)
639         {
640             if (chars == null)
641             {
642                 throw new ArgumentNullException(nameof(chars),
643                     SR.ArgumentNull_Array);
644             }
645 
646             return GetByteCount(chars, 0, chars.Length);
647         }
648 
GetByteCount(String s)649         public virtual int GetByteCount(String s)
650         {
651             if (s == null)
652                 throw new ArgumentNullException(nameof(s));
653 
654             char[] chars = s.ToCharArray();
655             return GetByteCount(chars, 0, chars.Length);
656         }
657 
658         // Returns the number of bytes required to encode a range of characters in
659         // a character array.
660         //
        // Abstract: every encoding must implement this overload. The other GetByteCount
        // overloads in this class funnel into it, either directly or through the
        // pointer overload.
        //   chars - the character array holding the range
        //   index - position of the first character of the range
        //   count - number of characters in the range
        public abstract int GetByteCount(char[] chars, int index, int count);
662 
663         // Returns the number of bytes required to encode a string range.
664         //
GetByteCount(string s, int index, int count)665         public int GetByteCount(string s, int index, int count)
666         {
667             if (s == null)
668                 throw new ArgumentNullException(nameof(s),
669                     SR.ArgumentNull_String);
670             if (index < 0)
671                 throw new ArgumentOutOfRangeException(nameof(index),
672                       SR.ArgumentOutOfRange_NeedNonNegNum);
673             if (count < 0)
674                 throw new ArgumentOutOfRangeException(nameof(count),
675                       SR.ArgumentOutOfRange_NeedNonNegNum);
676             if (index > s.Length - count)
677                 throw new ArgumentOutOfRangeException(nameof(index),
678                       SR.ArgumentOutOfRange_IndexCount);
679 
680             unsafe
681             {
682                 fixed (char* pChar = s)
683                 {
684                     return GetByteCount(pChar + index, count);
685                 }
686             }
687         }
688 
        // We expect this to be the workhorse for NLS encodings. Unfortunately, for the
        // sake of existing overrides, this base implementation has to call the char[]
        // version, which is really slow, so this method should be avoided if you're
        // calling a 3rd party encoding.
693         [CLSCompliant(false)]
GetByteCount(char* chars, int count)694         public virtual unsafe int GetByteCount(char* chars, int count)
695         {
696             // Validate input parameters
697             if (chars == null)
698                 throw new ArgumentNullException(nameof(chars),
699                       SR.ArgumentNull_Array);
700 
701             if (count < 0)
702                 throw new ArgumentOutOfRangeException(nameof(count),
703                       SR.ArgumentOutOfRange_NeedNonNegNum);
704 
705             char[] arrChar = new char[count];
706             int index;
707 
708             for (index = 0; index < count; index++)
709                 arrChar[index] = chars[index];
710 
711             return GetByteCount(arrChar, 0, count);
712         }
713 
GetByteCount(ReadOnlySpan<char> chars)714         public virtual unsafe int GetByteCount(ReadOnlySpan<char> chars)
715         {
716             fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
717             {
718                 return GetByteCount(charsPtr, chars.Length);
719             }
720         }
721 
722         // For NLS Encodings, workhorse takes an encoder (may be null)
723         // Always validate parameters before calling internal version, which will only assert.
GetByteCount(char* chars, int count, EncoderNLS encoder)724         internal virtual unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
725         {
726             Debug.Assert(chars != null);
727             Debug.Assert(count >= 0);
728 
729             return GetByteCount(chars, count);
730         }
731 
732         // Returns a byte array containing the encoded representation of the given
733         // character array.
734         //
GetBytes(char[] chars)735         public virtual byte[] GetBytes(char[] chars)
736         {
737             if (chars == null)
738             {
739                 throw new ArgumentNullException(nameof(chars),
740                     SR.ArgumentNull_Array);
741             }
742             return GetBytes(chars, 0, chars.Length);
743         }
744 
745         // Returns a byte array containing the encoded representation of a range
746         // of characters in a character array.
747         //
GetBytes(char[] chars, int index, int count)748         public virtual byte[] GetBytes(char[] chars, int index, int count)
749         {
750             byte[] result = new byte[GetByteCount(chars, index, count)];
751             GetBytes(chars, index, count, result, 0);
752             return result;
753         }
754 
        // Encodes a range of characters in a character array into a range of bytes
        // in a byte array. An exception occurs if the byte array is not large
        // enough to hold the complete encoding of the characters. The
        // GetByteCount method can be used to determine the exact number of
        // bytes that will be produced for a given range of characters.
        // Alternatively, the GetMaxByteCount method can be used to
        // determine the maximum number of bytes that will be produced for a given
        // number of characters, regardless of the actual character values.
        //
        // Parameters:
        //   chars / charIndex / charCount - source array and the range to encode.
        //   bytes / byteIndex             - destination array and the start offset.
        // Returns: callers in this class treat the result as the number of bytes
        // written into the destination.
        //
        // This is the abstract primitive that the other GetBytes overloads in this
        // class forward to; every concrete encoding must implement it.
        //
        public abstract int GetBytes(char[] chars, int charIndex, int charCount,
            byte[] bytes, int byteIndex);
766 
767         // Returns a byte array containing the encoded representation of the given
768         // string.
769         //
GetBytes(String s)770         public virtual byte[] GetBytes(String s)
771         {
772             if (s == null)
773                 throw new ArgumentNullException(nameof(s),
774                     SR.ArgumentNull_String);
775 
776             int byteCount = GetByteCount(s);
777             byte[] bytes = new byte[byteCount];
778             int bytesReceived = GetBytes(s, 0, s.Length, bytes, 0);
779             Debug.Assert(byteCount == bytesReceived);
780             return bytes;
781         }
782 
783         // Returns a byte array containing the encoded representation of the given
784         // string range.
785         //
GetBytes(string s, int index, int count)786         public byte[] GetBytes(string s, int index, int count)
787         {
788             if (s == null)
789                 throw new ArgumentNullException(nameof(s),
790                     SR.ArgumentNull_String);
791             if (index < 0)
792                 throw new ArgumentOutOfRangeException(nameof(index),
793                       SR.ArgumentOutOfRange_NeedNonNegNum);
794             if (count < 0)
795                 throw new ArgumentOutOfRangeException(nameof(count),
796                       SR.ArgumentOutOfRange_NeedNonNegNum);
797             if (index > s.Length - count)
798                 throw new ArgumentOutOfRangeException(nameof(index),
799                       SR.ArgumentOutOfRange_IndexCount);
800 
801             unsafe
802             {
803                 fixed (char* pChar = s)
804                 {
805                     int byteCount = GetByteCount(pChar + index, count);
806                     if (byteCount == 0)
807                         return Array.Empty<byte>();
808 
809                     byte[] bytes = new byte[byteCount];
810                     fixed (byte* pBytes = &bytes[0])
811                     {
812                         int bytesReceived = GetBytes(pChar + index, count, pBytes, byteCount);
813                         Debug.Assert(byteCount == bytesReceived);
814                     }
815                     return bytes;
816                 }
817             }
818         }
819 
GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex)820         public virtual int GetBytes(String s, int charIndex, int charCount,
821                                        byte[] bytes, int byteIndex)
822         {
823             if (s == null)
824                 throw new ArgumentNullException(nameof(s));
825             return GetBytes(s.ToCharArray(), charIndex, charCount, bytes, byteIndex);
826         }
827 
828         // This is our internal workhorse
829         // Always validate parameters before calling internal version, which will only assert.
GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS encoder)830         internal virtual unsafe int GetBytes(char* chars, int charCount,
831                                                 byte* bytes, int byteCount, EncoderNLS encoder)
832         {
833             return GetBytes(chars, charCount, bytes, byteCount);
834         }
835 
836         // We expect this to be the workhorse for NLS Encodings, but for existing
837         // ones we need a working (if slow) default implementation)
838         //
839         // WARNING WARNING WARNING
840         //
841         // WARNING: If this breaks it could be a security threat.  Obviously we
842         // call this internally, so you need to make sure that your pointers, counts
843         // and indexes are correct when you call this method.
844         //
845         // In addition, we have internal code, which will be marked as "safe" calling
846         // this code.  However this code is dependent upon the implementation of an
847         // external GetBytes() method, which could be overridden by a third party and
848         // the results of which cannot be guaranteed.  We use that result to copy
849         // the byte[] to our byte* output buffer.  If the result count was wrong, we
850         // could easily overflow our output buffer.  Therefore we do an extra test
851         // when we copy the buffer so that we don't overflow byteCount either.
852 
853         [CLSCompliant(false)]
GetBytes(char* chars, int charCount, byte* bytes, int byteCount)854         public virtual unsafe int GetBytes(char* chars, int charCount,
855                                               byte* bytes, int byteCount)
856         {
857             // Validate input parameters
858             if (bytes == null || chars == null)
859                 throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars),
860                     SR.ArgumentNull_Array);
861 
862             if (charCount < 0 || byteCount < 0)
863                 throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)),
864                     SR.ArgumentOutOfRange_NeedNonNegNum);
865 
866             // Get the char array to convert
867             char[] arrChar = new char[charCount];
868 
869             int index;
870             for (index = 0; index < charCount; index++)
871                 arrChar[index] = chars[index];
872 
873             // Get the byte array to fill
874             byte[] arrByte = new byte[byteCount];
875 
876             // Do the work
877             int result = GetBytes(arrChar, 0, charCount, arrByte, 0);
878 
879             Debug.Assert(result <= byteCount, "[Encoding.GetBytes]Returned more bytes than we have space for");
880 
881             // Copy the byte array
882             // WARNING: We MUST make sure that we don't copy too many bytes.  We can't
883             // rely on result because it could be a 3rd party implementation.  We need
884             // to make sure we never copy more than byteCount bytes no matter the value
885             // of result
886             if (result < byteCount)
887                 byteCount = result;
888 
889             // Copy the data, don't overrun our array!
890             for (index = 0; index < byteCount; index++)
891                 bytes[index] = arrByte[index];
892 
893             return byteCount;
894         }
895 
GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)896         public virtual unsafe int GetBytes(ReadOnlySpan<char> chars, Span<byte> bytes)
897         {
898             fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
899             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
900             {
901                 return GetBytes(charsPtr, chars.Length, bytesPtr, bytes.Length);
902             }
903         }
904 
905         // Returns the number of characters produced by decoding the given byte
906         // array.
907         //
GetCharCount(byte[] bytes)908         public virtual int GetCharCount(byte[] bytes)
909         {
910             if (bytes == null)
911             {
912                 throw new ArgumentNullException(nameof(bytes),
913                     SR.ArgumentNull_Array);
914             }
915             return GetCharCount(bytes, 0, bytes.Length);
916         }
917 
        // Returns the number of characters produced by decoding a range of bytes
        // in a byte array.
        //
        // Parameters: bytes is the source array; index and count select the range.
        // This is the abstract primitive that the other GetCharCount overloads in
        // this class forward to; every concrete encoding must implement it.
        //
        public abstract int GetCharCount(byte[] bytes, int index, int count);
922 
923         // We expect this to be the workhorse for NLS Encodings, but for existing
924         // ones we need a working (if slow) default implementation)
925         [CLSCompliant(false)]
GetCharCount(byte* bytes, int count)926         public virtual unsafe int GetCharCount(byte* bytes, int count)
927         {
928             // Validate input parameters
929             if (bytes == null)
930                 throw new ArgumentNullException(nameof(bytes),
931                       SR.ArgumentNull_Array);
932 
933             if (count < 0)
934                 throw new ArgumentOutOfRangeException(nameof(count),
935                       SR.ArgumentOutOfRange_NeedNonNegNum);
936 
937             byte[] arrbyte = new byte[count];
938             int index;
939 
940             for (index = 0; index < count; index++)
941                 arrbyte[index] = bytes[index];
942 
943             return GetCharCount(arrbyte, 0, count);
944         }
945 
GetCharCount(ReadOnlySpan<byte> bytes)946         public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes)
947         {
948             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
949             {
950                 return GetCharCount(bytesPtr, bytes.Length);
951             }
952         }
953 
954         // This is our internal workhorse
955         // Always validate parameters before calling internal version, which will only assert.
GetCharCount(byte* bytes, int count, DecoderNLS decoder)956         internal virtual unsafe int GetCharCount(byte* bytes, int count, DecoderNLS decoder)
957         {
958             return GetCharCount(bytes, count);
959         }
960 
961         // Returns a character array containing the decoded representation of a
962         // given byte array.
963         //
GetChars(byte[] bytes)964         public virtual char[] GetChars(byte[] bytes)
965         {
966             if (bytes == null)
967             {
968                 throw new ArgumentNullException(nameof(bytes),
969                     SR.ArgumentNull_Array);
970             }
971             return GetChars(bytes, 0, bytes.Length);
972         }
973 
974         // Returns a character array containing the decoded representation of a
975         // range of bytes in a byte array.
976         //
GetChars(byte[] bytes, int index, int count)977         public virtual char[] GetChars(byte[] bytes, int index, int count)
978         {
979             char[] result = new char[GetCharCount(bytes, index, count)];
980             GetChars(bytes, index, count, result, 0);
981             return result;
982         }
983 
        // Decodes a range of bytes in a byte array into a range of characters in a
        // character array. An exception occurs if the character array is not large
        // enough to hold the complete decoding of the bytes. The
        // GetCharCount method can be used to determine the exact number of
        // characters that will be produced for a given range of bytes.
        // Alternatively, the GetMaxCharCount method can be used to
        // determine the maximum number of characters that will be produced for a
        // given number of bytes, regardless of the actual byte values.
        //
        // Parameters:
        //   bytes / byteIndex / byteCount - source array and the range to decode.
        //   chars / charIndex             - destination array and the start offset.
        // Returns: callers in this class treat the result as the number of
        // characters written into the destination.
        //
        // This is the abstract primitive that the other GetChars overloads in this
        // class forward to; every concrete encoding must implement it.
        //

        public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                       char[] chars, int charIndex);
996 
997 
998         // We expect this to be the workhorse for NLS Encodings, but for existing
999         // ones we need a working (if slow) default implementation)
1000         //
1001         // WARNING WARNING WARNING
1002         //
1003         // WARNING: If this breaks it could be a security threat.  Obviously we
1004         // call this internally, so you need to make sure that your pointers, counts
1005         // and indexes are correct when you call this method.
1006         //
1007         // In addition, we have internal code, which will be marked as "safe" calling
1008         // this code.  However this code is dependent upon the implementation of an
1009         // external GetChars() method, which could be overridden by a third party and
1010         // the results of which cannot be guaranteed.  We use that result to copy
1011         // the char[] to our char* output buffer.  If the result count was wrong, we
1012         // could easily overflow our output buffer.  Therefore we do an extra test
1013         // when we copy the buffer so that we don't overflow charCount either.
1014 
1015         [CLSCompliant(false)]
GetChars(byte* bytes, int byteCount, char* chars, int charCount)1016         public virtual unsafe int GetChars(byte* bytes, int byteCount,
1017                                               char* chars, int charCount)
1018         {
1019             // Validate input parameters
1020             if (chars == null || bytes == null)
1021                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
1022                     SR.ArgumentNull_Array);
1023 
1024             if (byteCount < 0 || charCount < 0)
1025                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
1026                     SR.ArgumentOutOfRange_NeedNonNegNum);
1027 
1028             // Get the byte array to convert
1029             byte[] arrByte = new byte[byteCount];
1030 
1031             int index;
1032             for (index = 0; index < byteCount; index++)
1033                 arrByte[index] = bytes[index];
1034 
1035             // Get the char array to fill
1036             char[] arrChar = new char[charCount];
1037 
1038             // Do the work
1039             int result = GetChars(arrByte, 0, byteCount, arrChar, 0);
1040 
1041             Debug.Assert(result <= charCount, "[Encoding.GetChars]Returned more chars than we have space for");
1042 
1043             // Copy the char array
1044             // WARNING: We MUST make sure that we don't copy too many chars.  We can't
1045             // rely on result because it could be a 3rd party implementation.  We need
1046             // to make sure we never copy more than charCount chars no matter the value
1047             // of result
1048             if (result < charCount)
1049                 charCount = result;
1050 
1051             // Copy the data, don't overrun our array!
1052             for (index = 0; index < charCount; index++)
1053                 chars[index] = arrChar[index];
1054 
1055             return charCount;
1056         }
1057 
GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)1058         public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars)
1059         {
1060             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
1061             fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
1062             {
1063                 return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length);
1064             }
1065         }
1066 
1067         // This is our internal workhorse
1068         // Always validate parameters before calling internal version, which will only assert.
GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS decoder)1069         internal virtual unsafe int GetChars(byte* bytes, int byteCount,
1070                                                 char* chars, int charCount, DecoderNLS decoder)
1071         {
1072             return GetChars(bytes, byteCount, chars, charCount);
1073         }
1074 
1075 
1076         [CLSCompliant(false)]
GetString(byte* bytes, int byteCount)1077         public unsafe string GetString(byte* bytes, int byteCount)
1078         {
1079             if (bytes == null)
1080                 throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);
1081 
1082             if (byteCount < 0)
1083                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_NeedNonNegNum);
1084 
1085             return String.CreateStringFromEncoding(bytes, byteCount, this);
1086         }
1087 
GetString(ReadOnlySpan<byte> bytes)1088         public unsafe string GetString(ReadOnlySpan<byte> bytes)
1089         {
1090             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
1091             {
1092                 return GetString(bytesPtr, bytes.Length);
1093             }
1094         }
1095 
1096 
1097         // Returns the code page identifier of this encoding. The returned value is
1098         // an integer between 0 and 65535 if the encoding has a code page
1099         // identifier, or -1 if the encoding does not represent a code page.
1100         //
1101 
1102         public virtual int CodePage
1103         {
1104             get
1105             {
1106                 return _codePage;
1107             }
1108         }
1109 
1110         // IsAlwaysNormalized
1111         // Returns true if the encoding is always normalized for the specified encoding form
IsAlwaysNormalized()1112         public bool IsAlwaysNormalized()
1113         {
1114             return this.IsAlwaysNormalized(NormalizationForm.FormC);
1115         }
1116 
IsAlwaysNormalized(NormalizationForm form)1117         public virtual bool IsAlwaysNormalized(NormalizationForm form)
1118         {
1119             // Assume false unless the encoding knows otherwise
1120             return false;
1121         }
1122 
1123         // Returns a Decoder object for this encoding. The returned object
1124         // can be used to decode a sequence of bytes into a sequence of characters.
1125         // Contrary to the GetChars family of methods, a Decoder can
1126         // convert partial sequences of bytes into partial sequences of characters
1127         // by maintaining the appropriate state between the conversions.
1128         //
1129         // This default implementation returns a Decoder that simply
1130         // forwards calls to the GetCharCount and GetChars methods to
1131         // the corresponding methods of this encoding. Encodings that require state
1132         // to be maintained between successive conversions should override this
1133         // method and return an instance of an appropriate Decoder
1134         // implementation.
1135         //
1136 
GetDecoder()1137         public virtual Decoder GetDecoder()
1138         {
1139             return new DefaultDecoder(this);
1140         }
1141 
1142         // Returns an Encoder object for this encoding. The returned object
1143         // can be used to encode a sequence of characters into a sequence of bytes.
1144         // Contrary to the GetBytes family of methods, an Encoder can
1145         // convert partial sequences of characters into partial sequences of bytes
1146         // by maintaining the appropriate state between the conversions.
1147         //
1148         // This default implementation returns an Encoder that simply
1149         // forwards calls to the GetByteCount and GetBytes methods to
1150         // the corresponding methods of this encoding. Encodings that require state
1151         // to be maintained between successive conversions should override this
1152         // method and return an instance of an appropriate Encoder
1153         // implementation.
1154         //
1155 
GetEncoder()1156         public virtual Encoder GetEncoder()
1157         {
1158             return new DefaultEncoder(this);
1159         }
1160 
        // Returns the maximum number of bytes required to encode a given number of
        // characters. This method can be used to determine an appropriate buffer
        // size for byte arrays passed to the GetBytes method of this
        // encoding or the GetBytes method of an Encoder for this
        // encoding. All encodings must guarantee that no buffer overflow
        // exceptions will occur if buffers are sized according to the results of
        // this method.
        //
        // WARNING: If you're using something besides the default replacement encoder fallback,
        // then you could have more bytes than this returned from an actual call to GetBytes().
        //
        // Parameter: charCount is the number of characters to be encoded.
        // Abstract: every concrete encoding must supply its own bound.
        //
        public abstract int GetMaxByteCount(int charCount);
1173 
        // Returns the maximum number of characters produced by decoding a given
        // number of bytes. This method can be used to determine an appropriate
        // buffer size for character arrays passed to the GetChars method of
        // this encoding or the GetChars method of a Decoder for this
        // encoding. All encodings must guarantee that no buffer overflow
        // exceptions will occur if buffers are sized according to the results of
        // this method.
        //
        // Parameter: byteCount is the number of bytes to be decoded.
        // Abstract: every concrete encoding must supply its own bound.
        //
        public abstract int GetMaxCharCount(int byteCount);
1183 
1184         // Returns a string containing the decoded representation of a given byte
1185         // array.
1186         //
GetString(byte[] bytes)1187         public virtual String GetString(byte[] bytes)
1188         {
1189             if (bytes == null)
1190                 throw new ArgumentNullException(nameof(bytes),
1191                     SR.ArgumentNull_Array);
1192 
1193             return GetString(bytes, 0, bytes.Length);
1194         }
1195 
1196         // Returns a string containing the decoded representation of a range of
1197         // bytes in a byte array.
1198         //
1199         // Internally we override this for performance
1200         //
GetString(byte[] bytes, int index, int count)1201         public virtual String GetString(byte[] bytes, int index, int count)
1202         {
1203             return new String(GetChars(bytes, index, count));
1204         }
1205 
1206         // Returns an encoding for Unicode format. The returned encoding will be
1207         // an instance of the UnicodeEncoding class.
1208         //
1209         // It will use little endian byte order, but will detect
1210         // input in big endian if it finds a byte order mark per Unicode 2.0.
1211 
1212         public static Encoding Unicode => UnicodeEncoding.s_littleEndianDefault;
1213 
1214         // Returns an encoding for Unicode format. The returned encoding will be
1215         // an instance of the UnicodeEncoding class.
1216         //
1217         // It will use big endian byte order, but will detect
1218         // input in little endian if it finds a byte order mark per Unicode 2.0.
1219 
1220         public static Encoding BigEndianUnicode => UnicodeEncoding.s_bigEndianDefault;
1221 
1222         // Returns an encoding for the UTF-7 format. The returned encoding will be
1223         // an instance of the UTF7Encoding class.
1224 
1225         public static Encoding UTF7 => UTF7Encoding.s_default;
1226 
1227         // Returns an encoding for the UTF-8 format. The returned encoding will be
1228         // an instance of the UTF8Encoding class.
1229 
1230         public static Encoding UTF8 => UTF8Encoding.s_default;
1231 
1232         // Returns an encoding for the UTF-32 format. The returned encoding will be
1233         // an instance of the UTF32Encoding class.
1234 
1235         public static Encoding UTF32 => UTF32Encoding.s_default;
1236 
1237         // Returns an encoding for the UTF-32 format. The returned encoding will be
1238         // an instance of the UTF32Encoding class.
1239         //
1240         // It will use big endian byte order.
1241 
1242         private static Encoding BigEndianUTF32 => UTF32Encoding.s_bigEndianDefault;
1243 
Equals(Object value)1244         public override bool Equals(Object value)
1245         {
1246             Encoding that = value as Encoding;
1247             if (that != null)
1248                 return (_codePage == that._codePage) &&
1249                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1250                        (DecoderFallback.Equals(that.DecoderFallback));
1251             return (false);
1252         }
1253 
1254 
GetHashCode()1255         public override int GetHashCode()
1256         {
1257             return _codePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
1258         }
1259 
GetBestFitUnicodeToBytesData()1260         internal virtual char[] GetBestFitUnicodeToBytesData()
1261         {
1262             // Normally we don't have any best fit data.
1263             return Array.Empty<char>();
1264         }
1265 
GetBestFitBytesToUnicodeData()1266         internal virtual char[] GetBestFitBytesToUnicodeData()
1267         {
1268             // Normally we don't have any best fit data.
1269             return Array.Empty<char>();
1270         }
1271 
ThrowBytesOverflow()1272         internal void ThrowBytesOverflow()
1273         {
1274             // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1275             // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1276             throw new ArgumentException(
1277                 SR.Format(SR.Argument_EncodingConversionOverflowBytes, EncodingName, EncoderFallback.GetType()), "bytes");
1278         }
1279 
ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)1280         internal void ThrowBytesOverflow(EncoderNLS encoder, bool nothingEncoded)
1281         {
1282             if (encoder == null || encoder._throwOnOverflow || nothingEncoded)
1283             {
1284                 if (encoder != null && encoder.InternalHasFallbackBuffer)
1285                     encoder.FallbackBuffer.InternalReset();
1286                 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1287                 // This happens if user has implemented an encoder fallback with a broken GetMaxCharCount
1288                 ThrowBytesOverflow();
1289             }
1290 
1291             // If we didn't throw, we are in convert and have to remember our flushing
1292             encoder.ClearMustFlush();
1293         }
1294 
ThrowCharsOverflow()1295         internal void ThrowCharsOverflow()
1296         {
1297             // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1298             // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1299             throw new ArgumentException(
1300                 SR.Format(SR.Argument_EncodingConversionOverflowChars, EncodingName, DecoderFallback.GetType()), "chars");
1301         }
1302 
ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)1303         internal void ThrowCharsOverflow(DecoderNLS decoder, bool nothingDecoded)
1304         {
1305             if (decoder == null || decoder._throwOnOverflow || nothingDecoded)
1306             {
1307                 if (decoder != null && decoder.InternalHasFallbackBuffer)
1308                     decoder.FallbackBuffer.InternalReset();
1309 
1310                 // Special message to include fallback type in case fallback's GetMaxCharCount is broken
1311                 // This happens if user has implemented a decoder fallback with a broken GetMaxCharCount
1312                 ThrowCharsOverflow();
1313             }
1314 
1315             // If we didn't throw, we are in convert and have to remember our flushing
1316             decoder.ClearMustFlush();
1317         }
1318 
1319         internal sealed class DefaultEncoder : Encoder, IObjectReference
1320         {
1321             private Encoding _encoding;
1322 
DefaultEncoder(Encoding encoding)1323             public DefaultEncoder(Encoding encoding)
1324             {
1325                 _encoding = encoding;
1326             }
1327 
GetRealObject(StreamingContext context)1328             public Object GetRealObject(StreamingContext context)
1329             {
1330                 throw new PlatformNotSupportedException();
1331             }
1332 
1333             // Returns the number of bytes the next call to GetBytes will
1334             // produce if presented with the given range of characters and the given
1335             // value of the flush parameter. The returned value takes into
1336             // account the state in which the encoder was left following the last call
1337             // to GetBytes. The state of the encoder is not affected by a call
1338             // to this method.
1339             //
1340 
GetByteCount(char[] chars, int index, int count, bool flush)1341             public override int GetByteCount(char[] chars, int index, int count, bool flush)
1342             {
1343                 return _encoding.GetByteCount(chars, index, count);
1344             }
1345 
GetByteCount(char* chars, int count, bool flush)1346             public unsafe override int GetByteCount(char* chars, int count, bool flush)
1347             {
1348                 return _encoding.GetByteCount(chars, count);
1349             }
1350 
1351             // Encodes a range of characters in a character array into a range of bytes
1352             // in a byte array. The method encodes charCount characters from
1353             // chars starting at index charIndex, storing the resulting
1354             // bytes in bytes starting at index byteIndex. The encoding
1355             // takes into account the state in which the encoder was left following the
1356             // last call to this method. The flush parameter indicates whether
1357             // the encoder should flush any shift-states and partial characters at the
1358             // end of the conversion. To ensure correct termination of a sequence of
1359             // blocks of encoded bytes, the last call to GetBytes should specify
1360             // a value of true for the flush parameter.
1361             //
1362             // An exception occurs if the byte array is not large enough to hold the
1363             // complete encoding of the characters. The GetByteCount method can
1364             // be used to determine the exact number of bytes that will be produced for
1365             // a given range of characters. Alternatively, the GetMaxByteCount
1366             // method of the Encoding that produced this encoder can be used to
1367             // determine the maximum number of bytes that will be produced for a given
1368             // number of characters, regardless of the actual character values.
1369             //
1370 
GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex, bool flush)1371             public override int GetBytes(char[] chars, int charIndex, int charCount,
1372                                           byte[] bytes, int byteIndex, bool flush)
1373             {
1374                 return _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
1375             }
1376 
GetBytes(char* chars, int charCount, byte* bytes, int byteCount, bool flush)1377             public unsafe override int GetBytes(char* chars, int charCount,
1378                                                  byte* bytes, int byteCount, bool flush)
1379             {
1380                 return _encoding.GetBytes(chars, charCount, bytes, byteCount);
1381             }
1382         }
1383 
1384         internal sealed class DefaultDecoder : Decoder, IObjectReference
1385         {
1386             private Encoding _encoding;
1387 
DefaultDecoder(Encoding encoding)1388             public DefaultDecoder(Encoding encoding)
1389             {
1390                 _encoding = encoding;
1391             }
1392 
GetRealObject(StreamingContext context)1393             public Object GetRealObject(StreamingContext context)
1394             {
1395                 throw new PlatformNotSupportedException();
1396             }
1397 
1398             // Returns the number of characters the next call to GetChars will
1399             // produce if presented with the given range of bytes. The returned value
1400             // takes into account the state in which the decoder was left following the
1401             // last call to GetChars. The state of the decoder is not affected
1402             // by a call to this method.
1403             //
1404 
GetCharCount(byte[] bytes, int index, int count)1405             public override int GetCharCount(byte[] bytes, int index, int count)
1406             {
1407                 return GetCharCount(bytes, index, count, false);
1408             }
1409 
GetCharCount(byte[] bytes, int index, int count, bool flush)1410             public override int GetCharCount(byte[] bytes, int index, int count, bool flush)
1411             {
1412                 return _encoding.GetCharCount(bytes, index, count);
1413             }
1414 
GetCharCount(byte* bytes, int count, bool flush)1415             public unsafe override int GetCharCount(byte* bytes, int count, bool flush)
1416             {
1417                 // By default just call the encoding version, no flush by default
1418                 return _encoding.GetCharCount(bytes, count);
1419             }
1420 
1421             // Decodes a range of bytes in a byte array into a range of characters
1422             // in a character array. The method decodes byteCount bytes from
1423             // bytes starting at index byteIndex, storing the resulting
1424             // characters in chars starting at index charIndex. The
1425             // decoding takes into account the state in which the decoder was left
1426             // following the last call to this method.
1427             //
1428             // An exception occurs if the character array is not large enough to
1429             // hold the complete decoding of the bytes. The GetCharCount method
1430             // can be used to determine the exact number of characters that will be
1431             // produced for a given range of bytes. Alternatively, the
1432             // GetMaxCharCount method of the Encoding that produced this
1433             // decoder can be used to determine the maximum number of characters that
1434             // will be produced for a given number of bytes, regardless of the actual
1435             // byte values.
1436             //
1437 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)1438             public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1439                                            char[] chars, int charIndex)
1440             {
1441                 return GetChars(bytes, byteIndex, byteCount, chars, charIndex, false);
1442             }
1443 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, bool flush)1444             public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
1445                                            char[] chars, int charIndex, bool flush)
1446             {
1447                 return _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
1448             }
1449 
GetChars(byte* bytes, int byteCount, char* chars, int charCount, bool flush)1450             public unsafe override int GetChars(byte* bytes, int byteCount,
1451                                                   char* chars, int charCount, bool flush)
1452             {
1453                 // By default just call the encoding's version
1454                 return _encoding.GetChars(bytes, byteCount, chars, charCount);
1455             }
1456         }
1457 
1458         internal class EncodingCharBuffer
1459         {
1460             private unsafe char* _chars;
1461             private unsafe char* _charStart;
1462             private unsafe char* _charEnd;
1463             private int _charCountResult = 0;
1464             private Encoding _enc;
1465             private DecoderNLS _decoder;
1466             private unsafe byte* _byteStart;
1467             private unsafe byte* _byteEnd;
1468             private unsafe byte* _bytes;
1469             private DecoderFallbackBuffer _fallbackBuffer;
1470 
EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount, byte* byteStart, int byteCount)1471             internal unsafe EncodingCharBuffer(Encoding enc, DecoderNLS decoder, char* charStart, int charCount,
1472                                                     byte* byteStart, int byteCount)
1473             {
1474                 _enc = enc;
1475                 _decoder = decoder;
1476 
1477                 _chars = charStart;
1478                 _charStart = charStart;
1479                 _charEnd = charStart + charCount;
1480 
1481                 _byteStart = byteStart;
1482                 _bytes = byteStart;
1483                 _byteEnd = byteStart + byteCount;
1484 
1485                 if (_decoder == null)
1486                     _fallbackBuffer = enc.DecoderFallback.CreateFallbackBuffer();
1487                 else
1488                     _fallbackBuffer = _decoder.FallbackBuffer;
1489 
1490                 // If we're getting chars or getting char count we don't expect to have
1491                 // to remember fallbacks between calls (so it should be empty)
1492                 Debug.Assert(_fallbackBuffer.Remaining == 0,
1493                     "[Encoding.EncodingCharBuffer.EncodingCharBuffer]Expected empty fallback buffer for getchars/charcount");
1494                 _fallbackBuffer.InternalInitialize(_bytes, _charEnd);
1495             }
1496 
AddChar(char ch, int numBytes)1497             internal unsafe bool AddChar(char ch, int numBytes)
1498             {
1499                 if (_chars != null)
1500                 {
1501                     if (_chars >= _charEnd)
1502                     {
1503                         // Throw maybe
1504                         _bytes -= numBytes;                                        // Didn't encode these bytes
1505                         _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
1506                         return false;                                           // No throw, but no store either
1507                     }
1508 
1509                     *(_chars++) = ch;
1510                 }
1511                 _charCountResult++;
1512                 return true;
1513             }
1514 
AddChar(char ch)1515             internal unsafe bool AddChar(char ch)
1516             {
1517                 return AddChar(ch, 1);
1518             }
1519 
1520 
AddChar(char ch1, char ch2, int numBytes)1521             internal unsafe bool AddChar(char ch1, char ch2, int numBytes)
1522             {
1523                 // Need room for 2 chars
1524                 if (_chars >= _charEnd - 1)
1525                 {
1526                     // Throw maybe
1527                     _bytes -= numBytes;                                        // Didn't encode these bytes
1528                     _enc.ThrowCharsOverflow(_decoder, _bytes <= _byteStart);    // Throw?
1529                     return false;                                           // No throw, but no store either
1530                 }
1531                 return AddChar(ch1, numBytes) && AddChar(ch2, numBytes);
1532             }
1533 
AdjustBytes(int count)1534             internal unsafe void AdjustBytes(int count)
1535             {
1536                 _bytes += count;
1537             }
1538 
1539             internal unsafe bool MoreData
1540             {
1541                 get
1542                 {
1543                     return _bytes < _byteEnd;
1544                 }
1545             }
1546 
1547             // Do we have count more bytes?
EvenMoreData(int count)1548             internal unsafe bool EvenMoreData(int count)
1549             {
1550                 return (_bytes <= _byteEnd - count);
1551             }
1552 
1553             // GetNextByte shouldn't be called unless the caller's already checked more data or even more data,
1554             // but we'll double check just to make sure.
GetNextByte()1555             internal unsafe byte GetNextByte()
1556             {
1557                 Debug.Assert(_bytes < _byteEnd, "[EncodingCharBuffer.GetNextByte]Expected more date");
1558                 if (_bytes >= _byteEnd)
1559                     return 0;
1560                 return *(_bytes++);
1561             }
1562 
1563             internal unsafe int BytesUsed
1564             {
1565                 get
1566                 {
1567                     return (int)(_bytes - _byteStart);
1568                 }
1569             }
1570 
Fallback(byte fallbackByte)1571             internal unsafe bool Fallback(byte fallbackByte)
1572             {
1573                 // Build our buffer
1574                 byte[] byteBuffer = new byte[] { fallbackByte };
1575 
1576                 // Do the fallback and add the data.
1577                 return Fallback(byteBuffer);
1578             }
1579 
Fallback(byte byte1, byte byte2)1580             internal unsafe bool Fallback(byte byte1, byte byte2)
1581             {
1582                 // Build our buffer
1583                 byte[] byteBuffer = new byte[] { byte1, byte2 };
1584 
1585                 // Do the fallback and add the data.
1586                 return Fallback(byteBuffer);
1587             }
1588 
Fallback(byte byte1, byte byte2, byte byte3, byte byte4)1589             internal unsafe bool Fallback(byte byte1, byte byte2, byte byte3, byte byte4)
1590             {
1591                 // Build our buffer
1592                 byte[] byteBuffer = new byte[] { byte1, byte2, byte3, byte4 };
1593 
1594                 // Do the fallback and add the data.
1595                 return Fallback(byteBuffer);
1596             }
1597 
Fallback(byte[] byteBuffer)1598             internal unsafe bool Fallback(byte[] byteBuffer)
1599             {
1600                 // Do the fallback and add the data.
1601                 if (_chars != null)
1602                 {
1603                     char* pTemp = _chars;
1604                     if (_fallbackBuffer.InternalFallback(byteBuffer, _bytes, ref _chars) == false)
1605                     {
1606                         // Throw maybe
1607                         _bytes -= byteBuffer.Length;                             // Didn't use how many ever bytes we're falling back
1608                         _fallbackBuffer.InternalReset();                         // We didn't use this fallback.
1609                         _enc.ThrowCharsOverflow(_decoder, _chars == _charStart);    // Throw?
1610                         return false;                                           // No throw, but no store either
1611                     }
1612                     _charCountResult += unchecked((int)(_chars - pTemp));
1613                 }
1614                 else
1615                 {
1616                     _charCountResult += _fallbackBuffer.InternalFallback(byteBuffer, _bytes);
1617                 }
1618 
1619                 return true;
1620             }
1621 
1622             internal unsafe int Count
1623             {
1624                 get
1625                 {
1626                     return _charCountResult;
1627                 }
1628             }
1629         }
1630 
1631         internal class EncodingByteBuffer
1632         {
1633             private unsafe byte* _bytes;
1634             private unsafe byte* _byteStart;
1635             private unsafe byte* _byteEnd;
1636             private unsafe char* _chars;
1637             private unsafe char* _charStart;
1638             private unsafe char* _charEnd;
1639             private int _byteCountResult = 0;
1640             private Encoding _enc;
1641             private EncoderNLS _encoder;
1642             internal EncoderFallbackBuffer fallbackBuffer;
1643 
EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder, byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)1644             internal unsafe EncodingByteBuffer(Encoding inEncoding, EncoderNLS inEncoder,
1645                         byte* inByteStart, int inByteCount, char* inCharStart, int inCharCount)
1646             {
1647                 _enc = inEncoding;
1648                 _encoder = inEncoder;
1649 
1650                 _charStart = inCharStart;
1651                 _chars = inCharStart;
1652                 _charEnd = inCharStart + inCharCount;
1653 
1654                 _bytes = inByteStart;
1655                 _byteStart = inByteStart;
1656                 _byteEnd = inByteStart + inByteCount;
1657 
1658                 if (_encoder == null)
1659                     this.fallbackBuffer = _enc.EncoderFallback.CreateFallbackBuffer();
1660                 else
1661                 {
1662                     this.fallbackBuffer = _encoder.FallbackBuffer;
1663                     // If we're not converting we must not have data in our fallback buffer
1664                     if (_encoder._throwOnOverflow && _encoder.InternalHasFallbackBuffer &&
1665                         this.fallbackBuffer.Remaining > 0)
1666                         throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty,
1667                             _encoder.Encoding.EncodingName, _encoder.Fallback.GetType()));
1668                 }
1669                 fallbackBuffer.InternalInitialize(_chars, _charEnd, _encoder, _bytes != null);
1670             }
1671 
AddByte(byte b, int moreBytesExpected)1672             internal unsafe bool AddByte(byte b, int moreBytesExpected)
1673             {
1674                 Debug.Assert(moreBytesExpected >= 0, "[EncodingByteBuffer.AddByte]expected non-negative moreBytesExpected");
1675                 if (_bytes != null)
1676                 {
1677                     if (_bytes >= _byteEnd - moreBytesExpected)
1678                     {
1679                         // Throw maybe.  Check which buffer to back up (only matters if Converting)
1680                         this.MovePrevious(true);            // Throw if necessary
1681                         return false;                       // No throw, but no store either
1682                     }
1683 
1684                     *(_bytes++) = b;
1685                 }
1686                 _byteCountResult++;
1687                 return true;
1688             }
1689 
AddByte(byte b1)1690             internal unsafe bool AddByte(byte b1)
1691             {
1692                 return (AddByte(b1, 0));
1693             }
1694 
AddByte(byte b1, byte b2)1695             internal unsafe bool AddByte(byte b1, byte b2)
1696             {
1697                 return (AddByte(b1, b2, 0));
1698             }
1699 
AddByte(byte b1, byte b2, int moreBytesExpected)1700             internal unsafe bool AddByte(byte b1, byte b2, int moreBytesExpected)
1701             {
1702                 return (AddByte(b1, 1 + moreBytesExpected) && AddByte(b2, moreBytesExpected));
1703             }
1704 
AddByte(byte b1, byte b2, byte b3)1705             internal unsafe bool AddByte(byte b1, byte b2, byte b3)
1706             {
1707                 return AddByte(b1, b2, b3, (int)0);
1708             }
1709 
AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)1710             internal unsafe bool AddByte(byte b1, byte b2, byte b3, int moreBytesExpected)
1711             {
1712                 return (AddByte(b1, 2 + moreBytesExpected) &&
1713                         AddByte(b2, 1 + moreBytesExpected) &&
1714                         AddByte(b3, moreBytesExpected));
1715             }
1716 
AddByte(byte b1, byte b2, byte b3, byte b4)1717             internal unsafe bool AddByte(byte b1, byte b2, byte b3, byte b4)
1718             {
1719                 return (AddByte(b1, 3) &&
1720                         AddByte(b2, 2) &&
1721                         AddByte(b3, 1) &&
1722                         AddByte(b4, 0));
1723             }
1724 
MovePrevious(bool bThrow)1725             internal unsafe void MovePrevious(bool bThrow)
1726             {
1727                 if (fallbackBuffer.bFallingBack)
1728                     fallbackBuffer.MovePrevious();                      // don't use last fallback
1729                 else
1730                 {
1731                     Debug.Assert(_chars > _charStart ||
1732                         ((bThrow == true) && (_bytes == _byteStart)),
1733                         "[EncodingByteBuffer.MovePrevious]expected previous data or throw");
1734                     if (_chars > _charStart)
1735                         _chars--;                                        // don't use last char
1736                 }
1737 
1738                 if (bThrow)
1739                     _enc.ThrowBytesOverflow(_encoder, _bytes == _byteStart);    // Throw? (and reset fallback if not converting)
1740             }
1741 
Fallback(char charFallback)1742             internal unsafe bool Fallback(char charFallback)
1743             {
1744                 // Do the fallback
1745                 return fallbackBuffer.InternalFallback(charFallback, ref _chars);
1746             }
1747 
1748             internal unsafe bool MoreData
1749             {
1750                 get
1751                 {
1752                     // See if fallbackBuffer is not empty or if there's data left in chars buffer.
1753                     return ((fallbackBuffer.Remaining > 0) || (_chars < _charEnd));
1754                 }
1755             }
1756 
GetNextChar()1757             internal unsafe char GetNextChar()
1758             {
1759                 // See if there's something in our fallback buffer
1760                 char cReturn = fallbackBuffer.InternalGetNextChar();
1761 
1762                 // Nothing in the fallback buffer, return our normal data.
1763                 if (cReturn == 0)
1764                 {
1765                     if (_chars < _charEnd)
1766                         cReturn = *(_chars++);
1767                 }
1768 
1769                 return cReturn;
1770             }
1771 
1772             internal unsafe int CharsUsed
1773             {
1774                 get
1775                 {
1776                     return (int)(_chars - _charStart);
1777                 }
1778             }
1779 
1780             internal unsafe int Count
1781             {
1782                 get
1783                 {
1784                     return _byteCountResult;
1785                 }
1786             }
1787         }
1788     }
1789 }
1790