1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
8 
9 using System;
10 using System.Globalization;
11 using System.Diagnostics;
12 using System.Runtime.InteropServices;
13 
14 namespace System.Text
15 {
16     public class UnicodeEncoding : Encoding
17     {
        // Used by Encoding.BigEndianUnicode/Unicode for lazy initialization
        // The initialization code will not be run until a static member of the class is referenced
        internal static readonly UnicodeEncoding s_bigEndianDefault = new UnicodeEncoding(bigEndian: true, byteOrderMark: true);
        internal static readonly UnicodeEncoding s_littleEndianDefault = new UnicodeEncoding(bigEndian: false, byteOrderMark: true);

        // Two-byte preambles for each byte order: FE FF for big endian, FF FE for little endian.
        // NOTE(review): the members that hand these out are outside this excerpt - confirm usage there.
        private static readonly byte[] s_bigEndianPreamble = new byte[2] { 0xfe, 0xff };
        private static readonly byte[] s_littleEndianPreamble = new byte[2] { 0xff, 0xfe };

        // Set from the constructor's throwOnInvalidBytes argument. SetDefaultFallbacks reads it to
        // choose between the exception fallbacks and the U+FFFD replacement fallbacks.
        internal bool isThrowException = false;

        // Both are assigned from the constructor arguments of the same names.
        // bigEndian also selects the code page passed to the base constructor (1201 vs 1200).
        internal bool bigEndian = false;
        internal bool byteOrderMark = true;

        // Unicode version 2.0 character size in bytes
        public const int CharSize = 2;
33 
34 
UnicodeEncoding()35         public UnicodeEncoding()
36             : this(false, true)
37         {
38         }
39 
40 
UnicodeEncoding(bool bigEndian, bool byteOrderMark)41         public UnicodeEncoding(bool bigEndian, bool byteOrderMark)
42             : this(bigEndian, byteOrderMark, false)
43         {
44         }
45 
46 
UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)47         public UnicodeEncoding(bool bigEndian, bool byteOrderMark, bool throwOnInvalidBytes)
48             : base(bigEndian ? 1201 : 1200)  //Set the data item.
49         {
50             this.isThrowException = throwOnInvalidBytes;
51             this.bigEndian = bigEndian;
52             this.byteOrderMark = byteOrderMark;
53 
54             // Encoding constructor already did this, but it'll be wrong if we're throwing exceptions
55             if (this.isThrowException)
56                 SetDefaultFallbacks();
57         }
58 
SetDefaultFallbacks()59         internal override void SetDefaultFallbacks()
60         {
61             // For UTF-X encodings, we use a replacement fallback with an empty string
62             if (this.isThrowException)
63             {
64                 this.encoderFallback = EncoderFallback.ExceptionFallback;
65                 this.decoderFallback = DecoderFallback.ExceptionFallback;
66             }
67             else
68             {
69                 this.encoderFallback = new EncoderReplacementFallback("\xFFFD");
70                 this.decoderFallback = new DecoderReplacementFallback("\xFFFD");
71             }
72         }
73 
74         // The following methods are copied from EncodingNLS.cs.
75         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
76         // These should be kept in sync for the following classes:
77         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
78         //
79 
80         // Returns the number of bytes required to encode a range of characters in
81         // a character array.
82         //
83         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
84         // So if you fix this, fix the others.  Currently those include:
85         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
86         // parent method is safe
87 
GetByteCount(char[] chars, int index, int count)88         public override unsafe int GetByteCount(char[] chars, int index, int count)
89         {
90             // Validate input parameters
91             if (chars == null)
92                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
93 
94             if (index < 0 || count < 0)
95                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
96 
97             if (chars.Length - index < count)
98                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
99 
100             // If no input, return 0, avoid fixed empty array problem
101             if (count == 0)
102                 return 0;
103 
104             // Just call the pointer version
105             fixed (char* pChars = chars)
106                 return GetByteCount(pChars + index, count, null);
107         }
108 
109         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
110         // So if you fix this, fix the others.  Currently those include:
111         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
112         // parent method is safe
113 
GetByteCount(String s)114         public override unsafe int GetByteCount(String s)
115         {
116             // Validate input
117             if (s==null)
118                 throw new ArgumentNullException("s");
119 
120             fixed (char* pChars = s)
121                 return GetByteCount(pChars, s.Length, null);
122         }
123 
124         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
125         // So if you fix this, fix the others.  Currently those include:
126         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
127 
128         [CLSCompliant(false)]
GetByteCount(char* chars, int count)129         public override unsafe int GetByteCount(char* chars, int count)
130         {
131             // Validate Parameters
132             if (chars == null)
133                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
134 
135             if (count < 0)
136                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
137 
138             // Call it with empty encoder
139             return GetByteCount(chars, count, null);
140         }
141 
142         // Parent method is safe.
143         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
144         // So if you fix this, fix the others.  Currently those include:
145         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
146 
GetBytes(String s, int charIndex, int charCount, byte[] bytes, int byteIndex)147         public override unsafe int GetBytes(String s, int charIndex, int charCount,
148                                               byte[] bytes, int byteIndex)
149         {
150             if (s == null || bytes == null)
151                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
152 
153             if (charIndex < 0 || charCount < 0)
154                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
155 
156             if (s.Length - charIndex < charCount)
157                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
158 
159             if (byteIndex < 0 || byteIndex > bytes.Length)
160                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
161 
162             int byteCount = bytes.Length - byteIndex;
163 
164             fixed (char* pChars = s) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
165                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
166         }
167 
168         // Encodes a range of characters in a character array into a range of bytes
169         // in a byte array. An exception occurs if the byte array is not large
170         // enough to hold the complete encoding of the characters. The
171         // GetByteCount method can be used to determine the exact number of
172         // bytes that will be produced for a given range of characters.
173         // Alternatively, the GetMaxByteCount method can be used to
174         // determine the maximum number of bytes that will be produced for a given
175         // number of characters, regardless of the actual character values.
176         //
177         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
178         // So if you fix this, fix the others.  Currently those include:
179         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
180         // parent method is safe
181 
GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)182         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
183                                                byte[] bytes, int byteIndex)
184         {
185             // Validate parameters
186             if (chars == null || bytes == null)
187                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
188 
189             if (charIndex < 0 || charCount < 0)
190                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
191 
192             if (chars.Length - charIndex < charCount)
193                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
194 
195             if (byteIndex < 0 || byteIndex > bytes.Length)
196                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
197 
198             // If nothing to encode return 0, avoid fixed problem
199             if (charCount == 0)
200                 return 0;
201 
202             // Just call pointer version
203             int byteCount = bytes.Length - byteIndex;
204 
205             fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
206                 // Remember that byteCount is # to decode, not size of array.
207                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
208         }
209 
210         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
211         // So if you fix this, fix the others.  Currently those include:
212         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
213 
214         [CLSCompliant(false)]
GetBytes(char* chars, int charCount, byte* bytes, int byteCount)215         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
216         {
217             // Validate Parameters
218             if (bytes == null || chars == null)
219                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
220 
221             if (charCount < 0 || byteCount < 0)
222                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
223 
224             return GetBytes(chars, charCount, bytes, byteCount, null);
225         }
226 
227         // Returns the number of characters produced by decoding a range of bytes
228         // in a byte array.
229         //
230         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
231         // So if you fix this, fix the others.  Currently those include:
232         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
233         // parent method is safe
234 
GetCharCount(byte[] bytes, int index, int count)235         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
236         {
237             // Validate Parameters
238             if (bytes == null)
239                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
240 
241             if (index < 0 || count < 0)
242                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
243 
244             if (bytes.Length - index < count)
245                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
246 
247             // If no input just return 0, fixed doesn't like 0 length arrays
248             if (count == 0)
249                 return 0;
250 
251             // Just call pointer version
252             fixed (byte* pBytes = bytes)
253                 return GetCharCount(pBytes + index, count, null);
254         }
255 
256         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
257         // So if you fix this, fix the others.  Currently those include:
258         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
259 
260         [CLSCompliant(false)]
GetCharCount(byte* bytes, int count)261         public override unsafe int GetCharCount(byte* bytes, int count)
262         {
263             // Validate Parameters
264             if (bytes == null)
265                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
266 
267             if (count < 0)
268                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
269 
270             return GetCharCount(bytes, count, null);
271         }
272 
273         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
274         // So if you fix this, fix the others.  Currently those include:
275         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
276         // parent method is safe
277 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)278         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
279                                               char[] chars, int charIndex)
280         {
281             // Validate Parameters
282             if (bytes == null || chars == null)
283                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
284 
285             if (byteIndex < 0 || byteCount < 0)
286                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
287 
288             if ( bytes.Length - byteIndex < byteCount)
289                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
290 
291             if (charIndex < 0 || charIndex > chars.Length)
292                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
293 
294             // If no input, return 0 & avoid fixed problem
295             if (byteCount == 0)
296                 return 0;
297 
298             // Just call pointer version
299             int charCount = chars.Length - charIndex;
300 
301             fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
302                 // Remember that charCount is # to decode, not size of array
303                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
304         }
305 
306         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
307         // So if you fix this, fix the others.  Currently those include:
308         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
309 
310         [CLSCompliant(false)]
GetChars(byte* bytes, int byteCount, char* chars, int charCount)311         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
312         {
313             // Validate Parameters
314             if (bytes == null || chars == null)
315                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
316 
317             if (charCount < 0 || byteCount < 0)
318                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
319 
320             return GetChars(bytes, byteCount, chars, charCount, null);
321         }
322 
323         // Returns a string containing the decoded representation of a range of
324         // bytes in a byte array.
325         //
326         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
327         // So if you fix this, fix the others.  Currently those include:
328         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
329         // parent method is safe
330 
GetString(byte[] bytes, int index, int count)331         public override unsafe string GetString(byte[] bytes, int index, int count)
332         {
333             // Validate Parameters
334             if (bytes == null)
335                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
336 
337             if (index < 0 || count < 0)
338                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
339 
340             if (bytes.Length - index < count)
341                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
342 
343             // Avoid problems with empty input buffer
344             if (count == 0) return String.Empty;
345 
346             fixed (byte* pBytes = bytes)
347                 return String.CreateStringFromEncoding(
348                     pBytes + index, count, this);
349         }
350 
351         //
352         // End of standard methods copied from EncodingNLS.cs
353         //
354 
        // Core byte-counting routine behind the public GetByteCount overloads.
        //
        // Every input char is provisionally counted as 2 bytes (count << 1). The loop then walks
        // the input looking only for surrogate problems: an unpaired high or low surrogate has its
        // 2 bytes taken back and is handed to the encoder fallback; each char the fallback supplies
        // is counted as 2 bytes when it is drained.
        //
        //   chars   - first char to examine (asserted non-null).
        //   count   - number of chars to examine (asserted non-negative).
        //   encoder - optional. When non-null, its _charLeftOver seeds the pending high surrogate,
        //             its fallback buffer is reused, and its MustFlush decides whether a trailing
        //             high surrogate is sent to the fallback or merely left uncounted.
        //
        // Returns the computed byte count.
        //
        // Throws ArgumentOutOfRangeException when count << 1 is negative (overflow), and
        // ArgumentException when the encoder's fallback buffer still holds data on entry or when
        // the fallback for a trailing high surrogate itself ends on a dangling high surrogate.
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS encoder)
        {
            Debug.Assert(chars != null, "[UnicodeEncoding.GetByteCount]chars!=null");
            Debug.Assert(count >= 0, "[UnicodeEncoding.GetByteCount]count >=0");

            // Start by assuming each char gets 2 bytes
            int byteCount = count << 1;

            // Check for overflow in byteCount
            // (If they were all invalid chars, this would actually be wrong,
            // but that's a ridiculously large # so we're not concerned about that case)
            if (byteCount < 0)
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_GetByteCountOverflow);

            char* charStart = chars;
            char* charEnd = chars + count;

            // Pending high surrogate waiting for its low half; 0 means none.
            char charLeftOver = (char)0;

            // Set once the end-of-input fallback below has run; reaching that point a second
            // time with a high surrogate still pending is reported as a recursive fallback.
            bool wasHereBefore = false;

            // Need -1 to check 2 at a time.  If we have an even #, longChars will go
            // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longChars
            // will go from longEnd - 1 long to longEnd. (Might not get to use this)
            // NOTE(review): the note above talks about 2 at a time, but the code below reads
            // 4 chars (one ulong) per step; with longEnd = charEnd - 3 the test
            // "longChars < longEnd" only admits reads that leave at least 4 chars before charEnd.
            ulong* longEnd = (ulong*)(charEnd - 3);

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            char* charsForFallback;

            if (encoder != null)
            {
                charLeftOver = encoder._charLeftOver;

                // Assume extra bytes to encode charLeftOver if it existed
                if (charLeftOver > 0)
                    byteCount += 2;

                // We mustn't have left over fallback data when counting
                if (encoder.InternalHasFallbackBuffer)
                {
                    fallbackBuffer = encoder.FallbackBuffer;
                    if (fallbackBuffer.Remaining > 0)
                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));

                    // Set our internal fallback interesting things.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
            }

            char ch;
        TryAgain:

            // Each iteration first asks the fallback buffer (if one exists) for a char; a non-zero
            // result is processed as a fallback char. Otherwise (ch == 0) the next input char is
            // read, and the loop ends once the fallback gives nothing and chars has reached charEnd.
            while (((ch = (fallbackBuffer == null) ? (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) || chars < charEnd)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, maybe we can do it fast
                    // The fast path is only taken when the encoding's byte order matches the
                    // BIGENDIAN build setting, no high surrogate is pending, and chars is
                    // aligned (8 bytes under BIT64, otherwise 4).
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN       // If endianness is backwards then each pair of bytes would be backwards.
                    if ( bigEndian &&
#else
                    if (!bigEndian &&
#endif // BIGENDIAN

#if BIT64           // 64 bit CPU needs to be long aligned for this to work.
                          charLeftOver == 0 && (unchecked((long)chars) & 7) == 0)
#else
                          charLeftOver == 0 && (unchecked((int)chars) & 3) == 0)
#endif
                    {
                        // Need new char* so we can check 4 at a time
                        ulong* longChars = (ulong*)chars;

                        while (longChars < longEnd)
                        {
                            // See if we potentially have surrogates (0x8000 bit set)
                            // (We're either big endian on a big endian machine or little endian on
                            // a little endian machine so that'll work)
                            if ((0x8000800080008000 & *longChars) != 0)
                            {
                                // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                                // 5 bits looks like 11011, then its a high or low surrogate.
                                // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                                // Note that we expect BMP characters to be more common than surrogates
                                // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                                ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                                // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                                // but no clue if they're high or low.
                                // If each of the 4 characters are non-zero, then none are surrogates.
                                if ((uTemp & 0xFFFF000000000000) == 0 ||
                                    (uTemp & 0x0000FFFF00000000) == 0 ||
                                    (uTemp & 0x00000000FFFF0000) == 0 ||
                                    (uTemp & 0x000000000000FFFF) == 0)
                                {
                                    // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                    // or if there's 1 or 4 surrogates

                                    // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                    // bit to see if its set (low) or not (high) in the right pattern
#if BIGENDIAN
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
#else
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
#endif
                                    {
                                        // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                        // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                        // Drop out to the slow loop to resolve the surrogates
                                        break;
                                    }
                                    // else they are all surrogates in High/Low/High/Low order, so we can use them.
                                }
                                // else none are surrogates, so we can use them.
                            }
                            // else all < 0x8000 so we can use them

                            // We already counted these four chars, go to next long.
                            longChars++;
                        }

                        // Resume char-at-a-time processing from wherever the fast loop stopped.
                        chars = (char*)longChars;

                        // The fast loop may have consumed the whole input; if so leave the outer loop.
                        if (chars >= charEnd)
                            break;
                    }
#endif // !NO_FAST_UNICODE_LOOP

                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }
                else
                {
                    // We weren't preallocating fallback space.
                    // (Input chars were counted up front by count << 1; fallback chars are counted here.)
                    byteCount += 2;
                }

                // Check for high or low surrogates
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // Its a high surrogate, if we already had a high surrogate do its fallback
                        if (charLeftOver > 0)
                        {
                            // Unwind the current character, this should be safe because we
                            // don't have leftover data in the fallback, so chars must have
                            // advanced already.
                            Debug.Assert(chars > charStart,
                                "[UnicodeEncoding.GetByteCount]Expected chars to have advanced in unexpected high surrogate");
                            chars--;

                            // If previous high surrogate deallocate 2 bytes
                            byteCount -= 2;

                            // Fallback the previous surrogate
                            // Need to initialize fallback buffer?
                            if (fallbackBuffer == null)
                            {
                                if (encoder == null)
                                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = encoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                            }

                            charsForFallback = chars; // Avoid passing chars by reference to allow it to be enregistered
                            fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                            chars = charsForFallback;

                            // Now no high surrogate left over
                            charLeftOver = (char)0;
                            continue;
                        }

                        // Remember this high surrogate
                        charLeftOver = ch;
                        continue;
                    }


                    // Its a low surrogate
                    if (charLeftOver == 0)
                    {
                        // Expected a previous high surrogate.
                        // Don't count this one (we'll count its fallback if necessary)
                        byteCount -= 2;

                        // fallback this one
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(ch, ref charsForFallback);
                        chars = charsForFallback;
                        continue;
                    }

                    // Valid surrogate pair, add our charLeftOver
                    // (both halves are already included in byteCount, so only the pending state is cleared)
                    charLeftOver = (char)0;
                    continue;
                }
                else if (charLeftOver > 0)
                {
                    // Expected a low surrogate, but this char is normal

                    // Rewind the current character, fallback previous character.
                    // this should be safe because we don't have leftover data in the
                    // fallback, so chars must have advanced already.
                    Debug.Assert(chars > charStart,
                        "[UnicodeEncoding.GetByteCount]Expected chars to have advanced when expected low surrogate");
                    chars--;

                    // fallback previous chars
                    // Need to initialize fallback buffer?
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                    }
                    charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                    fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                    chars = charsForFallback;

                    // Ignore charLeftOver or throw
                    byteCount -= 2;
                    charLeftOver = (char)0;

                    continue;
                }

                // Ok we had something to add (already counted)
            }

            // Don't allocate space for left over char
            if (charLeftOver > 0)
            {
                byteCount -= 2;

                // If we have to flush, stick it in fallback and try again
                // (with an encoder that is not flushing, the pending high surrogate is simply left uncounted)
                if (encoder == null || encoder.MustFlush)
                {
                    if (wasHereBefore)
                    {
                        // Throw it, using our complete character
                        throw new ArgumentException(
                            SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
                    }
                    else
                    {
                        // Need to initialize fallback buffer?
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                        }
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                        chars = charsForFallback;
                        charLeftOver = (char)0;
                        wasHereBefore = true;

                        // Re-enter the loop so the chars just queued in the fallback buffer get counted.
                        goto TryAgain;
                    }
                }
            }

            // Shouldn't have anything in fallback buffer for GetByteCount
            // (don't have to check _throwOnOverflow for count)
            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
                "[UnicodeEncoding.GetByteCount]Expected empty fallback buffer at end");

            // Don't remember fallbackBuffer.encoder for counting
            return byteCount;
        }
653 
        // Encodes the chars in [chars, chars + charCount) as 16-bit code units into
        // [bytes, bytes + byteCount) and returns the number of bytes written.  Each unit
        // is written high byte first when bigEndian is set, low byte first otherwise.
        //
        // Surrogate handling: a high surrogate (0xd800-0xdbff) is held in charLeftOver until
        // the next char is seen.  A high surrogate that is not followed by a low surrogate,
        // or a low surrogate (0xdc00-0xdfff) with no pending high surrogate, is passed to
        // the encoder fallback buffer; any chars the fallback produces are then read back
        // through the same main loop ahead of further input.
        //
        // Encoder state: when encoder is non-null, a pending high surrogate is read from
        // encoder._charLeftOver on entry, and on exit encoder._charLeftOver and
        // encoder._charsUsed (input chars consumed) are written back.  When encoder is null
        // or encoder.MustFlush is set, a high surrogate still pending at end of input is
        // pushed through the fallback instead of being remembered.
        //
        // Output exhaustion: when the next unit (or surrogate pair) does not fit, the
        // unconsumed input/fallback chars are rewound and ThrowBytesOverflow is called with
        // a flag saying whether nothing has been written yet; if it returns, encoding stops
        // and the bytes written so far are returned.
        //
        // Unless NO_FAST_UNICODE_LOOP is defined, two bulk-copy paths are tried before the
        // char-at-a-time path whenever the requested byte order matches the BIGENDIAN build
        // setting and no high surrogate is pending.
        internal override unsafe int GetBytes(char* chars, int charCount,
                                                byte* bytes, int byteCount, EncoderNLS encoder)
        {
            Debug.Assert(chars != null, "[UnicodeEncoding.GetBytes]chars!=null");
            Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetBytes]byteCount >=0");
            Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetBytes]charCount >=0");
            Debug.Assert(bytes != null, "[UnicodeEncoding.GetBytes]bytes!=null");

            // High surrogate waiting for its low surrogate (0 means none pending)
            char charLeftOver = (char)0;
            char ch;
            // Set once the end-of-input flush below has sent a leftover high surrogate to the
            // fallback; if a high surrogate is left over again after that, we throw.
            bool wasHereBefore = false;


            // One-past-the-end and start pointers for both buffers
            byte* byteEnd = bytes + byteCount;
            char* charEnd = chars + charCount;
            byte* byteStart = bytes;
            char* charStart = chars;

            // For fallback we may need a fallback buffer
            EncoderFallbackBuffer fallbackBuffer = null;
            char* charsForFallback;

            // Get our encoder, but don't clear it yet.
            if (encoder != null)
            {
                charLeftOver = encoder._charLeftOver;

                // Leftover fallback data from a previous call is only an error when the
                // encoder is in throw-on-overflow mode (checked below).
                if (encoder.InternalHasFallbackBuffer)
                {
                    // We always need the fallback buffer in get bytes so we can flush any remaining ones if necessary
                    fallbackBuffer = encoder.FallbackBuffer;
                    if (fallbackBuffer.Remaining > 0 && encoder._throwOnOverflow)
                        throw new ArgumentException(SR.Format(SR.Argument_EncoderFallbackNotEmpty, this.EncodingName, encoder.Fallback.GetType()));

                    // Set our internal fallback interesting things.
                    // NOTE(review): this call passes false as the last argument while every
                    // lazy-creation path below passes true -- confirm the difference is intended.
                    fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, false);
                }
            }

        TryAgain:
            // Each iteration takes its char from the fallback buffer when it has one pending
            // (ch != 0); otherwise ch is 0 and the next char comes from the input.
            while (((ch = (fallbackBuffer == null) ?
                        (char)0 : fallbackBuffer.InternalGetNextChar()) != 0) ||
                    chars < charEnd)
            {
                // First unwind any fallback
                if (ch == 0)
                {
                    // No fallback, maybe we can do it fast
#if !NO_FAST_UNICODE_LOOP
#if BIGENDIAN           // If endianess is backwards then each pair of bytes would be backwards.
                    if ( bigEndian &&
#else
                    if (!bigEndian &&
#endif // BIGENDIAN
#if BIT64           // 64 bit CPU needs to be long aligned for this to work, 32 bit CPU needs to be 32 bit aligned
                        (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
#else
                        (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
#endif // BIT64
                        charLeftOver == 0)
                    {
                        // The usable length is the smaller of the remaining input chars and the
                        // remaining output capacity in chars.  Stopping 3 chars short of that
                        // keeps every 4-char (8-byte) read and write below inside both buffers;
                        // the tail is left for the slow loop.  (Might not get to use this)
                        ulong* longEnd = (ulong*)(chars - 3 +
                                                  (((byteEnd - bytes) >> 1 < charEnd - chars) ?
                                                    (byteEnd - bytes) >> 1 : charEnd - chars));

                        // Need new char* so we can check 4 at a time
                        ulong* longChars = (ulong*)chars;
                        ulong* longBytes = (ulong*)bytes;

                        while (longChars < longEnd)
                        {
                            // See if we potentially have surrogates (0x8000 bit set)
                            // (We're either big endian on a big endian machine or little endian on
                            // a little endian machine so that'll work)
                            if ((0x8000800080008000 & *longChars) != 0)
                            {
                                // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                                // 5 bits looks like 11011, then its a high or low surrogate.
                                // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
                                // Note that we expect BMP characters to be more common than surrogates
                                // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
                                ulong uTemp = (0xf800f800f800f800 & *longChars) ^ 0xd800d800d800d800;

                                // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
                                // but no clue if they're high or low.
                                // If each of the 4 characters are non-zero, then none are surrogates.
                                if ((uTemp & 0xFFFF000000000000) == 0 ||
                                    (uTemp & 0x0000FFFF00000000) == 0 ||
                                    (uTemp & 0x00000000FFFF0000) == 0 ||
                                    (uTemp & 0x000000000000FFFF) == 0)
                                {
                                    // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
                                    // or if there's 1 or 4 surrogates

                                    // If they happen to be high/low/high/low, we may as well continue.  Check the next
                                    // bit to see if its set (low) or not (high) in the right pattern.
                                    // The two constants differ only in which 16-bit lane holds the first char.
#if BIGENDIAN
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xd800dc00d800dc00) != 0)
#else
                                    if (((0xfc00fc00fc00fc00 & *longChars) ^ 0xdc00d800dc00d800) != 0)
#endif
                                    {
                                        // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
                                        // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.

                                        // Drop out to the slow loop to resolve the surrogates
                                        break;
                                    }
                                    // else they are all surrogates in High/Low/High/Low order, so we can use them.
                                }
                                // else none are surrogates, so we can use them.
                            }
                            // else all < 0x8000 so we can use them

                            // We can use these 4 chars.
                            *longBytes = *longChars;
                            longChars++;
                            longBytes++;
                        }

                        // Publish how far the bulk copy got
                        chars = (char*)longChars;
                        bytes = (byte*)longBytes;

                        // All input consumed by the fast loop; leave the main loop
                        if (chars >= charEnd)
                            break;
                    }
                    // Not aligned, but maybe we can still be somewhat faster
                    // Also somehow this optimizes the above loop?  It seems to cause something above
                    // to get enregistered, but I haven't figured out how to make that happen without this loop.
                    // The final condition requires bytes to be 2-byte aligned because the output
                    // is written through a char* below.
                    else if ((charLeftOver == 0) &&
#if BIGENDIAN
                        bigEndian &&
#else
                        !bigEndian &&
#endif // BIGENDIAN

#if BIT64
                        (unchecked((long)chars) & 7) != (unchecked((long)bytes) & 7) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
#else
                        (unchecked((int)chars) & 3) != (unchecked((int)bytes) & 3) &&  // Only do this if chars & bytes are out of line, otherwise faster loop will be faster next time
#endif // BIT64
                        (unchecked((int)(bytes)) & 1) == 0)
                    {
                        // # to use: the smaller of remaining input chars and remaining output
                        // capacity measured in chars
                        long iCount = ((byteEnd - bytes) >> 1 < charEnd - chars) ?
                                       (byteEnd - bytes) >> 1 : charEnd - chars;

                        // Need new char*
                        char* charOut = ((char*)bytes);     // a char* for our output
                        char* tempEnd = chars + iCount - 1; // Our end pointer (one short, since we look at chars[1])

                        while (chars < tempEnd)
                        {
                            if (*chars >= (char)0xd800 && *chars <= (char)0xdfff)
                            {
                                // break for fallback for low surrogate
                                if (*chars >= 0xdc00)
                                    break;

                                // break if next one's not a low surrogate (will do fallback)
                                if (*(chars + 1) < 0xdc00 || *(chars + 1) > 0xdfff)
                                    break;

                                // They both exist, use them
                            }
                            // If 2nd char is surrogate & this one isn't then only add one,
                            // so the surrogate is examined as the first char next time round
                            else if (*(chars + 1) >= (char)0xd800 && *(chars + 1) <= 0xdfff)
                            {
                                *charOut = *chars;
                                charOut++;
                                chars++;
                                continue;
                            }

                            // Copy two chars: either two non-surrogates or a valid high/low pair
                            *charOut = *chars;
                            *(charOut + 1) = *(chars + 1);
                            charOut += 2;
                            chars += 2;
                        }

                        // Publish how far the output got
                        bytes = (byte*)charOut;

                        if (chars >= charEnd)
                            break;
                    }
#endif // !NO_FAST_UNICODE_LOOP

                    // No fallback, just get next char
                    ch = *chars;
                    chars++;
                }

                // Check for high or low surrogates
                if (ch >= 0xd800 && ch <= 0xdfff)
                {
                    // Was it a high surrogate?
                    if (ch <= 0xdbff)
                    {
                        // Its a high surrogate, see if we already had a high surrogate
                        if (charLeftOver > 0)
                        {
                            // Unwind the current character, this should be safe because we
                            // don't have leftover data in the fallback, so chars must have
                            // advanced already.
                            Debug.Assert(chars > charStart,
                                "[UnicodeEncoding.GetBytes]Expected chars to have advanced in unexpected high surrogate");
                            chars--;

                            // Fallback the previous surrogate
                            // Might need to create our fallback buffer
                            if (fallbackBuffer == null)
                            {
                                if (encoder == null)
                                    fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                                else
                                    fallbackBuffer = encoder.FallbackBuffer;

                                // Set our internal fallback interesting things.
                                fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                            }

                            charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                            fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                            chars = charsForFallback;

                            // The unwound high surrogate will be read again on a later iteration
                            charLeftOver = (char)0;
                            continue;
                        }

                        // Remember this high surrogate
                        charLeftOver = ch;
                        continue;
                    }

                    // Its a low surrogate
                    if (charLeftOver == 0)
                    {
                        // No pending high surrogate, so this low surrogate is unpaired.
                        // We'll fall back this one
                        // Might need to create our fallback buffer
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                        }

                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(ch, ref charsForFallback);
                        chars = charsForFallback;
                        continue;
                    }

                    // Valid surrogate pair, add our charLeftOver.
                    // The pair needs 4 bytes in total: charLeftOver is written here and the
                    // low surrogate (ch) by the common write at the bottom of the loop.
                    if (bytes + 3 >= byteEnd)
                    {
                        // Not enough room to add this surrogate pair
                        if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
                        {
                            // These must have both been from the fallbacks.
                            // Both of these MUST have been from a fallback because if the 1st wasn't
                            // from a fallback, then a high surrogate followed by an illegal char
                            // would've caused the high surrogate to fall back.  If a high surrogate
                            // fell back, then it was consumed and both chars came from the fallback.
                            fallbackBuffer.MovePrevious();                     // Didn't use either fallback surrogate
                            fallbackBuffer.MovePrevious();
                        }
                        else
                        {
                            // If we don't have enough room, then either we should've advanced a while
                            // or we should have bytes==byteStart and throw below
                            Debug.Assert(chars > charStart + 1 || bytes == byteStart,
                                "[UnicodeEncoding.GetBytes]Expected chars to have when no room to add surrogate pair");
                            chars -= 2;                                        // Didn't use either surrogate
                        }
                        ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                        charLeftOver = (char)0;                             // we'll retry it later
                        break;                                               // Didn't throw, but stop 'til next time.
                    }

                    // Write the high surrogate in the requested byte order
                    if (bigEndian)
                    {
                        *(bytes++) = (byte)(charLeftOver >> 8);
                        *(bytes++) = (byte)charLeftOver;
                    }
                    else
                    {
                        *(bytes++) = (byte)charLeftOver;
                        *(bytes++) = (byte)(charLeftOver >> 8);
                    }

                    charLeftOver = (char)0;
                }
                else if (charLeftOver > 0)
                {
                    // Expected a low surrogate, but this char is normal

                    // Rewind the current character, fallback previous character.
                    // this should be safe because we don't have leftover data in the
                    // fallback, so chars must have advanced already.
                    Debug.Assert(chars > charStart,
                        "[UnicodeEncoding.GetBytes]Expected chars to have advanced after expecting low surrogate");
                    chars--;

                    // fallback previous chars
                    // Might need to create our fallback buffer
                    if (fallbackBuffer == null)
                    {
                        if (encoder == null)
                            fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                        else
                            fallbackBuffer = encoder.FallbackBuffer;

                        // Set our internal fallback interesting things.
                        fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                    }

                    charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                    fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                    chars = charsForFallback;

                    // Ignore charLeftOver or throw
                    charLeftOver = (char)0;
                    continue;
                }

                // Ok, we have a char to add (needs 2 bytes of room)
                if (bytes + 1 >= byteEnd)
                {
                    // Couldn't add this char
                    if (fallbackBuffer != null && fallbackBuffer.bFallingBack)
                        fallbackBuffer.MovePrevious();                     // Not using this fallback char
                    else
                    {
                        // Lonely charLeftOver (from previous call) would've been caught up above,
                        // so this must be a case where we've already read an input char.
                        Debug.Assert(chars > charStart,
                            "[UnicodeEncoding.GetBytes]Expected chars to have advanced for failed fallback");
                        chars--;                                         // Not using this char
                    }
                    ThrowBytesOverflow(encoder, bytes == byteStart);    // Throw maybe (if no bytes written)
                    break;                                               // didn't throw, just stop
                }

                // Write ch in the requested byte order
                if (bigEndian)
                {
                    *(bytes++) = (byte)(ch >> 8);
                    *(bytes++) = (byte)ch;
                }
                else
                {
                    *(bytes++) = (byte)ch;
                    *(bytes++) = (byte)(ch >> 8);
                }
            }

            // A high surrogate is still pending after the loop; it gets no output space itself
            if (charLeftOver > 0)
            {
                // If we are flushing (no encoder, or encoder.MustFlush) the pending high
                // surrogate cannot be carried over, so it has to be fallen back
                if (encoder == null || encoder.MustFlush)
                {
                    if (wasHereBefore)
                    {
                        // We already flushed once and ended up with a leftover again.
                        // Throw it, using our complete character
                        throw new ArgumentException(
                            SR.Format(SR.Argument_RecursiveFallback, charLeftOver), nameof(chars));
                    }
                    else
                    {
                        // If we have to flush, stick it in fallback and try again
                        // Might need to create our fallback buffer
                        if (fallbackBuffer == null)
                        {
                            if (encoder == null)
                                fallbackBuffer = this.encoderFallback.CreateFallbackBuffer();
                            else
                                fallbackBuffer = encoder.FallbackBuffer;

                            // Set our internal fallback interesting things.
                            fallbackBuffer.InternalInitialize(charStart, charEnd, encoder, true);
                        }

                        // If we're not flushing, that'll remember the left over character.
                        charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
                        fallbackBuffer.InternalFallback(charLeftOver, ref charsForFallback);
                        chars = charsForFallback;

                        // Re-enter the main loop so whatever the fallback produced gets encoded
                        charLeftOver = (char)0;
                        wasHereBefore = true;
                        goto TryAgain;
                    }
                }
            }

            // Record state in the encoder: the pending high surrogate (if any) and the
            // number of input chars consumed by this call
            if (encoder != null)
            {
                encoder._charLeftOver = charLeftOver;
                encoder._charsUsed = (int)(chars - charStart);
            }

            // Remember charLeftOver if we must, or clear it if we're flushing
            // (charLeftOver should be 0 if we're flushing)
            Debug.Assert((encoder != null && !encoder.MustFlush) || charLeftOver == (char)0,
                "[UnicodeEncoding.GetBytes] Expected no left over characters if flushing");

            Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0 ||
                encoder == null || !encoder._throwOnOverflow,
                "[UnicodeEncoding.GetBytes]Expected empty fallback buffer if not converting");

            // We used to copy it fast, but this doesn't check for surrogates
            // System.IO.__UnmanagedMemoryStream.memcpyimpl(bytes, (byte*)chars, usedByteCount);

            // Number of bytes written
            return (int)(bytes - byteStart);
        }
1078 
GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)1079         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
1080         {
1081             Debug.Assert(bytes != null, "[UnicodeEncoding.GetCharCount]bytes!=null");
1082             Debug.Assert(count >= 0, "[UnicodeEncoding.GetCharCount]count >=0");
1083 
1084             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1085 
1086             byte* byteEnd = bytes + count;
1087             byte* byteStart = bytes;
1088 
1089             // Need last vars
1090             int lastByte = -1;
1091             char lastChar = (char)0;
1092 
1093             // Start by assuming same # of chars as bytes
1094             int charCount = count >> 1;
1095 
1096             // Need -1 to check 2 at a time.  If we have an even #, longBytes will go
1097             // from longEnd - 1/2 long to longEnd + 1/2 long.  If we're odd, longBytes
1098             // will go from longEnd - 1 long to longEnd. (Might not get to use this)
1099             ulong* longEnd = (ulong*)(byteEnd - 7);
1100 
1101             // For fallback we may need a fallback buffer
1102             DecoderFallbackBuffer fallbackBuffer = null;
1103 
1104             if (decoder != null)
1105             {
1106                 lastByte = decoder.lastByte;
1107                 lastChar = decoder.lastChar;
1108 
1109                 // Assume extra char if last char was around
1110                 if (lastChar > 0)
1111                     charCount++;
1112 
1113                 // Assume extra char if extra last byte makes up odd # of input bytes
1114                 if (lastByte >= 0 && (count & 1) == 1)
1115                 {
1116                     charCount++;
1117                 }
1118 
1119                 // Shouldn't have anything in fallback buffer for GetCharCount
1120                 // (don't have to check _throwOnOverflow for count)
1121                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1122                     "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at start");
1123             }
1124 
1125             while (bytes < byteEnd)
1126             {
1127                 // If we're aligned then maybe we can do it fast
1128                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1129 #if !NO_FAST_UNICODE_LOOP
1130 #if BIGENDIAN
1131                 if (bigEndian &&
1132 #else // BIGENDIAN
1133                 if (!bigEndian &&
1134 #endif // BIGENDIAN
1135 #if BIT64 // win64 has to be long aligned
1136                     (unchecked((long)bytes) & 7) == 0 &&
1137 #else
1138                     (unchecked((int)bytes) & 3) == 0 &&
1139 #endif // BIT64
1140                     lastByte == -1 && lastChar == 0)
1141                 {
1142                     // Need new char* so we can check 4 at a time
1143                     ulong* longBytes = (ulong*)bytes;
1144 
1145                     while (longBytes < longEnd)
1146                     {
1147                         // See if we potentially have surrogates (0x8000 bit set)
1148                         // (We're either big endian on a big endian machine or little endian on
1149                         // a little endian machine so that'll work)
1150                         if ((0x8000800080008000 & *longBytes) != 0)
1151                         {
1152                             // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
1153                             // 5 bits looks like 11011, then its a high or low surrogate.
1154                             // We do the & f800 to filter the 5 bits, then ^ d800 to ensure the 0 isn't set.
1155                             // Note that we expect BMP characters to be more common than surrogates
1156                             // & each char with 11111... then ^ with 11011.  Zeroes then indicate surrogates
1157                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1158 
1159                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1160                             // but no clue if they're high or low.
1161                             // If each of the 4 characters are non-zero, then none are surrogates.
1162                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1163                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1164                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1165                                 (uTemp & 0x000000000000FFFF) == 0)
1166                             {
1167                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1168                                 // or if there's 1 or 4 surrogates
1169 
1170                                 // If they happen to be high/low/high/low, we may as well continue.  Check the next
1171                                 // bit to see if its set (low) or not (high) in the right pattern
1172 #if BIGENDIAN
1173                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1174 #else
1175                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1176 #endif
1177                                 {
1178                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1179                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1180 
1181                                     // Drop out to the slow loop to resolve the surrogates
1182                                     break;
1183                                 }
1184                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1185                             }
1186                             // else none are surrogates, so we can use them.
1187                         }
1188                         // else all < 0x8000 so we can use them
1189 
1190                         // We can use these 4 chars.
1191                         longBytes++;
1192                     }
1193 
1194                     bytes = (byte*)longBytes;
1195 
1196                     if (bytes >= byteEnd)
1197                         break;
1198                 }
1199 #endif // !NO_FAST_UNICODE_LOOP
1200 
1201                 // Get 1st byte
1202                 if (lastByte < 0)
1203                 {
1204                     lastByte = *bytes++;
1205                     if (bytes >= byteEnd) break;
1206                 }
1207 
1208                 // Get full char
1209                 char ch;
1210                 if (bigEndian)
1211                 {
1212                     ch = (char)(lastByte << 8 | *(bytes++));
1213                 }
1214                 else
1215                 {
1216                     ch = (char)(*(bytes++) << 8 | lastByte);
1217                 }
1218                 lastByte = -1;
1219 
1220                 // See if the char's valid
1221                 if (ch >= 0xd800 && ch <= 0xdfff)
1222                 {
1223                     // Was it a high surrogate?
1224                     if (ch <= 0xdbff)
1225                     {
                        // It's a high surrogate; if we already had one pending, do fallback for the previous one
1227                         if (lastChar > 0)
1228                         {
1229                             // Ignore previous bad high surrogate
1230                             charCount--;
1231 
1232                             // Get fallback for previous high surrogate
1233                             // Note we have to reconstruct bytes because some may have been in decoder
1234                             byte[] byteBuffer = null;
1235                             if (bigEndian)
1236                             {
1237                                 byteBuffer = new byte[]
1238                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1239                             }
1240                             else
1241                             {
1242                                 byteBuffer = new byte[]
1243                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1244                             }
1245 
1246                             if (fallbackBuffer == null)
1247                             {
1248                                 if (decoder == null)
1249                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1250                                 else
1251                                     fallbackBuffer = decoder.FallbackBuffer;
1252 
1253                                 // Set our internal fallback interesting things.
1254                                 fallbackBuffer.InternalInitialize(byteStart, null);
1255                             }
1256 
1257                             // Get fallback.
1258                             charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1259                         }
1260 
1261                         // Ignore the last one which fell back already,
1262                         // and remember the new high surrogate
1263                         lastChar = ch;
1264                         continue;
1265                     }
1266 
                    // It's a low surrogate
1268                     if (lastChar == 0)
1269                     {
1270                         // Expected a previous high surrogate
1271                         charCount--;
1272 
1273                         // Get fallback for this low surrogate
1274                         // Note we have to reconstruct bytes because some may have been in decoder
1275                         byte[] byteBuffer = null;
1276                         if (bigEndian)
1277                         {
1278                             byteBuffer = new byte[]
1279                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1280                         }
1281                         else
1282                         {
1283                             byteBuffer = new byte[]
1284                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1285                         }
1286 
1287                         if (fallbackBuffer == null)
1288                         {
1289                             if (decoder == null)
1290                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1291                             else
1292                                 fallbackBuffer = decoder.FallbackBuffer;
1293 
1294                             // Set our internal fallback interesting things.
1295                             fallbackBuffer.InternalInitialize(byteStart, null);
1296                         }
1297 
1298                         charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1299 
1300                         // Ignore this one (we already did its fallback)
1301                         continue;
1302                     }
1303 
1304                     // Valid surrogate pair, already counted.
1305                     lastChar = (char)0;
1306                 }
1307                 else if (lastChar > 0)
1308                 {
1309                     // Had a high surrogate, expected a low surrogate
1310                     // Un-count the last high surrogate
1311                     charCount--;
1312 
1313                     // fall back the high surrogate.
1314                     byte[] byteBuffer = null;
1315                     if (bigEndian)
1316                     {
1317                         byteBuffer = new byte[]
1318                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1319                     }
1320                     else
1321                     {
1322                         byteBuffer = new byte[]
1323                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1324                     }
1325 
1326                     if (fallbackBuffer == null)
1327                     {
1328                         if (decoder == null)
1329                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1330                         else
1331                             fallbackBuffer = decoder.FallbackBuffer;
1332 
1333                         // Set our internal fallback interesting things.
1334                         fallbackBuffer.InternalInitialize(byteStart, null);
1335                     }
1336 
1337                     // Already subtracted high surrogate
1338                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1339 
1340                     // Not left over now, clear previous high surrogate and continue to add current char
1341                     lastChar = (char)0;
1342                 }
1343 
1344                 // Valid char, already counted
1345             }
1346 
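            // With no decoder, or a decoder that must flush, a leftover high surrogate or odd byte
            // has to be fallen back (and its fallback output counted) now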
1348             if (decoder == null || decoder.MustFlush)
1349             {
1350                 if (lastChar > 0)
1351                 {
1352                     // No hanging high surrogates allowed, do fallback and remove count for it
1353                     charCount--;
1354                     byte[] byteBuffer = null;
1355                     if (bigEndian)
1356                     {
1357                         byteBuffer = new byte[]
1358                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1359                     }
1360                     else
1361                     {
1362                         byteBuffer = new byte[]
1363                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1364                     }
1365 
1366                     if (fallbackBuffer == null)
1367                     {
1368                         if (decoder == null)
1369                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1370                         else
1371                             fallbackBuffer = decoder.FallbackBuffer;
1372 
1373                         // Set our internal fallback interesting things.
1374                         fallbackBuffer.InternalInitialize(byteStart, null);
1375                     }
1376 
1377                     charCount += fallbackBuffer.InternalFallback(byteBuffer, bytes);
1378 
1379                     lastChar = (char)0;
1380                 }
1381 
1382                 if (lastByte >= 0)
1383                 {
1384                     if (fallbackBuffer == null)
1385                     {
1386                         if (decoder == null)
1387                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1388                         else
1389                             fallbackBuffer = decoder.FallbackBuffer;
1390 
1391                         // Set our internal fallback interesting things.
1392                         fallbackBuffer.InternalInitialize(byteStart, null);
1393                     }
1394 
1395                     // No hanging odd bytes allowed if must flush
1396                     charCount += fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes);
1397                     lastByte = -1;
1398                 }
1399             }
1400 
1401             // If we had a high surrogate left over, we can't count it
1402             if (lastChar > 0)
1403                 charCount--;
1404 
1405             // Shouldn't have anything in fallback buffer for GetCharCount
1406             // (don't have to check _throwOnOverflow for count)
1407             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1408                 "[UnicodeEncoding.GetCharCount]Expected empty fallback buffer at end");
1409 
1410             return charCount;
1411         }
1412 
GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)1413         internal override unsafe int GetChars(byte* bytes, int byteCount,
1414                                                 char* chars, int charCount, DecoderNLS baseDecoder)
1415         {
1416             Debug.Assert(chars != null, "[UnicodeEncoding.GetChars]chars!=null");
1417             Debug.Assert(byteCount >= 0, "[UnicodeEncoding.GetChars]byteCount >=0");
1418             Debug.Assert(charCount >= 0, "[UnicodeEncoding.GetChars]charCount >=0");
1419             Debug.Assert(bytes != null, "[UnicodeEncoding.GetChars]bytes!=null");
1420 
1421             UnicodeEncoding.Decoder decoder = (UnicodeEncoding.Decoder)baseDecoder;
1422 
1423             // Need last vars
1424             int lastByte = -1;
1425             char lastChar = (char)0;
1426 
1427             // Get our decoder (but don't clear it yet)
1428             if (decoder != null)
1429             {
1430                 lastByte = decoder.lastByte;
1431                 lastChar = decoder.lastChar;
1432 
1433                 // Shouldn't have anything in fallback buffer for GetChars
1434                 // (don't have to check _throwOnOverflow for chars)
1435                 Debug.Assert(!decoder.InternalHasFallbackBuffer || decoder.FallbackBuffer.Remaining == 0,
1436                     "[UnicodeEncoding.GetChars]Expected empty fallback buffer at start");
1437             }
1438 
1439             // For fallback we may need a fallback buffer
1440             DecoderFallbackBuffer fallbackBuffer = null;
1441             char* charsForFallback;
1442 
1443             byte* byteEnd = bytes + byteCount;
1444             char* charEnd = chars + charCount;
1445             byte* byteStart = bytes;
1446             char* charStart = chars;
1447 
1448             while (bytes < byteEnd)
1449             {
1450                 // If we're aligned then maybe we can do it fast
1451                 // That'll hurt if we're unaligned because we'll always test but never be aligned
1452 #if !NO_FAST_UNICODE_LOOP
1453 #if BIGENDIAN
1454                 if (bigEndian &&
1455 #else // BIGENDIAN
1456                 if (!bigEndian &&
1457 #endif // BIGENDIAN
1458 #if BIT64 // win64 has to be long aligned
1459                     (unchecked((long)chars) & 7) == 0 && (unchecked((long)bytes) & 7) == 0 &&
1460 #else
1461                     (unchecked((int)chars) & 3) == 0 && (unchecked((int)bytes) & 3) == 0 &&
1462 #endif // BIT64
1463                     lastByte == -1 && lastChar == 0)
1464                 {
                    // Back longEnd off by 7 bytes so that every 8-byte (4-char) read and write in the
                    // loop below stays inside both buffers.  (Might not get to use this.)
                    // We can only go as far as the shorter of the byte and char buffers allows.
1469                     ulong* longEnd = (ulong*)(bytes - 7 +
1470                                                 (((byteEnd - bytes) >> 1 < charEnd - chars) ?
1471                                                   (byteEnd - bytes) : (charEnd - chars) << 1));
1472 
                    // Need ulong pointers so we can check and copy 4 chars at a time
1474                     ulong* longBytes = (ulong*)bytes;
1475                     ulong* longChars = (ulong*)chars;
1476 
1477                     while (longBytes < longEnd)
1478                     {
1479                         // See if we potentially have surrogates (0x8000 bit set)
1480                         // (We're either big endian on a big endian machine or little endian on
1481                         // a little endian machine so that'll work)
1482                         if ((0x8000800080008000 & *longBytes) != 0)
1483                         {
                            // See if any of these are high or low surrogates (0xd800 - 0xdfff).  If the high
                            // 5 bits look like 11011, then it's a high or low surrogate.
                            // Mask each char with 0xf800 to keep those 5 bits, then XOR with 0xd800: a char
                            // whose 16 bits come out as zero is a surrogate.
                            // Note that we expect BMP characters to be more common than surrogates.
1489                             ulong uTemp = (0xf800f800f800f800 & *longBytes) ^ 0xd800d800d800d800;
1490 
1491                             // Check each of the 4 chars.  0 for those 16 bits means it was a surrogate
1492                             // but no clue if they're high or low.
1493                             // If each of the 4 characters are non-zero, then none are surrogates.
1494                             if ((uTemp & 0xFFFF000000000000) == 0 ||
1495                                 (uTemp & 0x0000FFFF00000000) == 0 ||
1496                                 (uTemp & 0x00000000FFFF0000) == 0 ||
1497                                 (uTemp & 0x000000000000FFFF) == 0)
1498                             {
1499                                 // It has at least 1 surrogate, but we don't know if they're high or low surrogates,
1500                                 // or if there's 1 or 4 surrogates
1501 
                                // If they happen to be high/low/high/low, we may as well continue.  Check the
                                // 0x0400 bit of each char to see if it's set (low) or not (high) in the right pattern
1504 #if BIGENDIAN
1505                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xd800dc00d800dc00) != 0)
1506 #else
1507                                 if (((0xfc00fc00fc00fc00 & *longBytes) ^ 0xdc00d800dc00d800) != 0)
1508 #endif
1509                                 {
1510                                     // Either there weren't 4 surrogates, or the 0x0400 bit was set when a high
1511                                     // was hoped for or the 0x0400 bit wasn't set where a low was hoped for.
1512 
1513                                     // Drop out to the slow loop to resolve the surrogates
1514                                     break;
1515                                 }
1516                                 // else they are all surrogates in High/Low/High/Low order, so we can use them.
1517                             }
1518                             // else none are surrogates, so we can use them.
1519                         }
1520                         // else all < 0x8000 so we can use them
1521 
1522                         // We can use these 4 chars.
1523                         *longChars = *longBytes;
1524                         longBytes++;
1525                         longChars++;
1526                     }
1527 
1528                     chars = (char*)longChars;
1529                     bytes = (byte*)longBytes;
1530 
1531                     if (bytes >= byteEnd)
1532                         break;
1533                 }
1534 #endif // !NO_FAST_UNICODE_LOOP
1535 
1536                 // Get 1st byte
1537                 if (lastByte < 0)
1538                 {
1539                     lastByte = *bytes++;
1540                     continue;
1541                 }
1542 
1543                 // Get full char
1544                 char ch;
1545                 if (bigEndian)
1546                 {
1547                     ch = (char)(lastByte << 8 | *(bytes++));
1548                 }
1549                 else
1550                 {
1551                     ch = (char)(*(bytes++) << 8 | lastByte);
1552                 }
1553                 lastByte = -1;
1554 
1555                 // See if the char's valid
1556                 if (ch >= 0xd800 && ch <= 0xdfff)
1557                 {
1558                     // Was it a high surrogate?
1559                     if (ch <= 0xdbff)
1560                     {
                        // It's a high surrogate; if we already had one pending, do fallback for the previous one
1562                         if (lastChar > 0)
1563                         {
1564                             // Get fallback for previous high surrogate
1565                             // Note we have to reconstruct bytes because some may have been in decoder
1566                             byte[] byteBuffer = null;
1567                             if (bigEndian)
1568                             {
1569                                 byteBuffer = new byte[]
1570                                     { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1571                             }
1572                             else
1573                             {
1574                                 byteBuffer = new byte[]
1575                                     { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1576                             }
1577 
1578                             if (fallbackBuffer == null)
1579                             {
1580                                 if (decoder == null)
1581                                     fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1582                                 else
1583                                     fallbackBuffer = decoder.FallbackBuffer;
1584 
1585                                 // Set our internal fallback interesting things.
1586                                 fallbackBuffer.InternalInitialize(byteStart, charEnd);
1587                             }
1588 
1589                             charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1590                             bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1591                             chars = charsForFallback;
1592 
1593                             if (!fallbackResult)
1594                             {
1595                                 // couldn't fall back lonely surrogate
1596                                 // We either advanced bytes or chars should == charStart and throw below
1597                                 Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1598                                     "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (bad surrogate)");
1599                                 bytes -= 2;                                       // didn't use these 2 bytes
1600                                 fallbackBuffer.InternalReset();
1601                                 ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1602                                 break;                                          // couldn't fallback but didn't throw
1603                             }
1604                         }
1605 
1606                         // Ignore the previous high surrogate which fell back already,
1607                         // yet remember the current high surrogate for next time.
1608                         lastChar = ch;
1609                         continue;
1610                     }
1611 
                    // It's a low surrogate
1613                     if (lastChar == 0)
1614                     {
1615                         // Expected a previous high surrogate
1616                         // Get fallback for this low surrogate
1617                         // Note we have to reconstruct bytes because some may have been in decoder
1618                         byte[] byteBuffer = null;
1619                         if (bigEndian)
1620                         {
1621                             byteBuffer = new byte[]
1622                                 { unchecked((byte)(ch >> 8)), unchecked((byte)ch) };
1623                         }
1624                         else
1625                         {
1626                             byteBuffer = new byte[]
1627                                 { unchecked((byte)ch), unchecked((byte)(ch >> 8)) };
1628                         }
1629 
1630                         if (fallbackBuffer == null)
1631                         {
1632                             if (decoder == null)
1633                                 fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1634                             else
1635                                 fallbackBuffer = decoder.FallbackBuffer;
1636 
1637                             // Set our internal fallback interesting things.
1638                             fallbackBuffer.InternalInitialize(byteStart, charEnd);
1639                         }
1640 
1641                         charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1642                         bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1643                         chars = charsForFallback;
1644 
1645                         if (!fallbackResult)
1646                         {
1647                             // couldn't fall back lonely surrogate
1648                             // We either advanced bytes or chars should == charStart and throw below
1649                             Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1650                                 "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (lonely surrogate)");
1651                             bytes -= 2;                                       // didn't use these 2 bytes
1652                             fallbackBuffer.InternalReset();
1653                             ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1654                             break;                                          // couldn't fallback but didn't throw
1655                         }
1656 
1657                         // Didn't throw, ignore this one (we already did its fallback)
1658                         continue;
1659                     }
1660 
1661                     // Valid surrogate pair, add our lastChar (will need 2 chars)
1662                     if (chars >= charEnd - 1)
1663                     {
1664                         // couldn't find room for this surrogate pair
1665                         // We either advanced bytes or chars should == charStart and throw below
1666                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1667                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (surrogate pair)");
1668                         bytes -= 2;                                       // didn't use these 2 bytes
1669                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1670                         // Leave lastChar for next call to Convert()
1671                         break;                                          // couldn't fallback but didn't throw
1672                     }
1673 
1674                     *chars++ = lastChar;
1675                     lastChar = (char)0;
1676                 }
1677                 else if (lastChar > 0)
1678                 {
1679                     // Had a high surrogate, expected a low surrogate, fall back the high surrogate.
1680                     byte[] byteBuffer = null;
1681                     if (bigEndian)
1682                     {
1683                         byteBuffer = new byte[]
1684                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1685                     }
1686                     else
1687                     {
1688                         byteBuffer = new byte[]
1689                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1690                     }
1691 
1692                     if (fallbackBuffer == null)
1693                     {
1694                         if (decoder == null)
1695                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1696                         else
1697                             fallbackBuffer = decoder.FallbackBuffer;
1698 
1699                         // Set our internal fallback interesting things.
1700                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1701                     }
1702 
1703                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1704                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1705                     chars = charsForFallback;
1706 
1707                     if (!fallbackResult)
1708                     {
1709                         // couldn't fall back high surrogate, or char that would be next
1710                         // We either advanced bytes or chars should == charStart and throw below
1711                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1712                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (no low surrogate)");
1713                         bytes -= 2;                                       // didn't use these 2 bytes
1714                         fallbackBuffer.InternalReset();
1715                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1716                         break;                                          // couldn't fallback but didn't throw
1717                     }
1718 
1719                     // Not left over now, clear previous high surrogate and continue to add current char
1720                     lastChar = (char)0;
1721                 }
1722 
1723                 // Valid char, room for it?
1724                 if (chars >= charEnd)
1725                 {
                    // No room left in the output buffer for this char
1727                     // We either advanced bytes or chars should == charStart and throw below
1728                     Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1729                         "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (normal)");
1730                     bytes -= 2;                                       // didn't use these bytes
1731                     ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1732                     break;                                          // couldn't fallback but didn't throw
1733                 }
1734 
1735                 // add it
1736                 *chars++ = ch;
1737             }
1738 
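            // With no decoder, or a decoder that must flush, a leftover high surrogate or odd byte
            // can't be carried over to a later call, so fall it back now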
1740             if (decoder == null || decoder.MustFlush)
1741             {
1742                 if (lastChar > 0)
1743                 {
                    // No hanging high surrogates allowed when flushing, do fallback for it
1745                     byte[] byteBuffer = null;
1746                     if (bigEndian)
1747                     {
1748                         byteBuffer = new byte[]
1749                             { unchecked((byte)(lastChar >> 8)), unchecked((byte)lastChar) };
1750                     }
1751                     else
1752                     {
1753                         byteBuffer = new byte[]
1754                             { unchecked((byte)lastChar), unchecked((byte)(lastChar >> 8)) };
1755                     }
1756 
1757                     if (fallbackBuffer == null)
1758                     {
1759                         if (decoder == null)
1760                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1761                         else
1762                             fallbackBuffer = decoder.FallbackBuffer;
1763 
1764                         // Set our internal fallback interesting things.
1765                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1766                     }
1767 
1768                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1769                     bool fallbackResult = fallbackBuffer.InternalFallback(byteBuffer, bytes, ref charsForFallback);
1770                     chars = charsForFallback;
1771 
1772                     if (!fallbackResult)
1773                     {
1774                         // 2 bytes couldn't fall back
1775                         // We either advanced bytes or chars should == charStart and throw below
1776                         Debug.Assert(bytes >= byteStart + 2 || chars == charStart,
1777                             "[UnicodeEncoding.GetChars]Expected bytes to have advanced or no output (decoder)");
1778                         bytes -= 2;                                       // didn't use these bytes
1779                         if (lastByte >= 0)
1780                             bytes--;                                    // had an extra last byte hanging around
1781                         fallbackBuffer.InternalReset();
1782                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1783                         // We'll remember these in our decoder though
1784                         bytes += 2;
1785                         if (lastByte >= 0)
1786                             bytes++;
1787                         goto End;
1788                     }
1789 
1790                     // done with this one
1791                     lastChar = (char)0;
1792                 }
1793 
1794                 if (lastByte >= 0)
1795                 {
1796                     if (fallbackBuffer == null)
1797                     {
1798                         if (decoder == null)
1799                             fallbackBuffer = this.decoderFallback.CreateFallbackBuffer();
1800                         else
1801                             fallbackBuffer = decoder.FallbackBuffer;
1802 
1803                         // Set our internal fallback interesting things.
1804                         fallbackBuffer.InternalInitialize(byteStart, charEnd);
1805                     }
1806 
1807                     // No hanging odd bytes allowed if must flush
1808                     charsForFallback = chars; // Avoid passing chars by reference to allow it to be en-registered
1809                     bool fallbackResult = fallbackBuffer.InternalFallback(new byte[] { unchecked((byte)lastByte) }, bytes, ref charsForFallback);
1810                     chars = charsForFallback;
1811 
1812                     if (!fallbackResult)
1813                     {
1814                         // odd byte couldn't fall back
1815                         bytes--;                                        // didn't use this byte
1816                         fallbackBuffer.InternalReset();
1817                         ThrowCharsOverflow(decoder, chars == charStart);// Might throw, if no chars output
1818                         // didn't throw, but we'll remember it in the decoder
1819                         bytes++;
1820                         goto End;
1821                     }
1822 
1823                     // Didn't fail, clear buffer
1824                     lastByte = -1;
1825                 }
1826             }
1827 
1828         End:
1829 
1830             // Remember our decoder if we must
1831             if (decoder != null)
1832             {
1833                 Debug.Assert((decoder.MustFlush == false) || ((lastChar == (char)0) && (lastByte == -1)),
1834                     "[UnicodeEncoding.GetChars] Expected no left over chars or bytes if flushing"
1835                     //                    + " " + ((int)lastChar).ToString("X4") + " " + lastByte.ToString("X2")
1836                     );
1837 
1838                 decoder._bytesUsed = (int)(bytes - byteStart);
1839                 decoder.lastChar = lastChar;
1840                 decoder.lastByte = lastByte;
1841             }
1842 
1843             // Used to do this the old way
1844             // System.IO.__UnmanagedMemoryStream.memcpyimpl((byte*)chars, bytes, byteCount);
1845 
1846             // Shouldn't have anything in fallback buffer for GetChars
1847             // (don't have to check _throwOnOverflow for count or chars)
1848             Debug.Assert(fallbackBuffer == null || fallbackBuffer.Remaining == 0,
1849                 "[UnicodeEncoding.GetChars]Expected empty fallback buffer at end");
1850 
1851             return (int)(chars - charStart);
1852         }
1853 
1854 
GetEncoder()1855         public override System.Text.Encoder GetEncoder()
1856         {
1857             return new EncoderNLS(this);
1858         }
1859 
1860 
GetDecoder()1861         public override System.Text.Decoder GetDecoder()
1862         {
1863             return new UnicodeEncoding.Decoder(this);
1864         }
1865 
1866 
GetPreamble()1867         public override byte[] GetPreamble()
1868         {
1869             if (byteOrderMark)
1870             {
1871                 // Note - we must allocate new byte[]'s here to prevent someone
1872                 // from modifying a cached byte[].
1873                 if (bigEndian)
1874                     return new byte[2] { 0xfe, 0xff };
1875                 else
1876                     return new byte[2] { 0xff, 0xfe };
1877             }
1878             return Array.Empty<Byte>();
1879         }
1880 
1881         public override ReadOnlySpan<byte> Preamble =>
1882             GetType() != typeof(UnicodeEncoding) ? GetPreamble() : // in case a derived UnicodeEncoding overrode GetPreamble
1883             byteOrderMark ? (bigEndian ? s_bigEndianPreamble : s_littleEndianPreamble) :
1884             Array.Empty<byte>();
1885 
GetMaxByteCount(int charCount)1886         public override int GetMaxByteCount(int charCount)
1887         {
1888             if (charCount < 0)
1889                 throw new ArgumentOutOfRangeException(nameof(charCount),
1890                      SR.ArgumentOutOfRange_NeedNonNegNum);
1891 
1892             // Characters would be # of characters + 1 in case left over high surrogate is ? * max fallback
1893             long byteCount = (long)charCount + 1;
1894 
1895             if (EncoderFallback.MaxCharCount > 1)
1896                 byteCount *= EncoderFallback.MaxCharCount;
1897 
1898             // 2 bytes per char
1899             byteCount <<= 1;
1900 
1901             if (byteCount > 0x7fffffff)
1902                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
1903 
1904             return (int)byteCount;
1905         }
1906 
1907 
GetMaxCharCount(int byteCount)1908         public override int GetMaxCharCount(int byteCount)
1909         {
1910             if (byteCount < 0)
1911                 throw new ArgumentOutOfRangeException(nameof(byteCount),
1912                      SR.ArgumentOutOfRange_NeedNonNegNum);
1913 
1914             // long because byteCount could be biggest int.
1915             // 1 char per 2 bytes.  Round up in case 1 left over in decoder.
1916             // Round up using &1 in case byteCount is max size
1917             // Might also need an extra 1 if there's a left over high surrogate in the decoder.
1918             long charCount = (long)(byteCount >> 1) + (byteCount & 1) + 1;
1919 
1920             // Don't forget fallback (in case they have a bunch of lonely surrogates or something bizarre like that)
1921             if (DecoderFallback.MaxCharCount > 1)
1922                 charCount *= DecoderFallback.MaxCharCount;
1923 
1924             if (charCount > 0x7fffffff)
1925                 throw new ArgumentOutOfRangeException(nameof(byteCount), SR.ArgumentOutOfRange_GetCharCountOverflow);
1926 
1927             return (int)charCount;
1928         }
1929 
1930 
Equals(Object value)1931         public override bool Equals(Object value)
1932         {
1933             UnicodeEncoding that = value as UnicodeEncoding;
1934             if (that != null)
1935             {
1936                 //
1937                 // Big Endian Unicode has different code page (1201) than small Endian one (1200),
1938                 // so we still have to check _codePage here.
1939                 //
1940                 return (CodePage == that.CodePage) &&
1941                         byteOrderMark == that.byteOrderMark &&
1942                         //                        isThrowException == that.isThrowException &&  // Same as Encoder/Decoder being exception fallbacks
1943                         bigEndian == that.bigEndian &&
1944                        (EncoderFallback.Equals(that.EncoderFallback)) &&
1945                        (DecoderFallback.Equals(that.DecoderFallback));
1946             }
1947             return (false);
1948         }
1949 
GetHashCode()1950         public override int GetHashCode()
1951         {
1952             return CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode() +
1953                    (byteOrderMark ? 4 : 0) + (bigEndian ? 8 : 0);
1954         }
1955 
1956         private sealed class Decoder : System.Text.DecoderNLS
1957         {
1958             internal int lastByte = -1;
1959             internal char lastChar = '\0';
1960 
Decoder(UnicodeEncoding encoding)1961             public Decoder(UnicodeEncoding encoding) : base(encoding)
1962             {
1963                 // base calls reset
1964             }
1965 
Reset()1966             public override void Reset()
1967             {
1968                 lastByte = -1;
1969                 lastChar = '\0';
1970                 if (_fallbackBuffer != null)
1971                     _fallbackBuffer.Reset();
1972             }
1973 
1974             // Anything left in our decoder?
1975             internal override bool HasState
1976             {
1977                 get
1978                 {
1979                     return (this.lastByte != -1 || this.lastChar != '\0');
1980                 }
1981             }
1982         }
1983     }
1984 }
1985 
1986