1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 //
6 // Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
7 //
8 
9 using System;
10 using System.Diagnostics;
11 using System.Runtime.InteropServices;
12 
13 namespace System.Text
14 {
15     public class UTF7Encoding : Encoding
16     {
17         private const String base64Chars =
18             "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
19         //   0123456789111111111122222222223333333333444444444455555555556666
20         //             012345678901234567890123456789012345678901234567890123
21 
22         // These are the characters that can be directly encoded in UTF7.
23         private const String directChars =
24             "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
25 
26         // These are the characters that can be optionally directly encoded in UTF7.
27         private const String optionalChars =
28             "!\"#$%&*;<=>@[]^_`{|}";
29 
30         // Used by Encoding.UTF7 for lazy initialization
31         // The initialization code will not be run until a static member of the class is referenced
32         internal static readonly UTF7Encoding s_default = new UTF7Encoding();
33 
34         // The set of base 64 characters.
35         private byte[] _base64Bytes;
36         // The decoded bits for every base64 values. This array has a size of 128 elements.
37         // The index is the code point value of the base 64 characters.  The value is -1 if
38         // the code point is not a valid base 64 character.  Otherwise, the value is a value
39         // from 0 ~ 63.
40         private sbyte[] _base64Values;
41         // The array to decide if a Unicode code point below 0x80 can be directly encoded in UTF7.
42         // This array has a size of 128.
43         private bool[] _directEncode;
44 
45         private bool _allowOptionals;
46 
47         private const int UTF7_CODEPAGE = 65000;
48 
49 
UTF7Encoding()50         public UTF7Encoding()
51             : this(false)
52         {
53         }
54 
UTF7Encoding(bool allowOptionals)55         public UTF7Encoding(bool allowOptionals)
56             : base(UTF7_CODEPAGE) //Set the data item.
57         {
58             // Allowing optionals?
59             _allowOptionals = allowOptionals;
60 
61             // Make our tables
62             MakeTables();
63         }
64 
MakeTables()65         private void MakeTables()
66         {
67             // Build our tables
68             _base64Bytes = new byte[64];
69             for (int i = 0; i < 64; i++) _base64Bytes[i] = (byte)base64Chars[i];
70             _base64Values = new sbyte[128];
71             for (int i = 0; i < 128; i++) _base64Values[i] = -1;
72             for (int i = 0; i < 64; i++) _base64Values[_base64Bytes[i]] = (sbyte)i;
73             _directEncode = new bool[128];
74             int count = directChars.Length;
75             for (int i = 0; i < count; i++)
76             {
77                 _directEncode[directChars[i]] = true;
78             }
79 
80             if (_allowOptionals)
81             {
82                 count = optionalChars.Length;
83                 for (int i = 0; i < count; i++)
84                 {
85                     _directEncode[optionalChars[i]] = true;
86                 }
87             }
88         }
89 
90         // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
SetDefaultFallbacks()91         internal override void SetDefaultFallbacks()
92         {
93             // UTF7 had an odd decoderFallback behavior, and the Encoder fallback
94             // is irrelevant because we encode surrogates individually and never check for unmatched ones
95             // (so nothing can fallback during encoding)
96             this.encoderFallback = new EncoderReplacementFallback(String.Empty);
97             this.decoderFallback = new DecoderUTF7Fallback();
98         }
99 
Equals(Object value)100         public override bool Equals(Object value)
101         {
102             UTF7Encoding that = value as UTF7Encoding;
103             if (that != null)
104             {
105                 return (_allowOptionals == that._allowOptionals) &&
106                        (EncoderFallback.Equals(that.EncoderFallback)) &&
107                        (DecoderFallback.Equals(that.DecoderFallback));
108             }
109             return (false);
110         }
111 
112         // Compared to all the other encodings, variations of UTF7 are unlikely
113 
GetHashCode()114         public override int GetHashCode()
115         {
116             return this.CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
117         }
118 
119         // The following methods are copied from EncodingNLS.cs.
120         // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
121         // These should be kept in sync for the following classes:
122         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
123 
124         // Returns the number of bytes required to encode a range of characters in
125         // a character array.
126         //
127         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
128         // So if you fix this, fix the others.  Currently those include:
129         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
130         // parent method is safe
131 
GetByteCount(char[] chars, int index, int count)132         public override unsafe int GetByteCount(char[] chars, int index, int count)
133         {
134             // Validate input parameters
135             if (chars == null)
136                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
137 
138             if (index < 0 || count < 0)
139                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
140 
141             if (chars.Length - index < count)
142                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
143 
144             // If no input, return 0, avoid fixed empty array problem
145             if (count == 0)
146                 return 0;
147 
148             // Just call the pointer version
149             fixed (char* pChars = chars)
150                 return GetByteCount(pChars + index, count, null);
151         }
152 
153         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
154         // So if you fix this, fix the others.  Currently those include:
155         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
156         // parent method is safe
157 
GetByteCount(string s)158         public override unsafe int GetByteCount(string s)
159         {
160             // Validate input
161             if (s==null)
162                 throw new ArgumentNullException("s");
163 
164             fixed (char* pChars = s)
165                 return GetByteCount(pChars, s.Length, null);
166         }
167 
168         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
169         // So if you fix this, fix the others.  Currently those include:
170         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
171 
172         [CLSCompliant(false)]
GetByteCount(char* chars, int count)173         public override unsafe int GetByteCount(char* chars, int count)
174         {
175             // Validate Parameters
176             if (chars == null)
177                 throw new ArgumentNullException("chars", SR.ArgumentNull_Array);
178 
179             if (count < 0)
180                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
181 
182             // Call it with empty encoder
183             return GetByteCount(chars, count, null);
184         }
185 
186         // Parent method is safe.
187         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
188         // So if you fix this, fix the others.  Currently those include:
189         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
190 
GetBytes(string s, int charIndex, int charCount, byte[] bytes, int byteIndex)191         public override unsafe int GetBytes(string s, int charIndex, int charCount,
192                                               byte[] bytes, int byteIndex)
193         {
194             if (s == null || bytes == null)
195                 throw new ArgumentNullException((s == null ? "s" : "bytes"), SR.ArgumentNull_Array);
196 
197             if (charIndex < 0 || charCount < 0)
198                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
199 
200             if (s.Length - charIndex < charCount)
201                 throw new ArgumentOutOfRangeException("s", SR.ArgumentOutOfRange_IndexCount);
202 
203             if (byteIndex < 0 || byteIndex > bytes.Length)
204                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
205 
206             int byteCount = bytes.Length - byteIndex;
207 
208             fixed (char* pChars = s) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
209                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
210         }
211 
212         // Encodes a range of characters in a character array into a range of bytes
213         // in a byte array. An exception occurs if the byte array is not large
214         // enough to hold the complete encoding of the characters. The
215         // GetByteCount method can be used to determine the exact number of
216         // bytes that will be produced for a given range of characters.
217         // Alternatively, the GetMaxByteCount method can be used to
218         // determine the maximum number of bytes that will be produced for a given
219         // number of characters, regardless of the actual character values.
220         //
221         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
222         // So if you fix this, fix the others.  Currently those include:
223         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
224         // parent method is safe
225 
GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)226         public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
227                                                byte[] bytes, int byteIndex)
228         {
229             // Validate parameters
230             if (chars == null || bytes == null)
231                 throw new ArgumentNullException((chars == null ? "chars" : "bytes"), SR.ArgumentNull_Array);
232 
233             if (charIndex < 0 || charCount < 0)
234                 throw new ArgumentOutOfRangeException((charIndex < 0 ? "charIndex" : "charCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
235 
236             if (chars.Length - charIndex < charCount)
237                 throw new ArgumentOutOfRangeException("chars", SR.ArgumentOutOfRange_IndexCountBuffer);
238 
239             if (byteIndex < 0 || byteIndex > bytes.Length)
240                 throw new ArgumentOutOfRangeException("byteIndex", SR.ArgumentOutOfRange_Index);
241 
242             // If nothing to encode return 0, avoid fixed problem
243             if (charCount == 0)
244                 return 0;
245 
246             // Just call pointer version
247             int byteCount = bytes.Length - byteIndex;
248 
249             fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
250                 // Remember that byteCount is # to decode, not size of array.
251                 return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
252         }
253 
254         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
255         // So if you fix this, fix the others.  Currently those include:
256         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
257 
258         [CLSCompliant(false)]
GetBytes(char* chars, int charCount, byte* bytes, int byteCount)259         public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
260         {
261             // Validate Parameters
262             if (bytes == null || chars == null)
263                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
264 
265             if (charCount < 0 || byteCount < 0)
266                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
267 
268             return GetBytes(chars, charCount, bytes, byteCount, null);
269         }
270 
271         // Returns the number of characters produced by decoding a range of bytes
272         // in a byte array.
273         //
274         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
275         // So if you fix this, fix the others.  Currently those include:
276         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
277         // parent method is safe
278 
GetCharCount(byte[] bytes, int index, int count)279         public override unsafe int GetCharCount(byte[] bytes, int index, int count)
280         {
281             // Validate Parameters
282             if (bytes == null)
283                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
284 
285             if (index < 0 || count < 0)
286                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
287 
288             if (bytes.Length - index < count)
289                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
290 
291             // If no input just return 0, fixed doesn't like 0 length arrays.
292             if (count == 0)
293                 return 0;
294 
295             // Just call pointer version
296             fixed (byte* pBytes = bytes)
297                 return GetCharCount(pBytes + index, count, null);
298         }
299 
300         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
301         // So if you fix this, fix the others.  Currently those include:
302         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
303 
304         [CLSCompliant(false)]
GetCharCount(byte* bytes, int count)305         public override unsafe int GetCharCount(byte* bytes, int count)
306         {
307             // Validate Parameters
308             if (bytes == null)
309                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
310 
311             if (count < 0)
312                 throw new ArgumentOutOfRangeException("count", SR.ArgumentOutOfRange_NeedNonNegNum);
313 
314             return GetCharCount(bytes, count, null);
315         }
316 
317         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
318         // So if you fix this, fix the others.  Currently those include:
319         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
320         // parent method is safe
321 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)322         public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
323                                               char[] chars, int charIndex)
324         {
325             // Validate Parameters
326             if (bytes == null || chars == null)
327                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
328 
329             if (byteIndex < 0 || byteCount < 0)
330                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? "byteIndex" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
331 
332             if ( bytes.Length - byteIndex < byteCount)
333                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
334 
335             if (charIndex < 0 || charIndex > chars.Length)
336                 throw new ArgumentOutOfRangeException("charIndex", SR.ArgumentOutOfRange_Index);
337 
338             // If no input, return 0 & avoid fixed problem
339             if (byteCount == 0)
340                 return 0;
341 
342             // Just call pointer version
343             int charCount = chars.Length - charIndex;
344 
345             fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
346                 // Remember that charCount is # to decode, not size of array
347                 return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
348         }
349 
350         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
351         // So if you fix this, fix the others.  Currently those include:
352         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
353 
354         [CLSCompliant(false)]
GetChars(byte* bytes, int byteCount, char* chars, int charCount)355         public unsafe override int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
356         {
357             // Validate Parameters
358             if (bytes == null || chars == null)
359                 throw new ArgumentNullException(bytes == null ? "bytes" : "chars", SR.ArgumentNull_Array);
360 
361             if (charCount < 0 || byteCount < 0)
362                 throw new ArgumentOutOfRangeException((charCount < 0 ? "charCount" : "byteCount"), SR.ArgumentOutOfRange_NeedNonNegNum);
363 
364             return GetChars(bytes, byteCount, chars, charCount, null);
365         }
366 
367         // Returns a string containing the decoded representation of a range of
368         // bytes in a byte array.
369         //
370         // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
371         // So if you fix this, fix the others.  Currently those include:
372         // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
373         // parent method is safe
374 
GetString(byte[] bytes, int index, int count)375         public override unsafe String GetString(byte[] bytes, int index, int count)
376         {
377             // Validate Parameters
378             if (bytes == null)
379                 throw new ArgumentNullException("bytes", SR.ArgumentNull_Array);
380 
381             if (index < 0 || count < 0)
382                 throw new ArgumentOutOfRangeException((index < 0 ? "index" : "count"), SR.ArgumentOutOfRange_NeedNonNegNum);
383 
384             if (bytes.Length - index < count)
385                 throw new ArgumentOutOfRangeException("bytes", SR.ArgumentOutOfRange_IndexCountBuffer);
386 
387             // Avoid problems with empty input buffer
388             if (count == 0) return String.Empty;
389 
390             fixed (byte* pBytes = bytes)
391                 return String.CreateStringFromEncoding(
392                     pBytes + index, count, this);
393         }
394 
395         //
396         // End of standard methods copied from EncodingNLS.cs
397         //
398 
GetByteCount(char* chars, int count, EncoderNLS baseEncoder)399         internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
400         {
401             Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
402             Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");
403 
404             // Just call GetBytes with bytes == null
405             return GetBytes(chars, count, null, 0, baseEncoder);
406         }
407 
GetBytes(char* chars, int charCount, byte* bytes, int byteCount, EncoderNLS baseEncoder)408         internal override unsafe int GetBytes(char* chars, int charCount,
409                                                 byte* bytes, int byteCount, EncoderNLS baseEncoder)
410         {
411             Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
412             Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
413             Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");
414 
415             // Get encoder info
416             UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;
417 
418             // Default bits & count
419             int bits = 0;
420             int bitCount = -1;
421 
422             // prepare our helpers
423             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
424                 this, encoder, bytes, byteCount, chars, charCount);
425 
426             if (encoder != null)
427             {
428                 bits = encoder.bits;
429                 bitCount = encoder.bitCount;
430 
431                 // May have had too many left over
432                 while (bitCount >= 6)
433                 {
434                     bitCount -= 6;
435                     // If we fail we'll never really have enough room
436                     if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
437                         ThrowBytesOverflow(encoder, buffer.Count == 0);
438                 }
439             }
440 
441             while (buffer.MoreData)
442             {
443                 char currentChar = buffer.GetNextChar();
444 
445                 if (currentChar < 0x80 && _directEncode[currentChar])
446                 {
447                     if (bitCount >= 0)
448                     {
449                         if (bitCount > 0)
450                         {
451                             // Try to add the next byte
452                             if (!buffer.AddByte(_base64Bytes[bits << 6 - bitCount & 0x3F]))
453                                 break;                                          // Stop here, didn't throw
454 
455                             bitCount = 0;
456                         }
457 
458                         // Need to get emit '-' and our char, 2 bytes total
459                         if (!buffer.AddByte((byte)'-'))
460                             break;                                          // Stop here, didn't throw
461 
462                         bitCount = -1;
463                     }
464 
465                     // Need to emit our char
466                     if (!buffer.AddByte((byte)currentChar))
467                         break;                                          // Stop here, didn't throw
468                 }
469                 else if (bitCount < 0 && currentChar == '+')
470                 {
471                     if (!buffer.AddByte((byte)'+', (byte)'-'))
472                         break;                                          // Stop here, didn't throw
473                 }
474                 else
475                 {
476                     if (bitCount < 0)
477                     {
478                         // Need to emit a + and 12 bits (3 bytes)
479                         // Only 12 of the 16 bits will be emitted this time, the other 4 wait 'til next time
480                         if (!buffer.AddByte((byte)'+'))
481                             break;                                          // Stop here, didn't throw
482 
483                         // We're now in bit mode, but haven't stored data yet
484                         bitCount = 0;
485                     }
486 
487                     // Add our bits
488                     bits = bits << 16 | currentChar;
489                     bitCount += 16;
490 
491                     while (bitCount >= 6)
492                     {
493                         bitCount -= 6;
494                         if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
495                         {
496                             bitCount += 6;                              // We didn't use these bits
497                             currentChar = buffer.GetNextChar();              // We're processing this char still, but AddByte
498                                                                              // --'d it when we ran out of space
499                             break;                                      // Stop here, not enough room for bytes
500                         }
501                     }
502 
503                     if (bitCount >= 6)
504                         break;                  // Didn't have room to encode enough bits
505                 }
506             }
507 
508             // Now if we have bits left over we have to encode them.
509             // MustFlush may have been cleared by encoding.ThrowBytesOverflow earlier if converting
510             if (bitCount >= 0 && (encoder == null || encoder.MustFlush))
511             {
512                 // Do we have bits we have to stick in?
513                 if (bitCount > 0)
514                 {
515                     if (buffer.AddByte(_base64Bytes[(bits << (6 - bitCount)) & 0x3F]))
516                     {
517                         // Emitted spare bits, 0 bits left
518                         bitCount = 0;
519                     }
520                 }
521 
522                 // If converting and failed bitCount above, then we'll fail this too
523                 if (buffer.AddByte((byte)'-'))
524                 {
525                     // turned off bit mode';
526                     bits = 0;
527                     bitCount = -1;
528                 }
529                 else
530                     // If not successful, convert will maintain state for next time, also
531                     // AddByte will have decremented our char count, however we need it to remain the same
532                     buffer.GetNextChar();
533             }
534 
535             // Do we have an encoder we're allowed to use?
536             // bytes == null if counting, so don't use encoder then
537             if (bytes != null && encoder != null)
538             {
539                 // We already cleared bits & bitcount for mustflush case
540                 encoder.bits = bits;
541                 encoder.bitCount = bitCount;
542                 encoder._charsUsed = buffer.CharsUsed;
543             }
544 
545             return buffer.Count;
546         }
547 
GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)548         internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
549         {
550             Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
551             Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");
552 
553             // Just call GetChars with null char* to do counting
554             return GetChars(bytes, count, null, 0, baseDecoder);
555         }
556 
GetChars(byte* bytes, int byteCount, char* chars, int charCount, DecoderNLS baseDecoder)557         internal override unsafe int GetChars(byte* bytes, int byteCount,
558                                                 char* chars, int charCount, DecoderNLS baseDecoder)
559         {
560             Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
561             Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
562             Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");
563 
564             // Might use a decoder
565             UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;
566 
567             // Get our output buffer info.
568             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
569                 this, decoder, chars, charCount, bytes, byteCount);
570 
571             // Get decoder info
572             int bits = 0;
573             int bitCount = -1;
574             bool firstByte = false;
575             if (decoder != null)
576             {
577                 bits = decoder.bits;
578                 bitCount = decoder.bitCount;
579                 firstByte = decoder.firstByte;
580 
581                 Debug.Assert(firstByte == false || decoder.bitCount <= 0,
582                     "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
583             }
584 
585             // We may have had bits in the decoder that we couldn't output last time, so do so now
586             if (bitCount >= 16)
587             {
588                 // Check our decoder buffer
589                 if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xFFFF)))
590                     ThrowCharsOverflow(decoder, true);  // Always throw, they need at least 1 char even in Convert
591 
592                 // Used this one, clean up extra bits
593                 bitCount -= 16;
594             }
595 
596             // Loop through the input
597             while (buffer.MoreData)
598             {
599                 byte currentByte = buffer.GetNextByte();
600                 int c;
601 
602                 if (bitCount >= 0)
603                 {
604                     //
605                     // Modified base 64 encoding.
606                     //
607                     sbyte v;
608                     if (currentByte < 0x80 && ((v = _base64Values[currentByte]) >= 0))
609                     {
610                         firstByte = false;
611                         bits = (bits << 6) | ((byte)v);
612                         bitCount += 6;
613                         if (bitCount >= 16)
614                         {
615                             c = (bits >> (bitCount - 16)) & 0xFFFF;
616                             bitCount -= 16;
617                         }
618                         // If not enough bits just continue
619                         else continue;
620                     }
621                     else
622                     {
623                         // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
624                         bitCount = -1;
625 
626                         if (currentByte != '-')
627                         {
628                             // >= 0x80 (because of 1st if statemtn)
629                             // We need this check since the _base64Values[b] check below need b <= 0x7f.
630                             // This is not a valid base 64 byte.  Terminate the shifted-sequence and
631                             // emit this byte.
632 
633                             // not in base 64 table
634                             // According to the RFC 1642 and the example code of UTF-7
635                             // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte
636 
637                             // Chars won't be updated unless this works, try to fallback
638                             if (!buffer.Fallback(currentByte))
639                                 break;                                          // Stop here, didn't throw
640 
641                             // Used that byte, we're done with it
642                             continue;
643                         }
644 
645                         //
646                         // The encoding for '+' is "+-".
647                         //
648                         if (firstByte) c = '+';
649                         // We just turn it off if not emitting a +, so we're done.
650                         else continue;
651                     }
652                     //
653                     // End of modified base 64 encoding block.
654                     //
655                 }
656                 else if (currentByte == '+')
657                 {
658                     //
659                     // Found the start of a modified base 64 encoding block or a plus sign.
660                     //
661                     bitCount = 0;
662                     firstByte = true;
663                     continue;
664                 }
665                 else
666                 {
667                     // Normal character
668                     if (currentByte >= 0x80)
669                     {
670                         // Try to fallback
671                         if (!buffer.Fallback(currentByte))
672                             break;                                          // Stop here, didn't throw
673 
674                         // Done falling back
675                         continue;
676                     }
677 
678                     // Use the normal character
679                     c = currentByte;
680                 }
681 
682                 if (c >= 0)
683                 {
684                     // Check our buffer
685                     if (!buffer.AddChar((char)c))
686                     {
687                         // No room.  If it was a plain char we'll try again later.
688                         // Note, we'll consume this byte and stick it in decoder, even if we can't output it
689                         if (bitCount >= 0)                                  // Can we rememmber this byte (char)
690                         {
691                             buffer.AdjustBytes(+1);                         // Need to readd the byte that AddChar subtracted when it failed
692                             bitCount += 16;                                 // We'll still need that char we have in our bits
693                         }
694                         break;                                              // didn't throw, stop
695                     }
696                 }
697             }
698 
699             // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
700             if (chars != null && decoder != null)
701             {
702                 // MustFlush?  (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
703                 if (decoder.MustFlush)
704                 {
705                     // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
706                     decoder.bits = 0;
707                     decoder.bitCount = -1;
708                     decoder.firstByte = false;
709                 }
710                 else
711                 {
712                     decoder.bits = bits;
713                     decoder.bitCount = bitCount;
714                     decoder.firstByte = firstByte;
715                 }
716                 decoder._bytesUsed = buffer.BytesUsed;
717             }
718             // else ignore any hanging bits.
719 
720             // Return our count
721             return buffer.Count;
722         }
723 
724 
GetDecoder()725         public override System.Text.Decoder GetDecoder()
726         {
727             return new UTF7Encoding.Decoder(this);
728         }
729 
730 
GetEncoder()731         public override System.Text.Encoder GetEncoder()
732         {
733             return new UTF7Encoding.Encoder(this);
734         }
735 
736 
GetMaxByteCount(int charCount)737         public override int GetMaxByteCount(int charCount)
738         {
739             if (charCount < 0)
740                 throw new ArgumentOutOfRangeException(nameof(charCount),
741                      SR.ArgumentOutOfRange_NeedNonNegNum);
742 
743             // Suppose that every char can not be direct-encoded, we know that
744             // a byte can encode 6 bits of the Unicode character.  And we will
745             // also need two extra bytes for the shift-in ('+') and shift-out ('-') mark.
746             // Therefore, the max byte should be:
747             // byteCount = 2 + Math.Ceiling((double)charCount * 16 / 6);
748             // That is always <= 2 + 3 * charCount;
749             // Longest case is alternating encoded, direct, encoded data for 5 + 1 + 5... bytes per char.
750             // UTF7 doesn't have left over surrogates, but if no input we may need an output - to turn off
751             // encoding if MustFlush is true.
752 
753             // Its easiest to think of this as 2 bytes to turn on/off the base64 mode, then 3 bytes per char.
754             // 3 bytes is 18 bits of encoding, which is more than we need, but if its direct encoded then 3
755             // bytes allows us to turn off and then back on base64 mode if necessary.
756 
757             // Note that UTF7 encoded surrogates individually and isn't worried about mismatches, so all
758             // code points are encodable int UTF7.
759             long byteCount = (long)charCount * 3 + 2;
760 
761             // check for overflow
762             if (byteCount > 0x7fffffff)
763                 throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);
764 
765             return (int)byteCount;
766         }
767 
768 
GetMaxCharCount(int byteCount)769         public override int GetMaxCharCount(int byteCount)
770         {
771             if (byteCount < 0)
772                 throw new ArgumentOutOfRangeException(nameof(byteCount),
773                      SR.ArgumentOutOfRange_NeedNonNegNum);
774 
775             // Worst case is 1 char per byte.  Minimum 1 for left over bits in case decoder is being flushed
776             // Also note that we ignore extra bits (per spec), so UTF7 doesn't have unknown in this direction.
777             int charCount = byteCount;
778             if (charCount == 0) charCount = 1;
779 
780             return charCount;
781         }
782 
783         // Of all the amazing things... This MUST be Decoder so that our com name
784         // for System.Text.Decoder doesn't change
785         private sealed class Decoder : DecoderNLS
786         {
787             /*private*/
788             internal int bits;
789             /*private*/
790             internal int bitCount;
791             /*private*/
792             internal bool firstByte;
793 
Decoder(UTF7Encoding encoding)794             public Decoder(UTF7Encoding encoding) : base(encoding)
795             {
796                 // base calls reset
797             }
798 
Reset()799             public override void Reset()
800             {
801                 this.bits = 0;
802                 this.bitCount = -1;
803                 this.firstByte = false;
804                 if (_fallbackBuffer != null)
805                     _fallbackBuffer.Reset();
806             }
807 
808             // Anything left in our encoder?
809             internal override bool HasState
810             {
811                 get
812                 {
813                     // NOTE: This forces the last -, which some encoder might not encode.  If we
814                     // don't see it we don't think we're done reading.
815                     return (this.bitCount != -1);
816                 }
817             }
818         }
819 
820         // Of all the amazing things... This MUST be Encoder so that our com name
821         // for System.Text.Encoder doesn't change
822         private sealed class Encoder : EncoderNLS
823         {
824             /*private*/
825             internal int bits;
826             /*private*/
827             internal int bitCount;
828 
Encoder(UTF7Encoding encoding)829             public Encoder(UTF7Encoding encoding) : base(encoding)
830             {
831                 // base calls reset
832             }
833 
Reset()834             public override void Reset()
835             {
836                 this.bitCount = -1;
837                 this.bits = 0;
838                 if (_fallbackBuffer != null)
839                     _fallbackBuffer.Reset();
840             }
841 
842             // Anything left in our encoder?
843             internal override bool HasState
844             {
845                 get
846                 {
847                     return (this.bits != 0 || this.bitCount != -1);
848                 }
849             }
850         }
851 
852         // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
853         // and turn off base64 mode if it was in that mode.  We still exit the mode, but now we fallback.
854         private sealed class DecoderUTF7Fallback : DecoderFallback
855         {
856             // Construction.  Default replacement fallback uses no best fit and ? replacement string
DecoderUTF7Fallback()857             public DecoderUTF7Fallback()
858             {
859             }
860 
CreateFallbackBuffer()861             public override DecoderFallbackBuffer CreateFallbackBuffer()
862             {
863                 return new DecoderUTF7FallbackBuffer(this);
864             }
865 
866             // Maximum number of characters that this instance of this fallback could return
867             public override int MaxCharCount
868             {
869                 get
870                 {
871                     // returns 1 char per bad byte
872                     return 1;
873                 }
874             }
875 
Equals(Object value)876             public override bool Equals(Object value)
877             {
878                 DecoderUTF7Fallback that = value as DecoderUTF7Fallback;
879                 if (that != null)
880                 {
881                     return true;
882                 }
883                 return (false);
884             }
885 
GetHashCode()886             public override int GetHashCode()
887             {
888                 return 984;
889             }
890         }
891 
892         private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
893         {
894             // Store our default string
895             private char cFallback = (char)0;
896             private int iCount = -1;
897             private int iSize;
898 
899             // Construction
DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)900             public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
901             {
902             }
903 
904             // Fallback Methods
Fallback(byte[] bytesUnknown, int index)905             public override bool Fallback(byte[] bytesUnknown, int index)
906             {
907                 // We expect no previous fallback in our buffer
908                 Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
909                 Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");
910 
911                 // Go ahead and get our fallback
912                 cFallback = (char)bytesUnknown[0];
913 
914                 // Any of the fallback characters can be handled except for 0
915                 if (cFallback == 0)
916                 {
917                     return false;
918                 }
919 
920                 iCount = iSize = 1;
921 
922                 return true;
923             }
924 
GetNextChar()925             public override char GetNextChar()
926             {
927                 if (iCount-- > 0)
928                     return cFallback;
929 
930                 // Note: this means that 0 in UTF7 stream will never be emitted.
931                 return (char)0;
932             }
933 
MovePrevious()934             public override bool MovePrevious()
935             {
936                 if (iCount >= 0)
937                 {
938                     iCount++;
939                 }
940 
941                 // return true if we were allowed to do this
942                 return (iCount >= 0 && iCount <= iSize);
943             }
944 
945             // Return # of chars left in this fallback
946             public override int Remaining
947             {
948                 get
949                 {
950                     return (iCount > 0) ? iCount : 0;
951                 }
952             }
953 
954             // Clear the buffer
Reset()955             public override unsafe void Reset()
956             {
957                 iCount = -1;
958                 byteStart = null;
959             }
960 
961             // This version just counts the fallback and doesn't actually copy anything.
InternalFallback(byte[] bytes, byte* pBytes)962             internal unsafe override int InternalFallback(byte[] bytes, byte* pBytes)
963             // Right now this has both bytes and bytes[], since we might have extra bytes, hence the
964             // array, and we might need the index, hence the byte*
965             {
966                 // We expect no previous fallback in our buffer
967                 Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");
968                 if (bytes.Length != 1)
969                 {
970                     throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
971                 }
972 
973                 // Can't fallback a byte 0, so return for that case, 1 otherwise.
974                 return bytes[0] == 0 ? 0 : 1;
975             }
976         }
977     }
978 }
979