// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

//
// Don't override IsAlwaysNormalized because it is just a Unicode Transformation and could be confused.
//

using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace System.Text
{
    /// <summary>
    /// UTF-7 encoding (code page 65000). Characters flagged in the direct-encode table are written
    /// as a single byte; every other character is written as modified base64 between a '+'
    /// (shift-in) and a '-' (shift-out). A literal '+' outside base64 mode is written as "+-".
    /// </summary>
    public class UTF7Encoding : Encoding
    {
        private const string base64Chars =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
        //   0123456789111111111122222222223333333333444444444455555555556666
        //             012345678901234567890123456789012345678901234567890123

        // These are the characters that can be directly encoded in UTF7.
        private const string directChars =
            "\t\n\r '(),-./0123456789:?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

        // These are the characters that can be optionally directly encoded in UTF7.
        private const string optionalChars =
            "!\"#$%&*;<=>@[]^_`{|}";

        // Used by Encoding.UTF7 for lazy initialization
        // The initialization code will not be run until a static member of the class is referenced
        internal static readonly UTF7Encoding s_default = new UTF7Encoding();

        // The set of base 64 characters.
        private byte[] _base64Bytes;

        // The decoded bits for every base64 value. This array has a size of 128 elements.
        // The index is the code point value of the base 64 character. The value is -1 if
        // the code point is not a valid base 64 character. Otherwise, the value is a value
        // from 0 ~ 63.
        private sbyte[] _base64Values;

        // The array to decide if a Unicode code point below 0x80 can be directly encoded in UTF7.
        // This array has a size of 128.
        private bool[] _directEncode;

        // True when the characters in optionalChars are also written directly.
        private readonly bool _allowOptionals;

        private const int UTF7_CODEPAGE = 65000;

        /// <summary>
        /// Creates an encoding that does not directly encode the optional characters.
        /// </summary>
        public UTF7Encoding()
            : this(false)
        {
        }

        /// <summary>
        /// Creates an encoding.
        /// </summary>
        /// <param name="allowOptionals">
        /// When true, the characters in <c>optionalChars</c> are written directly instead of as base64.
        /// </param>
        public UTF7Encoding(bool allowOptionals)
            : base(UTF7_CODEPAGE) //Set the data item.
        {
            // Allowing optionals?
            _allowOptionals = allowOptionals;

            // Make our tables
            MakeTables();
        }

        // Builds the base64 encode/decode tables and the direct-encode lookup table.
        private void MakeTables()
        {
            // Build our tables
            _base64Bytes = new byte[64];
            for (int i = 0; i < 64; i++) _base64Bytes[i] = (byte)base64Chars[i];
            _base64Values = new sbyte[128];
            for (int i = 0; i < 128; i++) _base64Values[i] = -1;
            for (int i = 0; i < 64; i++) _base64Values[_base64Bytes[i]] = (sbyte)i;
            _directEncode = new bool[128];
            int count = directChars.Length;
            for (int i = 0; i < count; i++)
            {
                _directEncode[directChars[i]] = true;
            }

            if (_allowOptionals)
            {
                count = optionalChars.Length;
                for (int i = 0; i < count; i++)
                {
                    _directEncode[optionalChars[i]] = true;
                }
            }
        }

        // We go ahead and set this because Encoding expects it, however nothing can fall back in UTF7.
        internal override void SetDefaultFallbacks()
        {
            // UTF7 had an odd decoderFallback behavior, and the Encoder fallback
            // is irrelevant because we encode surrogates individually and never check for unmatched ones
            // (so nothing can fallback during encoding)
            this.encoderFallback = new EncoderReplacementFallback(string.Empty);
            this.decoderFallback = new DecoderUTF7Fallback();
        }

        /// <summary>
        /// Two UTF7 encodings are equal when they agree on the optional-character setting
        /// and on both fallbacks.
        /// </summary>
        public override bool Equals(object value)
        {
            return value is UTF7Encoding that &&
                   _allowOptionals == that._allowOptionals &&
                   EncoderFallback.Equals(that.EncoderFallback) &&
                   DecoderFallback.Equals(that.DecoderFallback);
        }

        // Compared to all the other encodings, variations of UTF7 are unlikely

        public override int GetHashCode()
        {
            return this.CodePage + this.EncoderFallback.GetHashCode() + this.DecoderFallback.GetHashCode();
        }

        // The following methods are copied from EncodingNLS.cs.
        // Unfortunately EncodingNLS.cs is internal and we're public, so we have to re-implement them here.
        // These should be kept in sync for the following classes:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        // Returns the number of bytes required to encode a range of characters in
        // a character array.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe int GetByteCount(char[] chars, int index, int count)
        {
            // Validate input parameters
            if (chars == null)
                throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (chars.Length - index < count)
                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);

            // If no input, return 0, avoid fixed empty array problem
            if (count == 0)
                return 0;

            // Just call the pointer version
            fixed (char* pChars = chars)
                return GetByteCount(pChars + index, count, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe int GetByteCount(string s)
        {
            // Validate input
            if (s == null)
                throw new ArgumentNullException(nameof(s));

            fixed (char* pChars = s)
                return GetByteCount(pChars, s.Length, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        [CLSCompliant(false)]
        public override unsafe int GetByteCount(char* chars, int count)
        {
            // Validate Parameters
            if (chars == null)
                throw new ArgumentNullException(nameof(chars), SR.ArgumentNull_Array);

            if (count < 0)
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);

            // Call it with empty encoder
            return GetByteCount(chars, count, null);
        }

        // Parent method is safe.
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        public override unsafe int GetBytes(string s, int charIndex, int charCount,
                                            byte[] bytes, int byteIndex)
        {
            if (s == null || bytes == null)
                throw new ArgumentNullException((s == null ? nameof(s) : nameof(bytes)), SR.ArgumentNull_Array);

            if (charIndex < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (s.Length - charIndex < charCount)
                throw new ArgumentOutOfRangeException(nameof(s), SR.ArgumentOutOfRange_IndexCount);

            if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);

            // Room left in the output array from byteIndex onward
            int byteCount = bytes.Length - byteIndex;

            fixed (char* pChars = s) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
                return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
        }

        // Encodes a range of characters in a character array into a range of bytes
        // in a byte array. An exception occurs if the byte array is not large
        // enough to hold the complete encoding of the characters. The
        // GetByteCount method can be used to determine the exact number of
        // bytes that will be produced for a given range of characters.
        // Alternatively, the GetMaxByteCount method can be used to
        // determine the maximum number of bytes that will be produced for a given
        // number of characters, regardless of the actual character values.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe int GetBytes(char[] chars, int charIndex, int charCount,
                                            byte[] bytes, int byteIndex)
        {
            // Validate parameters
            if (chars == null || bytes == null)
                throw new ArgumentNullException((chars == null ? nameof(chars) : nameof(bytes)), SR.ArgumentNull_Array);

            if (charIndex < 0 || charCount < 0)
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (chars.Length - charIndex < charCount)
                throw new ArgumentOutOfRangeException(nameof(chars), SR.ArgumentOutOfRange_IndexCountBuffer);

            if (byteIndex < 0 || byteIndex > bytes.Length)
                throw new ArgumentOutOfRangeException(nameof(byteIndex), SR.ArgumentOutOfRange_Index);

            // If nothing to encode return 0, avoid fixed problem
            if (charCount == 0)
                return 0;

            // Just call pointer version
            int byteCount = bytes.Length - byteIndex;

            fixed (char* pChars = chars) fixed (byte* pBytes = &MemoryMarshal.GetReference((Span<byte>)bytes))
                // Remember that byteCount is the room left after byteIndex, not the size of the array.
                return GetBytes(pChars + charIndex, charCount, pBytes + byteIndex, byteCount, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        [CLSCompliant(false)]
        public override unsafe int GetBytes(char* chars, int charCount, byte* bytes, int byteCount)
        {
            // Validate Parameters
            if (bytes == null || chars == null)
                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);

            if (charCount < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);

            return GetBytes(chars, charCount, bytes, byteCount, null);
        }

        // Returns the number of characters produced by decoding a range of bytes
        // in a byte array.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe int GetCharCount(byte[] bytes, int index, int count)
        {
            // Validate Parameters
            if (bytes == null)
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (bytes.Length - index < count)
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);

            // If no input just return 0, fixed doesn't like 0 length arrays.
            if (count == 0)
                return 0;

            // Just call pointer version
            fixed (byte* pBytes = bytes)
                return GetCharCount(pBytes + index, count, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        [CLSCompliant(false)]
        public override unsafe int GetCharCount(byte* bytes, int count)
        {
            // Validate Parameters
            if (bytes == null)
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

            if (count < 0)
                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_NeedNonNegNum);

            return GetCharCount(bytes, count, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                            char[] chars, int charIndex)
        {
            // Validate Parameters
            if (bytes == null || chars == null)
                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);

            if (byteIndex < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (bytes.Length - byteIndex < byteCount)
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);

            if (charIndex < 0 || charIndex > chars.Length)
                throw new ArgumentOutOfRangeException(nameof(charIndex), SR.ArgumentOutOfRange_Index);

            // If no input, return 0 & avoid fixed problem
            if (byteCount == 0)
                return 0;

            // Just call pointer version
            int charCount = chars.Length - charIndex;

            fixed (byte* pBytes = bytes) fixed (char* pChars = &MemoryMarshal.GetReference((Span<char>)chars))
                // Remember that charCount is the room left after charIndex, not the size of the array.
                return GetChars(pBytes + byteIndex, byteCount, pChars + charIndex, charCount, null);
        }

        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding

        [CLSCompliant(false)]
        public override unsafe int GetChars(byte* bytes, int byteCount, char* chars, int charCount)
        {
            // Validate Parameters
            if (bytes == null || chars == null)
                throw new ArgumentNullException(bytes == null ? nameof(bytes) : nameof(chars), SR.ArgumentNull_Array);

            if (charCount < 0 || byteCount < 0)
                throw new ArgumentOutOfRangeException((charCount < 0 ? nameof(charCount) : nameof(byteCount)), SR.ArgumentOutOfRange_NeedNonNegNum);

            return GetChars(bytes, byteCount, chars, charCount, null);
        }

        // Returns a string containing the decoded representation of a range of
        // bytes in a byte array.
        //
        // All of our public Encodings that don't use EncodingNLS must have this (including EncodingNLS)
        // So if you fix this, fix the others.  Currently those include:
        // EncodingNLS, UTF7Encoding, UTF8Encoding, UTF32Encoding, ASCIIEncoding, UnicodeEncoding
        // parent method is safe

        public override unsafe string GetString(byte[] bytes, int index, int count)
        {
            // Validate Parameters
            if (bytes == null)
                throw new ArgumentNullException(nameof(bytes), SR.ArgumentNull_Array);

            if (index < 0 || count < 0)
                throw new ArgumentOutOfRangeException((index < 0 ? nameof(index) : nameof(count)), SR.ArgumentOutOfRange_NeedNonNegNum);

            if (bytes.Length - index < count)
                throw new ArgumentOutOfRangeException(nameof(bytes), SR.ArgumentOutOfRange_IndexCountBuffer);

            // Avoid problems with empty input buffer
            if (count == 0) return string.Empty;

            fixed (byte* pBytes = bytes)
                return string.CreateStringFromEncoding(
                    pBytes + index, count, this);
        }

        //
        // End of standard methods copied from EncodingNLS.cs
        //

        // Counts the bytes needed by running the encoder with a null output buffer.
        internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
        {
            Debug.Assert(chars != null, "[UTF7Encoding.GetByteCount]chars!=null");
            Debug.Assert(count >= 0, "[UTF7Encoding.GetByteCount]count >=0");

            // Just call GetBytes with bytes == null
            return GetBytes(chars, count, null, 0, baseEncoder);
        }

        // Core encoder. State is carried in (bits, bitCount):
        //   bitCount == -1 : not in base64 mode
        //   bitCount >=  0 : in base64 mode with bitCount pending low-order bits held in 'bits'
        // When bytes == null this only counts, and the encoder state is not written back.
        internal override unsafe int GetBytes(char* chars, int charCount,
                                              byte* bytes, int byteCount, EncoderNLS baseEncoder)
        {
            Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetBytes]byteCount >=0");
            Debug.Assert(chars != null, "[UTF7Encoding.GetBytes]chars!=null");
            Debug.Assert(charCount >= 0, "[UTF7Encoding.GetBytes]charCount >=0");

            // Get encoder info
            UTF7Encoding.Encoder encoder = (UTF7Encoding.Encoder)baseEncoder;

            // Default bits & count
            int bits = 0;
            int bitCount = -1;

            // prepare our helpers
            Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
                this, encoder, bytes, byteCount, chars, charCount);

            if (encoder != null)
            {
                bits = encoder.bits;
                bitCount = encoder.bitCount;

                // May have had too many left over
                while (bitCount >= 6)
                {
                    bitCount -= 6;
                    // If we fail we'll never really have enough room
                    if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
                        ThrowBytesOverflow(encoder, buffer.Count == 0);
                }
            }

            while (buffer.MoreData)
            {
                char currentChar = buffer.GetNextChar();

                if (currentChar < 0x80 && _directEncode[currentChar])
                {
                    if (bitCount >= 0)
                    {
                        if (bitCount > 0)
                        {
                            // Try to add the next byte: the pending bits, left-aligned into a 6-bit group
                            if (!buffer.AddByte(_base64Bytes[(bits << (6 - bitCount)) & 0x3F]))
                                break;                                          // Stop here, didn't throw

                            bitCount = 0;
                        }

                        // Need to emit '-' to leave base64 mode before our char
                        if (!buffer.AddByte((byte)'-'))
                            break;                                          // Stop here, didn't throw

                        bitCount = -1;
                    }

                    // Need to emit our char
                    if (!buffer.AddByte((byte)currentChar))
                        break;                                          // Stop here, didn't throw
                }
                else if (bitCount < 0 && currentChar == '+')
                {
                    // A literal '+' outside base64 mode is written as "+-"
                    if (!buffer.AddByte((byte)'+', (byte)'-'))
                        break;                                          // Stop here, didn't throw
                }
                else
                {
                    if (bitCount < 0)
                    {
                        // Need to emit a + and 12 bits (3 bytes)
                        // Only 12 of the 16 bits will be emitted this time, the other 4 wait 'til next time
                        if (!buffer.AddByte((byte)'+'))
                            break;                                          // Stop here, didn't throw

                        // We're now in bit mode, but haven't stored data yet
                        bitCount = 0;
                    }

                    // Add our bits
                    bits = (bits << 16) | currentChar;
                    bitCount += 16;

                    while (bitCount >= 6)
                    {
                        bitCount -= 6;
                        if (!buffer.AddByte(_base64Bytes[(bits >> bitCount) & 0x3F]))
                        {
                            bitCount += 6;                              // We didn't use these bits
                            currentChar = buffer.GetNextChar();         // We're processing this char still, but AddByte
                                                                        // --'d it when we ran out of space
                            break;                                      // Stop here, not enough room for bytes
                        }
                    }

                    if (bitCount >= 6)
                        break;                  // Didn't have room to encode enough bits
                }
            }

            // Now if we have bits left over we have to encode them.
            // MustFlush may have been cleared by encoding.ThrowBytesOverflow earlier if converting
            if (bitCount >= 0 && (encoder == null || encoder.MustFlush))
            {
                // Do we have bits we have to stick in?
                if (bitCount > 0)
                {
                    if (buffer.AddByte(_base64Bytes[(bits << (6 - bitCount)) & 0x3F]))
                    {
                        // Emitted spare bits, 0 bits left
                        bitCount = 0;
                    }
                }

                // If converting and failed bitCount above, then we'll fail this too
                if (buffer.AddByte((byte)'-'))
                {
                    // turned off bit mode
                    bits = 0;
                    bitCount = -1;
                }
                else
                    // If not successful, convert will maintain state for next time, also
                    // AddByte will have decremented our char count, however we need it to remain the same
                    buffer.GetNextChar();
            }

            // Do we have an encoder we're allowed to use?
            // bytes == null if counting, so don't use encoder then
            if (bytes != null && encoder != null)
            {
                // We already cleared bits & bitcount for mustflush case
                encoder.bits = bits;
                encoder.bitCount = bitCount;
                encoder._charsUsed = buffer.CharsUsed;
            }

            return buffer.Count;
        }

        // Counts the chars produced by running the decoder with a null output buffer.
        internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
        {
            Debug.Assert(count >= 0, "[UTF7Encoding.GetCharCount]count >=0");
            Debug.Assert(bytes != null, "[UTF7Encoding.GetCharCount]bytes!=null");

            // Just call GetChars with null char* to do counting
            return GetChars(bytes, count, null, 0, baseDecoder);
        }

        // Core decoder. State is carried in (bits, bitCount, firstByte):
        //   bitCount == -1 : not in base64 mode
        //   bitCount >=  0 : in base64 mode with bitCount pending low-order bits held in 'bits'
        //   firstByte      : the previous byte was the '+' that entered base64 mode
        // When chars == null this only counts, and the decoder state is not written back.
        internal override unsafe int GetChars(byte* bytes, int byteCount,
                                              char* chars, int charCount, DecoderNLS baseDecoder)
        {
            Debug.Assert(byteCount >= 0, "[UTF7Encoding.GetChars]byteCount >=0");
            Debug.Assert(bytes != null, "[UTF7Encoding.GetChars]bytes!=null");
            Debug.Assert(charCount >= 0, "[UTF7Encoding.GetChars]charCount >=0");

            // Might use a decoder
            UTF7Encoding.Decoder decoder = (UTF7Encoding.Decoder)baseDecoder;

            // Get our output buffer info.
            Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
                this, decoder, chars, charCount, bytes, byteCount);

            // Get decoder info
            int bits = 0;
            int bitCount = -1;
            bool firstByte = false;
            if (decoder != null)
            {
                bits = decoder.bits;
                bitCount = decoder.bitCount;
                firstByte = decoder.firstByte;

                Debug.Assert(firstByte == false || decoder.bitCount <= 0,
                    "[UTF7Encoding.GetChars]If remembered bits, then first byte flag shouldn't be set");
            }

            // We may have had bits in the decoder that we couldn't output last time, so do so now
            if (bitCount >= 16)
            {
                // Check our decoder buffer
                if (!buffer.AddChar((char)((bits >> (bitCount - 16)) & 0xFFFF)))
                    ThrowCharsOverflow(decoder, true);  // Always throw, they need at least 1 char even in Convert

                // Used this one, clean up extra bits
                bitCount -= 16;
            }

            // Loop through the input
            while (buffer.MoreData)
            {
                byte currentByte = buffer.GetNextByte();
                int c;

                if (bitCount >= 0)
                {
                    //
                    // Modified base 64 encoding.
                    //
                    sbyte v;
                    if (currentByte < 0x80 && ((v = _base64Values[currentByte]) >= 0))
                    {
                        firstByte = false;
                        bits = (bits << 6) | ((byte)v);
                        bitCount += 6;
                        if (bitCount >= 16)
                        {
                            c = (bits >> (bitCount - 16)) & 0xFFFF;
                            bitCount -= 16;
                        }
                        // If not enough bits just continue
                        else continue;
                    }
                    else
                    {
                        // If it wasn't a base 64 byte, everything's going to turn off base 64 mode
                        bitCount = -1;

                        if (currentByte != '-')
                        {
                            // Either >= 0x80 or a 7-bit byte that is not in the base 64 table.
                            // (The < 0x80 test above is needed because _base64Values only has 128 entries.)
                            // This is not a valid base 64 byte.  Terminate the shifted-sequence and
                            // hand this byte to the fallback.

                            // not in base 64 table
                            // According to the RFC 1642 and the example code of UTF-7
                            // in Unicode 2.0, we should just zero-extend the invalid UTF7 byte

                            // Chars won't be updated unless this works, try to fallback
                            if (!buffer.Fallback(currentByte))
                                break;                                          // Stop here, didn't throw

                            // Used that byte, we're done with it
                            continue;
                        }

                        //
                        // The encoding for '+' is "+-".
                        //
                        if (firstByte) c = '+';
                        // We just turn it off if not emitting a +, so we're done.
                        else continue;
                    }
                    //
                    // End of modified base 64 encoding block.
                    //
                }
                else if (currentByte == '+')
                {
                    //
                    // Found the start of a modified base 64 encoding block or a plus sign.
                    //
                    bitCount = 0;
                    firstByte = true;
                    continue;
                }
                else
                {
                    // Normal character
                    if (currentByte >= 0x80)
                    {
                        // Try to fallback
                        if (!buffer.Fallback(currentByte))
                            break;                                          // Stop here, didn't throw

                        // Done falling back
                        continue;
                    }

                    // Use the normal character
                    c = currentByte;
                }

                if (c >= 0)
                {
                    // Check our buffer
                    if (!buffer.AddChar((char)c))
                    {
                        // No room.  If it was a plain char we'll try again later.
                        // Note, we'll consume this byte and stick it in decoder, even if we can't output it
                        if (bitCount >= 0)                                  // Can we remember this byte (char)
                        {
                            buffer.AdjustBytes(+1);                         // Need to re-add the byte that AddChar subtracted when it failed
                            bitCount += 16;                                 // We'll still need that char we have in our bits
                        }
                        break;                                              // didn't throw, stop
                    }
                }
            }

            // Stick stuff in the decoder if we can (chars == null if counting, so don't store decoder)
            if (chars != null && decoder != null)
            {
                // MustFlush?  (Could've been cleared by ThrowCharsOverflow if Convert & didn't reach end of buffer)
                if (decoder.MustFlush)
                {
                    // RFC doesn't specify what would happen if we have non-0 leftover bits, we just drop them
                    decoder.bits = 0;
                    decoder.bitCount = -1;
                    decoder.firstByte = false;
                }
                else
                {
                    decoder.bits = bits;
                    decoder.bitCount = bitCount;
                    decoder.firstByte = firstByte;
                }
                decoder._bytesUsed = buffer.BytesUsed;
            }
            // else ignore any hanging bits.

            // Return our count
            return buffer.Count;
        }

        /// <summary>Returns a new stateful UTF7 decoder bound to this encoding.</summary>
        public override System.Text.Decoder GetDecoder()
        {
            return new UTF7Encoding.Decoder(this);
        }

        /// <summary>Returns a new stateful UTF7 encoder bound to this encoding.</summary>
        public override System.Text.Encoder GetEncoder()
        {
            return new UTF7Encoding.Encoder(this);
        }

        /// <summary>
        /// Returns an upper bound on the bytes produced for <paramref name="charCount"/> chars:
        /// 3 bytes per char plus 2.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="charCount"/> is negative, or the result does not fit in an <see cref="int"/>.
        /// </exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
                throw new ArgumentOutOfRangeException(nameof(charCount),
                     SR.ArgumentOutOfRange_NeedNonNegNum);

            // Suppose that every char can not be direct-encoded, we know that
            // a byte can encode 6 bits of the Unicode character.  And we will
            // also need two extra bytes for the shift-in ('+') and shift-out ('-') mark.
            // Therefore, the max byte should be:
            // byteCount = 2 + Math.Ceiling((double)charCount * 16 / 6);
            // That is always <= 2 + 3 * charCount;
            // Longest case is alternating encoded, direct, encoded data for 5 + 1 + 5... bytes per char.
            // UTF7 doesn't have left over surrogates, but if no input we may need an output - to turn off
            // encoding if MustFlush is true.

            // It's easiest to think of this as 2 bytes to turn on/off the base64 mode, then 3 bytes per char.
            // 3 bytes is 18 bits of encoding, which is more than we need, but if it's direct encoded then 3
            // bytes allows us to turn off and then back on base64 mode if necessary.

            // Note that UTF7 encodes surrogates individually and isn't worried about mismatches, so all
            // code points are encodable in UTF7.
            long byteCount = (long)charCount * 3 + 2;

            // check for overflow
            if (byteCount > 0x7fffffff)
                throw new ArgumentOutOfRangeException(nameof(charCount), SR.ArgumentOutOfRange_GetByteCountOverflow);

            return (int)byteCount;
        }

        /// <summary>
        /// Returns an upper bound on the chars produced for <paramref name="byteCount"/> bytes:
        /// one char per byte, and never less than 1.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
                throw new ArgumentOutOfRangeException(nameof(byteCount),
                     SR.ArgumentOutOfRange_NeedNonNegNum);

            // Worst case is 1 char per byte.  Minimum 1 for left over bits in case decoder is being flushed
            // Also note that we ignore extra bits (per spec), so UTF7 doesn't have unknown in this direction.
            int charCount = byteCount;
            if (charCount == 0) charCount = 1;

            return charCount;
        }

        // Of all the amazing things... This MUST be Decoder so that our com name
        // for System.Text.Decoder doesn't change
        private sealed class Decoder : DecoderNLS
        {
            /*private*/
            internal int bits;
            /*private*/
            internal int bitCount;
            /*private*/
            internal bool firstByte;

            public Decoder(UTF7Encoding encoding) : base(encoding)
            {
                // base calls reset
            }

            // Returns to the "not in base64 mode" state and clears any fallback buffer.
            public override void Reset()
            {
                this.bits = 0;
                this.bitCount = -1;
                this.firstByte = false;
                _fallbackBuffer?.Reset();
            }

            // Anything left in our decoder?
            internal override bool HasState
            {
                get
                {
                    // NOTE: This forces the last -, which some encoder might not encode.  If we
                    // don't see it we don't think we're done reading.
                    return this.bitCount != -1;
                }
            }
        }

        // Of all the amazing things... This MUST be Encoder so that our com name
        // for System.Text.Encoder doesn't change
        private sealed class Encoder : EncoderNLS
        {
            /*private*/
            internal int bits;
            /*private*/
            internal int bitCount;

            public Encoder(UTF7Encoding encoding) : base(encoding)
            {
                // base calls reset
            }

            // Returns to the "not in base64 mode" state and clears any fallback buffer.
            public override void Reset()
            {
                this.bitCount = -1;
                this.bits = 0;
                _fallbackBuffer?.Reset();
            }

            // Anything left in our encoder?
            internal override bool HasState
            {
                get
                {
                    return this.bits != 0 || this.bitCount != -1;
                }
            }
        }

        // Preexisting UTF7 behavior for bad bytes was just to spit out the byte as the next char
        // and turn off base64 mode if it was in that mode.  We still exit the mode, but now we fallback.
        private sealed class DecoderUTF7Fallback : DecoderFallback
        {
            // Construction.  This fallback carries no state of its own.
            public DecoderUTF7Fallback()
            {
            }

            public override DecoderFallbackBuffer CreateFallbackBuffer()
            {
                return new DecoderUTF7FallbackBuffer(this);
            }

            // Maximum number of characters that this instance of this fallback could return
            public override int MaxCharCount
            {
                get
                {
                    // returns 1 char per bad byte
                    return 1;
                }
            }

            // All instances are interchangeable, so any other DecoderUTF7Fallback is equal.
            public override bool Equals(object value)
            {
                return value is DecoderUTF7Fallback;
            }

            public override int GetHashCode()
            {
                return 984;
            }
        }

        private sealed class DecoderUTF7FallbackBuffer : DecoderFallbackBuffer
        {
            // The single fallback char (the bad byte, zero-extended)
            private char cFallback = (char)0;
            // Chars left to hand out; -1 when no fallback is in progress
            private int iCount = -1;
            // Size of the current fallback (1 once Fallback has succeeded)
            private int iSize;

            // Construction
            public DecoderUTF7FallbackBuffer(DecoderUTF7Fallback fallback)
            {
            }

            // Fallback Methods
            public override bool Fallback(byte[] bytesUnknown, int index)
            {
                // We expect no previous fallback in our buffer
                Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.Fallback] Can't have recursive fallbacks");
                Debug.Assert(bytesUnknown.Length == 1, "[DecoderUTF7FallbackBuffer.Fallback] Only possible fallback case should be 1 unknown byte");

                // Go ahead and get our fallback
                cFallback = (char)bytesUnknown[0];

                // Any of the fallback characters can be handled except for 0
                if (cFallback == 0)
                {
                    return false;
                }

                iCount = iSize = 1;

                return true;
            }

            public override char GetNextChar()
            {
                if (iCount-- > 0)
                    return cFallback;

                // Note: this means that 0 in UTF7 stream will never be emitted.
                return (char)0;
            }

            public override bool MovePrevious()
            {
                if (iCount >= 0)
                {
                    iCount++;
                }

                // return true if we were allowed to do this
                return iCount >= 0 && iCount <= iSize;
            }

            // Return # of chars left in this fallback
            public override int Remaining
            {
                get
                {
                    return (iCount > 0) ? iCount : 0;
                }
            }

            // Clear the buffer
            public override unsafe void Reset()
            {
                iCount = -1;
                byteStart = null;
            }

            // This version just counts the fallback and doesn't actually copy anything.
            // Right now this takes both byte[] and byte*, since we might have extra bytes, hence the
            // array, and we might need the index, hence the byte*
            internal override unsafe int InternalFallback(byte[] bytes, byte* pBytes)
            {
                // We expect no previous fallback in our buffer
                Debug.Assert(iCount < 0, "[DecoderUTF7FallbackBuffer.InternalFallback] Can't have recursive fallbacks");
                if (bytes.Length != 1)
                {
                    throw new ArgumentException(SR.Argument_InvalidCharSequenceNoIndex);
                }

                // Can't fallback a byte 0, so return for that case, 1 otherwise.
                return bytes[0] == 0 ? 0 : 1;
            }
        }
    }
}