// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text.Unicode;

namespace System.Text.Internal
{
    // A bitmap holding one bit per UTF-16 code unit (U+0000..U+FFFF).
    // A set bit means the character is allowed (can be returned unencoded);
    // a cleared bit means it is forbidden (must be returned encoded).
    // Bits are packed 32 to a uint: word index = value >> 5, bit position = value & 0x1F.
    // The struct only stores a reference to the backing array, so copies of the struct
    // share the same bits; use Clone() to obtain an independent bitmap.
    internal readonly struct AllowedCharactersBitmap
    {
        // Number of 32-bit words needed to hold 0x10000 bits.
        private const int ALLOWED_CHARS_BITMAP_LENGTH = 0x10000 / (8 * sizeof(uint));

        private readonly uint[] _allowedCharacters;

        // Factory to use in place of the default ctor: a default-initialized instance
        // has no backing array. The returned bitmap starts with every character forbidden.
        public static AllowedCharactersBitmap CreateNew()
            => new AllowedCharactersBitmap(new uint[ALLOWED_CHARS_BITMAP_LENGTH]);

        // Wraps the supplied array without copying it.
        // Throws ArgumentNullException when the array is null.
        private AllowedCharactersBitmap(uint[] allowedCharacters)
            => _allowedCharacters = allowedCharacters ?? throw new ArgumentNullException(nameof(allowedCharacters));

        // Marks a character as allowed (can be returned unencoded)
        public void AllowCharacter(char character)
        {
            uint mask = 0x1U << (character & 0x1F);
            _allowedCharacters[character >> 5] |= mask;
        }

        // Marks a character as forbidden (must be returned encoded)
        public void ForbidCharacter(char character)
        {
            uint mask = 0x1U << (character & 0x1F);
            _allowedCharacters[character >> 5] &= ~mask;
        }

        // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
        // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
        public void ForbidUndefinedCharacters()
        {
            uint[] definedCharacters = UnicodeHelpers.GetDefinedCharacterBitmap();
            Debug.Assert(definedCharacters.Length == _allowedCharacters.Length);

            // Word-by-word intersection: a bit stays set only if it is set in both bitmaps.
            for (int word = 0; word < _allowedCharacters.Length; word++)
            {
                _allowedCharacters[word] &= definedCharacters[word];
            }
        }

        // Marks all characters as forbidden (must be returned encoded)
        public void Clear()
            => Array.Clear(_allowedCharacters, 0, _allowedCharacters.Length);

        // Creates a deep copy of this bitmap (the backing array is duplicated, not shared)
        public AllowedCharactersBitmap Clone()
            => new AllowedCharactersBitmap((uint[])_allowedCharacters.Clone());

        // Determines whether the given character can be returned unencoded.
        public bool IsCharacterAllowed(char character)
        {
            uint mask = 0x1U << (character & 0x1F);
            return (_allowedCharacters[character >> 5] & mask) != 0;
        }

        // Determines whether the given scalar value can be returned unencoded.
        // No range check is performed here: the bitmap only covers U+0000..U+FFFF, so a
        // value outside that range indexes outside the backing array.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public bool IsUnicodeScalarAllowed(int unicodeScalar)
        {
            uint mask = 0x1U << (unicodeScalar & 0x1F);
            return (_allowedCharacters[unicodeScalar >> 5] & mask) != 0;
        }

        // Scans the first textLength chars at text and returns the index of the first
        // character that is not allowed, or -1 when every character is allowed
        // (including when textLength is zero or negative, in which case text is not read).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public unsafe int FindFirstCharacterToEncode(char* text, int textLength)
        {
            int position = 0;
            while (position < textLength)
            {
                if (!IsCharacterAllowed(text[position]))
                {
                    return position;
                }

                position++;
            }

            return -1;
        }
    }
}