1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Diagnostics;
6 using System.Runtime.CompilerServices;
7 using System.Text.Unicode;
8 
9 namespace System.Text.Internal
10 {
11     internal readonly struct AllowedCharactersBitmap
12     {
13         private const int ALLOWED_CHARS_BITMAP_LENGTH = 0x10000 / (8 * sizeof(uint));
14         private readonly uint[] _allowedCharacters;
15 
16         // should be called in place of the default ctor
CreateNewSystem.Text.Internal.AllowedCharactersBitmap17         public static AllowedCharactersBitmap CreateNew()
18         {
19             return new AllowedCharactersBitmap(new uint[ALLOWED_CHARS_BITMAP_LENGTH]);
20         }
21 
AllowedCharactersBitmapSystem.Text.Internal.AllowedCharactersBitmap22         private AllowedCharactersBitmap(uint[] allowedCharacters)
23         {
24             if(allowedCharacters == null)
25             {
26                 throw new ArgumentNullException(nameof(allowedCharacters));
27             }
28             _allowedCharacters = allowedCharacters;
29         }
30 
31         // Marks a character as allowed (can be returned unencoded)
AllowCharacterSystem.Text.Internal.AllowedCharactersBitmap32         public void AllowCharacter(char character)
33         {
34             int codePoint = character;
35             int index = codePoint >> 5;
36             int offset = codePoint & 0x1F;
37             _allowedCharacters[index] |= 0x1U << offset;
38         }
39 
40         // Marks a character as forbidden (must be returned encoded)
ForbidCharacterSystem.Text.Internal.AllowedCharactersBitmap41         public void ForbidCharacter(char character)
42         {
43             int codePoint = character;
44             int index = codePoint >> 5;
45             int offset = codePoint & 0x1F;
46             _allowedCharacters[index] &= ~(0x1U << offset);
47         }
48 
49         // Forbid codepoints which aren't mapped to characters or which are otherwise always disallowed
50         // (includes categories Cc, Cs, Co, Cn, Zs [except U+0020 SPACE], Zl, Zp)
ForbidUndefinedCharactersSystem.Text.Internal.AllowedCharactersBitmap51         public void ForbidUndefinedCharacters()
52         {
53             uint[] definedCharactersBitmap = UnicodeHelpers.GetDefinedCharacterBitmap();
54             Debug.Assert(definedCharactersBitmap.Length == _allowedCharacters.Length);
55             for (int i = 0; i < _allowedCharacters.Length; i++)
56             {
57                 _allowedCharacters[i] &= definedCharactersBitmap[i];
58             }
59         }
60 
61         // Marks all characters as forbidden (must be returned encoded)
ClearSystem.Text.Internal.AllowedCharactersBitmap62         public void Clear()
63         {
64             Array.Clear(_allowedCharacters, 0, _allowedCharacters.Length);
65         }
66 
67         // Creates a deep copy of this bitmap
CloneSystem.Text.Internal.AllowedCharactersBitmap68         public AllowedCharactersBitmap Clone()
69         {
70             return new AllowedCharactersBitmap((uint[])_allowedCharacters.Clone());
71         }
72 
73         // Determines whether the given character can be returned unencoded.
IsCharacterAllowedSystem.Text.Internal.AllowedCharactersBitmap74         public bool IsCharacterAllowed(char character)
75         {
76             int codePoint = character;
77             int index = codePoint >> 5;
78             int offset = codePoint & 0x1F;
79             return ((_allowedCharacters[index] >> offset) & 0x1U) != 0;
80         }
81 
82         // Determines whether the given character can be returned unencoded.
83         [MethodImpl(MethodImplOptions.AggressiveInlining)]
IsUnicodeScalarAllowedSystem.Text.Internal.AllowedCharactersBitmap84         public bool IsUnicodeScalarAllowed(int unicodeScalar)
85         {
86             int index = unicodeScalar >> 5;
87             int offset = unicodeScalar & 0x1F;
88             return ((_allowedCharacters[index] >> offset) & 0x1U) != 0;
89         }
90 
91         [MethodImpl(MethodImplOptions.AggressiveInlining)]
FindFirstCharacterToEncodeSystem.Text.Internal.AllowedCharactersBitmap92         public unsafe int FindFirstCharacterToEncode(char* text, int textLength)
93         {
94             for (int i = 0; i < textLength; i++)
95             {
96                 if (!IsCharacterAllowed(text[i])) { return i; }
97             }
98             return -1;
99         }
100     }
101 }
102