1 // ==++==
2 //
3 //   Copyright (c) Microsoft Corporation.  All rights reserved.
4 //
5 // ==--==
6 namespace System.Text
7 {
8     using System;
9     using System.Security;
10     using System.Globalization;
11     using System.Text;
12     using System.Runtime.CompilerServices;
13     using System.Runtime.InteropServices;
14     using System.Runtime.Versioning;
15     using System.Diagnostics.Contracts;
16 
17     // This is the enumeration for Normalization Forms
18 [System.Runtime.InteropServices.ComVisible(true)]
19     public enum NormalizationForm
20     {
21 #if !FEATURE_NORM_IDNA_ONLY
22         FormC    = 1,
23         FormD    = 2,
24         FormKC   = 5,
25         FormKD   = 6
26 #endif // !FEATURE_NORM_IDNA_ONLY
27     }
28 
29     internal enum ExtendedNormalizationForms
30     {
31 #if !FEATURE_NORM_IDNA_ONLY
32         FormC    = 1,
33         FormD    = 2,
34         FormKC   = 5,
35         FormKD   = 6,
36 #endif // !FEATURE_NORM_IDNA_ONLY
37         FormIdna = 0xd,
38 #if !FEATURE_NORM_IDNA_ONLY
39         FormCDisallowUnassigned     = 0x101,
40         FormDDisallowUnassigned     = 0x102,
41         FormKCDisallowUnassigned    = 0x105,
42         FormKDDisallowUnassigned    = 0x106,
43 #endif // !FEATURE_NORM_IDNA_ONLY
44         FormIdnaDisallowUnassigned  = 0x10d
45     }
46 
47     // This internal class wraps up our normalization behavior
48 
49     internal class Normalization
50     {
51         //
52         // Flags that track whether given normalization form was initialized
53         //
54 #if !FEATURE_NORM_IDNA_ONLY
55         private static volatile bool NFC;
56         private static volatile bool NFD;
57         private static volatile bool NFKC;
58         private static volatile bool NFKD;
59 #endif // !FEATURE_NORM_IDNA_ONLY
60         private static volatile bool IDNA;
61 #if !FEATURE_NORM_IDNA_ONLY
62         private static volatile bool NFCDisallowUnassigned;
63         private static volatile bool NFDDisallowUnassigned;
64         private static volatile bool NFKCDisallowUnassigned;
65         private static volatile bool NFKDDisallowUnassigned;
66 #endif // !FEATURE_NORM_IDNA_ONLY
67         private static volatile bool IDNADisallowUnassigned;
68         private static volatile bool Other;
69 
70         // These are error codes we get back from the Normalization DLL
71         private const int ERROR_SUCCESS = 0;
72         private const int ERROR_NOT_ENOUGH_MEMORY = 8;
73         private const int ERROR_INVALID_PARAMETER = 87;
74         private const int ERROR_INSUFFICIENT_BUFFER = 122;
75         private const int ERROR_NO_UNICODE_TRANSLATION = 1113;
76 
77         [System.Security.SecurityCritical]  // auto-generated
78         [ResourceExposure(ResourceScope.None)]
79         [ResourceConsumption(ResourceScope.Process, ResourceScope.Process)]
InitializeForm(NormalizationForm form, String strDataFile)80         static private unsafe void InitializeForm(NormalizationForm form, String strDataFile)
81         {
82             byte* pTables = null;
83 
84             // Normalization uses OS on Win8
85             if (!Environment.IsWindows8OrAbove)
86             {
87                 if (strDataFile == null)
88                 {
89                     // They were supposed to have a form that we know about!
90                     throw new ArgumentException(
91                         Environment.GetResourceString("Argument_InvalidNormalizationForm"));
92                 }
93 
94                 // Tell the DLL where to find our data
95                 pTables = GlobalizationAssembly.GetGlobalizationResourceBytePtr(
96                    typeof(Normalization).Assembly, strDataFile);
97                 if (pTables == null)
98                 {
99                     // Unable to load the specified normalizationForm,
100                     // tables not loaded from file
101                     throw new ArgumentException(
102                         Environment.GetResourceString("Argument_InvalidNormalizationForm"));
103                 }
104             }
105 
106             nativeNormalizationInitNormalization(form, pTables);
107         }
108 
109         [System.Security.SecurityCritical]  // auto-generated
EnsureInitialized(NormalizationForm form)110         static private void EnsureInitialized(NormalizationForm form)
111         {
112             switch ((ExtendedNormalizationForms)form)
113             {
114 #if !FEATURE_NORM_IDNA_ONLY
115                 case ExtendedNormalizationForms.FormC:
116                     if (NFC) return;
117                     InitializeForm(form, "normnfc.nlp");
118                     NFC = true;
119                     break;
120 
121                 case ExtendedNormalizationForms.FormD:
122                     if (NFD) return;
123                     InitializeForm(form, "normnfd.nlp");
124                     NFD = true;
125                     break;
126 
127                 case ExtendedNormalizationForms.FormKC:
128                     if (NFKC) return;
129                     InitializeForm(form, "normnfkc.nlp");
130                     NFKC = true;
131                     break;
132 
133                 case ExtendedNormalizationForms.FormKD:
134                     if (NFKD) return;
135                     InitializeForm(form, "normnfkd.nlp");
136                     NFKD = true;
137                     break;
138 #endif // !FEATURE_NORM_IDNA_ONLY
139 
140                 case ExtendedNormalizationForms.FormIdna:
141                     if (IDNA) return;
142                     InitializeForm(form, "normidna.nlp");
143                     IDNA = true;
144                     break;
145 
146 #if !FEATURE_NORM_IDNA_ONLY
147                 case ExtendedNormalizationForms.FormCDisallowUnassigned:
148                     if (NFCDisallowUnassigned) return;
149                     InitializeForm(form, "normnfc.nlp");
150                     NFCDisallowUnassigned = true;
151                     break;
152 
153                 case ExtendedNormalizationForms.FormDDisallowUnassigned:
154                     if (NFDDisallowUnassigned) return;
155                     InitializeForm(form, "normnfd.nlp");
156                     NFDDisallowUnassigned = true;
157                     break;
158 
159                 case ExtendedNormalizationForms.FormKCDisallowUnassigned:
160                     if (NFKCDisallowUnassigned) return;
161                     InitializeForm(form, "normnfkc.nlp");
162                     NFKCDisallowUnassigned = true;
163                     break;
164 
165                 case ExtendedNormalizationForms.FormKDDisallowUnassigned:
166                     if (NFKDDisallowUnassigned) return;
167                     InitializeForm(form, "normnfkd.nlp");
168                     NFKDDisallowUnassigned = true;
169                     break;
170 #endif // !FEATURE_NORM_IDNA_ONLY
171 
172                 case ExtendedNormalizationForms.FormIdnaDisallowUnassigned:
173                     if (IDNADisallowUnassigned) return;
174                     InitializeForm(form, "normidna.nlp");
175                     IDNADisallowUnassigned = true;
176                     break;
177 
178                 default:
179                     if (Other) return;
180                     InitializeForm(form, null);
181                     Other = true;
182                     break;
183             }
184         }
185 
186         [System.Security.SecurityCritical]
IsNormalized(String strInput, NormalizationForm normForm)187         internal static bool IsNormalized(String strInput, NormalizationForm normForm)
188         {
189             Contract.Requires(strInput != null);
190 
191             EnsureInitialized(normForm);
192 
193             int iError = ERROR_SUCCESS;
194             bool result = nativeNormalizationIsNormalizedString(
195                                 normForm,
196                                 ref iError,
197                                 strInput,
198                                 strInput.Length);
199 
200             switch(iError)
201             {
202                 // Success doesn't need to do anything
203                 case ERROR_SUCCESS:
204                     break;
205 
206                 // Do appropriate stuff for the individual errors:
207                 case ERROR_INVALID_PARAMETER:
208                 case ERROR_NO_UNICODE_TRANSLATION:
209                     throw new ArgumentException(
210                         Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex" ),
211                         "strInput");
212                 case ERROR_NOT_ENOUGH_MEMORY:
213                     throw new OutOfMemoryException(
214                         Environment.GetResourceString("Arg_OutOfMemoryException"));
215                 default:
216                     throw new InvalidOperationException(
217                         Environment.GetResourceString("UnknownError_Num", iError));
218             }
219 
220             return result;
221         }
222 
223         [System.Security.SecurityCritical]
Normalize(String strInput, NormalizationForm normForm)224         internal static String Normalize(String strInput, NormalizationForm normForm)
225         {
226             Contract.Requires(strInput != null);
227 
228             EnsureInitialized(normForm);
229 
230             int iError = ERROR_SUCCESS;
231 
232             // Guess our buffer size first
233             int iLength = nativeNormalizationNormalizeString(normForm, ref iError, strInput, strInput.Length, null, 0);
234 
235             // Could have an error (actually it'd be quite hard to have an error here)
236             if (iError != ERROR_SUCCESS)
237             {
238                 if (iError == ERROR_INVALID_PARAMETER)
239                     throw new ArgumentException(
240                         Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex" ),
241                         "strInput");
242 
243                 // We shouldn't really be able to get here..., guessing length is
244                 // a trivial math function...
245                 // Can't really be Out of Memory, but just in case:
246                 if (iError == ERROR_NOT_ENOUGH_MEMORY)
247                     throw new OutOfMemoryException(
248                         Environment.GetResourceString("Arg_OutOfMemoryException"));
249 
250                 // Who knows what happened?  Not us!
251                 throw new InvalidOperationException(
252                     Environment.GetResourceString("UnknownError_Num", iError));
253             }
254 
255             // Don't break for empty strings (only possible for D & KD and not really possible at that)
256             if (iLength == 0) return String.Empty;
257 
258             // Someplace to stick our buffer
259             char[] cBuffer = null;
260 
261             for (;;)
262             {
263                 // (re)allocation buffer and normalize string
264                 cBuffer = new char[iLength];
265 
266                 iLength = nativeNormalizationNormalizeString(
267                                     normForm,
268                                     ref iError,
269                                     strInput,
270                                     strInput.Length,
271                                     cBuffer,
272                                     cBuffer.Length);
273 
274                 if (iError == ERROR_SUCCESS)
275                     break;
276 
277                 // Could have an error (actually it'd be quite hard to have an error here)
278                 switch(iError)
279                 {
280                     // Do appropriate stuff for the individual errors:
281                     case ERROR_INSUFFICIENT_BUFFER:
282                         Contract.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length");
283                         continue;
284 
285                     case ERROR_INVALID_PARAMETER:
286                     case ERROR_NO_UNICODE_TRANSLATION:
287                         // Illegal code point or order found.  Ie: FFFE or D800 D800, etc.
288                         throw new ArgumentException(
289                             Environment.GetResourceString("Argument_InvalidCharSequence", iLength ),
290                             "strInput");
291                     case ERROR_NOT_ENOUGH_MEMORY:
292                         throw new OutOfMemoryException(
293                             Environment.GetResourceString("Arg_OutOfMemoryException"));
294 
295                     default:
296                         // We shouldn't get here...
297                         throw new InvalidOperationException(
298                             Environment.GetResourceString("UnknownError_Num", iError));
299                 }
300             }
301 
302             // Copy our buffer into our new string, which will be the appropriate size
303             return new String(cBuffer, 0, iLength);
304         }
305 
306         [System.Security.SecurityCritical]  // auto-generated
307         [ResourceExposure(ResourceScope.None)]
308         [MethodImplAttribute(MethodImplOptions.InternalCall)]
nativeNormalizationNormalizeString( NormalizationForm normForm, ref int iError, String lpSrcString, int cwSrcLength, char[] lpDstString, int cwDstLength)309         unsafe private static extern int nativeNormalizationNormalizeString(
310             NormalizationForm normForm, ref int iError,
311             String lpSrcString, int cwSrcLength,
312             char[] lpDstString, int cwDstLength);
313 
314         [System.Security.SecurityCritical]  // auto-generated
315         [ResourceExposure(ResourceScope.None)]
316         [MethodImplAttribute(MethodImplOptions.InternalCall)]
nativeNormalizationIsNormalizedString( NormalizationForm normForm, ref int iError, String lpString, int cwLength)317         unsafe private static extern bool nativeNormalizationIsNormalizedString(
318             NormalizationForm normForm, ref int iError,
319             String lpString, int cwLength);
320 
321         [System.Security.SecurityCritical]  // auto-generated
322         [ResourceExposure(ResourceScope.Process)]
323         [SuppressUnmanagedCodeSecurity]
324         [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
nativeNormalizationInitNormalization( NormalizationForm normForm, byte* pTableData)325         unsafe private static extern void nativeNormalizationInitNormalization(
326             NormalizationForm normForm, byte* pTableData);
327     }
328 }
329