// ==++==
//
//   Copyright (c) Microsoft Corporation.  All rights reserved.
//
// ==--==
namespace System.Text
{
    using System;
    using System.Security;
    using System.Globalization;
    using System.Text;
    using System.Runtime.CompilerServices;
    using System.Runtime.InteropServices;
    using System.Runtime.Versioning;
    using System.Diagnostics.Contracts;

    // This is the enumeration for Normalization Forms.
    // The enum has no members when FEATURE_NORM_IDNA_ONLY is defined.
    [System.Runtime.InteropServices.ComVisible(true)]
    public enum NormalizationForm
    {
#if !FEATURE_NORM_IDNA_ONLY
        FormC = 1,
        FormD = 2,
        FormKC = 5,
        FormKD = 6
#endif // !FEATURE_NORM_IDNA_ONLY
    }

    // Internal superset of NormalizationForm. The values shared with
    // NormalizationForm are numerically identical, which is what allows
    // EnsureInitialized to cast a NormalizationForm to this type. Each
    // "DisallowUnassigned" value equals the corresponding base value plus 0x100.
    internal enum ExtendedNormalizationForms
    {
#if !FEATURE_NORM_IDNA_ONLY
        FormC = 1,
        FormD = 2,
        FormKC = 5,
        FormKD = 6,
#endif // !FEATURE_NORM_IDNA_ONLY
        FormIdna = 0xd,
#if !FEATURE_NORM_IDNA_ONLY
        FormCDisallowUnassigned = 0x101,
        FormDDisallowUnassigned = 0x102,
        FormKCDisallowUnassigned = 0x105,
        FormKDDisallowUnassigned = 0x106,
#endif // !FEATURE_NORM_IDNA_ONLY
        FormIdnaDisallowUnassigned = 0x10d
    }

    // This internal class wraps up our normalization behavior

    internal class Normalization
    {
        //
        // Flags that track whether given normalization form was initialized
        //
#if !FEATURE_NORM_IDNA_ONLY
        private static volatile bool NFC;
        private static volatile bool NFD;
        private static volatile bool NFKC;
        private static volatile bool NFKD;
#endif // !FEATURE_NORM_IDNA_ONLY
        private static volatile bool IDNA;
#if !FEATURE_NORM_IDNA_ONLY
        private static volatile bool NFCDisallowUnassigned;
        private static volatile bool NFDDisallowUnassigned;
        private static volatile bool NFKCDisallowUnassigned;
        private static volatile bool NFKDDisallowUnassigned;
#endif // !FEATURE_NORM_IDNA_ONLY
        private static volatile bool IDNADisallowUnassigned;
        private static volatile bool Other;

        // These are error codes we get back from the Normalization DLL
        private const int ERROR_SUCCESS = 0;
        private const int ERROR_NOT_ENOUGH_MEMORY = 8;
        private const int ERROR_INVALID_PARAMETER = 87;
        private const int ERROR_INSUFFICIENT_BUFFER = 122;
        private const int ERROR_NO_UNICODE_TRANSLATION = 1113;

        // Initializes native normalization support for one form.
        //
        //   form        - the form to initialize; passed through to the native initializer.
        //   strDataFile - name of the data resource holding the tables for this form.
        //                 Only consulted when not running on Windows 8 or above.
        //
        // Throws ArgumentException when, below Windows 8, strDataFile is null or the
        // resource bytes cannot be obtained.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.None)]
        [ResourceConsumption(ResourceScope.Process, ResourceScope.Process)]
        private static unsafe void InitializeForm(NormalizationForm form, String strDataFile)
        {
            // Stays null on Windows 8 and above, where no table is loaded here.
            byte* pTables = null;

            // Normalization uses OS on Win8
            if (!Environment.IsWindows8OrAbove)
            {
                if (strDataFile == null)
                {
                    // They were supposed to have a form that we know about!
                    throw new ArgumentException(
                        Environment.GetResourceString("Argument_InvalidNormalizationForm"));
                }

                // Tell the DLL where to find our data
                pTables = GlobalizationAssembly.GetGlobalizationResourceBytePtr(
                    typeof(Normalization).Assembly, strDataFile);
                if (pTables == null)
                {
                    // Unable to load the specified normalizationForm,
                    // tables not loaded from file
                    throw new ArgumentException(
                        Environment.GetResourceString("Argument_InvalidNormalizationForm"));
                }
            }

            nativeNormalizationInitNormalization(form, pTables);
        }

        // Makes sure InitializeForm has run for the requested form before it is used.
        //
        // Each known form has its own "already initialized" flag; the flag is set only
        // after InitializeForm returns, so a failed initialization is retried on the
        // next call. Forms that match no known value share the Other flag and are
        // initialized with a null data file name, which makes InitializeForm throw
        // ArgumentException when not running on Windows 8 or above.
        //
        // NOTE(review): no lock is taken here, so two threads can both reach
        // InitializeForm for the same form; presumably the native initializer
        // tolerates repeated calls -- confirm.
        [System.Security.SecurityCritical]  // auto-generated
        private static void EnsureInitialized(NormalizationForm form)
        {
            switch ((ExtendedNormalizationForms)form)
            {
#if !FEATURE_NORM_IDNA_ONLY
                case ExtendedNormalizationForms.FormC:
                    if (NFC) return;
                    InitializeForm(form, "normnfc.nlp");
                    NFC = true;
                    break;

                case ExtendedNormalizationForms.FormD:
                    if (NFD) return;
                    InitializeForm(form, "normnfd.nlp");
                    NFD = true;
                    break;

                case ExtendedNormalizationForms.FormKC:
                    if (NFKC) return;
                    InitializeForm(form, "normnfkc.nlp");
                    NFKC = true;
                    break;

                case ExtendedNormalizationForms.FormKD:
                    if (NFKD) return;
                    InitializeForm(form, "normnfkd.nlp");
                    NFKD = true;
                    break;
#endif // !FEATURE_NORM_IDNA_ONLY

                case ExtendedNormalizationForms.FormIdna:
                    if (IDNA) return;
                    InitializeForm(form, "normidna.nlp");
                    IDNA = true;
                    break;

#if !FEATURE_NORM_IDNA_ONLY
                // The DisallowUnassigned variants use the same data files as their
                // base forms but track initialization separately.
                case ExtendedNormalizationForms.FormCDisallowUnassigned:
                    if (NFCDisallowUnassigned) return;
                    InitializeForm(form, "normnfc.nlp");
                    NFCDisallowUnassigned = true;
                    break;

                case ExtendedNormalizationForms.FormDDisallowUnassigned:
                    if (NFDDisallowUnassigned) return;
                    InitializeForm(form, "normnfd.nlp");
                    NFDDisallowUnassigned = true;
                    break;

                case ExtendedNormalizationForms.FormKCDisallowUnassigned:
                    if (NFKCDisallowUnassigned) return;
                    InitializeForm(form, "normnfkc.nlp");
                    NFKCDisallowUnassigned = true;
                    break;

                case ExtendedNormalizationForms.FormKDDisallowUnassigned:
                    if (NFKDDisallowUnassigned) return;
                    InitializeForm(form, "normnfkd.nlp");
                    NFKDDisallowUnassigned = true;
                    break;
#endif // !FEATURE_NORM_IDNA_ONLY

                case ExtendedNormalizationForms.FormIdnaDisallowUnassigned:
                    if (IDNADisallowUnassigned) return;
                    InitializeForm(form, "normidna.nlp");
                    IDNADisallowUnassigned = true;
                    break;

                default:
                    if (Other) return;
                    InitializeForm(form, null);
                    Other = true;
                    break;
            }
        }

        // Reports whether strInput is already normalized in the given form.
        //
        //   strInput - string to test; must not be null (Contract.Requires).
        //   normForm - normalization form to test against.
        //
        // Returns the value produced by the native check when it reports ERROR_SUCCESS.
        //
        // Throws:
        //   ArgumentException         - native code reported ERROR_INVALID_PARAMETER or
        //                               ERROR_NO_UNICODE_TRANSLATION.
        //   OutOfMemoryException      - native code reported ERROR_NOT_ENOUGH_MEMORY.
        //   InvalidOperationException - any other native error code.
        [System.Security.SecurityCritical]
        internal static bool IsNormalized(String strInput, NormalizationForm normForm)
        {
            Contract.Requires(strInput != null);

            EnsureInitialized(normForm);

            int iError = ERROR_SUCCESS;
            bool result = nativeNormalizationIsNormalizedString(
                                normForm,
                                ref iError,
                                strInput,
                                strInput.Length);

            switch (iError)
            {
                // Success doesn't need to do anything
                case ERROR_SUCCESS:
                    break;

                // Do appropriate stuff for the individual errors:
                case ERROR_INVALID_PARAMETER:
                case ERROR_NO_UNICODE_TRANSLATION:
                    throw new ArgumentException(
                        Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"),
                        "strInput");
                case ERROR_NOT_ENOUGH_MEMORY:
                    throw new OutOfMemoryException(
                        Environment.GetResourceString("Arg_OutOfMemoryException"));
                default:
                    throw new InvalidOperationException(
                        Environment.GetResourceString("UnknownError_Num", iError));
            }

            return result;
        }

        // Returns strInput normalized to the given form.
        //
        //   strInput - string to normalize; must not be null (Contract.Requires).
        //   normForm - target normalization form.
        //
        // The native routine is first called with a null buffer to obtain a length
        // estimate, then called again with a buffer of that size. When it reports
        // ERROR_INSUFFICIENT_BUFFER the buffer is re-allocated using the length it
        // returned and the call is repeated.
        //
        // Throws:
        //   ArgumentException         - native code reported ERROR_INVALID_PARAMETER or
        //                               ERROR_NO_UNICODE_TRANSLATION (on either call).
        //   OutOfMemoryException      - native code reported ERROR_NOT_ENOUGH_MEMORY.
        //   InvalidOperationException - any other native error code.
        [System.Security.SecurityCritical]
        internal static String Normalize(String strInput, NormalizationForm normForm)
        {
            Contract.Requires(strInput != null);

            EnsureInitialized(normForm);

            int iError = ERROR_SUCCESS;

            // Guess our buffer size first
            int iLength = nativeNormalizationNormalizeString(normForm, ref iError, strInput, strInput.Length, null, 0);

            // Could have an error (actually it'd be quite hard to have an error here)
            if (iError != ERROR_SUCCESS)
            {
                switch (iError)
                {
                    // Both codes describe invalid input. They are handled together
                    // here so this path reports the same ArgumentException as
                    // IsNormalized and the normalization loop below, instead of
                    // surfacing ERROR_NO_UNICODE_TRANSLATION as an "unknown error".
                    case ERROR_INVALID_PARAMETER:
                    case ERROR_NO_UNICODE_TRANSLATION:
                        throw new ArgumentException(
                            Environment.GetResourceString("Argument_InvalidCharSequenceNoIndex"),
                            "strInput");

                    // We shouldn't really be able to get here..., guessing length is
                    // a trivial math function...
                    // Can't really be Out of Memory, but just in case:
                    case ERROR_NOT_ENOUGH_MEMORY:
                        throw new OutOfMemoryException(
                            Environment.GetResourceString("Arg_OutOfMemoryException"));

                    // Who knows what happened?  Not us!
                    default:
                        throw new InvalidOperationException(
                            Environment.GetResourceString("UnknownError_Num", iError));
                }
            }

            // Don't break for empty strings (only possible for D & KD and not really possible at that)
            if (iLength == 0) return String.Empty;

            // Someplace to stick our buffer
            char[] cBuffer = null;

            for (;;)
            {
                // (re)allocation buffer and normalize string
                cBuffer = new char[iLength];

                iLength = nativeNormalizationNormalizeString(
                                    normForm,
                                    ref iError,
                                    strInput,
                                    strInput.Length,
                                    cBuffer,
                                    cBuffer.Length);

                if (iError == ERROR_SUCCESS)
                    break;

                // Could have an error (actually it'd be quite hard to have an error here)
                switch (iError)
                {
                    // Do appropriate stuff for the individual errors:
                    case ERROR_INSUFFICIENT_BUFFER:
                        // iLength now holds the size to retry with.
                        // NOTE(review): only the assert guards against a non-growing
                        // length; this relies on the native side returning a larger
                        // value -- confirm.
                        Contract.Assert(iLength > cBuffer.Length, "Buffer overflow should have iLength > cBuffer.Length");
                        continue;

                    case ERROR_INVALID_PARAMETER:
                    case ERROR_NO_UNICODE_TRANSLATION:
                        // Illegal code point or order found.  Ie: FFFE or D800 D800, etc.
                        // iLength is used as the message argument here -- presumably the
                        // index of the offending character; confirm against native code.
                        throw new ArgumentException(
                            Environment.GetResourceString("Argument_InvalidCharSequence", iLength),
                            "strInput");
                    case ERROR_NOT_ENOUGH_MEMORY:
                        throw new OutOfMemoryException(
                            Environment.GetResourceString("Arg_OutOfMemoryException"));

                    default:
                        // We shouldn't get here...
                        throw new InvalidOperationException(
                            Environment.GetResourceString("UnknownError_Num", iError));
                }
            }

            // Copy our buffer into our new string, which will be the appropriate size
            return new String(cBuffer, 0, iLength);
        }

        // Native normalization entry point. As used by Normalize: called with a null
        // destination to obtain a length estimate, and with a buffer to normalize into
        // it; the return value is a length and iError receives an ERROR_* code.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.None)]
        [MethodImplAttribute(MethodImplOptions.InternalCall)]
        private static unsafe extern int nativeNormalizationNormalizeString(
            NormalizationForm normForm, ref int iError,
            String lpSrcString, int cwSrcLength,
            char[] lpDstString, int cwDstLength);

        // Native "is normalized" check. iError receives an ERROR_* code; the return
        // value is only used by IsNormalized when that code is ERROR_SUCCESS.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.None)]
        [MethodImplAttribute(MethodImplOptions.InternalCall)]
        private static unsafe extern bool nativeNormalizationIsNormalizedString(
            NormalizationForm normForm, ref int iError,
            String lpString, int cwLength);

        // Native initializer for one normalization form. pTableData is the table
        // pointer obtained by InitializeForm, or null on Windows 8 and above.
        [System.Security.SecurityCritical]  // auto-generated
        [ResourceExposure(ResourceScope.Process)]
        [SuppressUnmanagedCodeSecurity]
        [DllImport(JitHelpers.QCall, CharSet = CharSet.Unicode)]
        private static unsafe extern void nativeNormalizationInitNormalization(
            NormalizationForm normForm, byte* pTableData);
    }
}