// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Security;

namespace System.Globalization
{
    public partial class CompareInfo
    {
        // Native sort handle obtained from the globalization interop layer in InitSort.
        // It is only assigned when the instance is not in invariant mode.
        [NonSerialized]
        private Interop.GlobalizationInterop.SafeSortHandle _sortHandle;

        // True when the instance is in invariant mode, or when the sort name is "en-US" or "".
        // Together with CanUseAsciiOrdinalForOptions and IsFastSort() it gates the ordinal
        // fast paths in IndexOfCore, LastIndexOfCore, StartsWith and EndsWith.
        [NonSerialized]
        private bool _isAsciiEqualityOrdinal;

        /// <summary>
        /// Records the sort name of <paramref name="culture"/> and, when not in invariant mode,
        /// acquires the native sort handle for it.
        /// </summary>
        /// <param name="culture">The culture whose <c>SortName</c> selects the sort.</param>
        /// <exception cref="OutOfMemoryException">The native call reported <c>OutOfMemory</c>.</exception>
        /// <exception cref="ExternalException">The native call failed for any other reason.</exception>
        private void InitSort(CultureInfo culture)
        {
            _sortName = culture.SortName;

            if (_invariantMode)
            {
                _isAsciiEqualityOrdinal = true;
            }
            else
            {
                Interop.GlobalizationInterop.ResultCode resultCode = Interop.GlobalizationInterop.GetSortHandle(GetNullTerminatedUtf8String(_sortName), out _sortHandle);
                if (resultCode != Interop.GlobalizationInterop.ResultCode.Success)
                {
                    // Release the handle object before reporting the failure.
                    _sortHandle.Dispose();

                    if (resultCode == Interop.GlobalizationInterop.ResultCode.OutOfMemory)
                    {
                        throw new OutOfMemoryException();
                    }

                    throw new ExternalException(SR.Arg_ExternalException);
                }

                _isAsciiEqualityOrdinal = (_sortName == "en-US" || _sortName == "");
            }
        }

        /// <summary>
        /// Finds the first ordinal occurrence of <paramref name="value"/> inside the
        /// <paramref name="count"/> characters of <paramref name="source"/> that start at
        /// <paramref name="startIndex"/>.
        /// </summary>
        /// <param name="source">The string to search; must not be null.</param>
        /// <param name="value">The string to look for; must not be null.</param>
        /// <param name="startIndex">Index in <paramref name="source"/> of the first character of the search window.</param>
        /// <param name="count">Number of characters in the search window.</param>
        /// <param name="ignoreCase">When true, the search is delegated to the native ordinal-ignore-case routine.</param>
        /// <returns>
        /// The index in <paramref name="source"/> of the match, <paramref name="startIndex"/> when
        /// <paramref name="value"/> is empty, or -1 when there is no match.
        /// </returns>
        internal static unsafe int IndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            Debug.Assert(source != null);
            Debug.Assert(value != null);

            if (value.Length == 0)
            {
                return startIndex;
            }

            if (count < value.Length)
            {
                return -1;
            }

            if (ignoreCase)
            {
                fixed (char* pSource = source)
                {
                    // The native routine is handed a pointer that already starts at startIndex,
                    // so a successful result is rebased onto the full string.
                    int index = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + startIndex, count, findLast: false);
                    return index != -1 ?
                        startIndex + index :
                        -1;
                }
            }

            // Last position at which value still fits inside the search window.
            int endIndex = startIndex + (count - value.Length);
            for (int i = startIndex; i <= endIndex; i++)
            {
                // Count how many leading characters of value match source at position i.
                int valueIndex = 0;
                while (valueIndex < value.Length && source[i + valueIndex] == value[valueIndex])
                {
                    valueIndex++;
                }

                if (valueIndex == value.Length)
                {
                    return i;
                }
            }

            return -1;
        }

        /// <summary>
        /// Finds the last ordinal occurrence of <paramref name="value"/> inside the
        /// <paramref name="count"/> characters of <paramref name="source"/> that end at
        /// <paramref name="startIndex"/>.
        /// </summary>
        /// <param name="source">The string to search; must not be null.</param>
        /// <param name="value">The string to look for; must not be null.</param>
        /// <param name="startIndex">Index in <paramref name="source"/> from which the search proceeds backwards.</param>
        /// <param name="count">Number of characters in the search window.</param>
        /// <param name="ignoreCase">When true, the search is delegated to the native ordinal-ignore-case routine.</param>
        /// <returns>
        /// The index in <paramref name="source"/> of the match, <paramref name="startIndex"/> when
        /// <paramref name="value"/> is empty, or -1 when there is no match.
        /// </returns>
        internal static unsafe int LastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            Debug.Assert(source != null);
            Debug.Assert(value != null);

            if (value.Length == 0)
            {
                return startIndex;
            }

            if (count < value.Length)
            {
                return -1;
            }

            // startIndex is the index into source where we start search backwards from.
            // leftStartIndex is the index into source of the start of the string that is
            // count characters away from startIndex.
            int leftStartIndex = startIndex - count + 1;

            if (ignoreCase)
            {
                fixed (char* pSource = source)
                {
                    // The native routine is handed a pointer that starts at leftStartIndex,
                    // so a successful result is rebased onto the full string.
                    int lastIndex = Interop.GlobalizationInterop.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + leftStartIndex, count, findLast: true);
                    return lastIndex != -1 ?
                        leftStartIndex + lastIndex :
                        -1;
                }
            }

            // Walk candidate start positions from right to left; the first hit is the last match.
            for (int i = startIndex - value.Length + 1; i >= leftStartIndex; i--)
            {
                // Count how many leading characters of value match source at position i.
                int valueIndex = 0;
                while (valueIndex < value.Length && source[i + valueIndex] == value[valueIndex])
                {
                    valueIndex++;
                }

                if (valueIndex == value.Length)
                {
                    return i;
                }
            }

            return -1;
        }

        /// <summary>
        /// Forwards an ordinal, case-insensitive comparison of two character buffers to the
        /// globalization interop layer and returns its result unchanged.
        /// </summary>
        private static unsafe int CompareStringOrdinalIgnoreCase(char* string1, int count1, char* string2, int count2)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            return Interop.GlobalizationInterop.CompareStringOrdinalIgnoreCase(string1, count1, string2, count2);
        }

        // TODO https://github.com/dotnet/coreclr/issues/13827:
        // This method shouldn't be necessary, as we should be able to just use the overload
        // that takes two spans. But due to this issue, that's adding significant overhead.
        /// <summary>
        /// Compares a span against a string using this instance's sort handle and the given
        /// (non-ordinal) <paramref name="options"/>.
        /// </summary>
        private unsafe int CompareString(ReadOnlySpan<char> string1, string string2, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);
            Debug.Assert(string2 != null);
            Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

            fixed (char* pString1 = &MemoryMarshal.GetReference(string1))
            fixed (char* pString2 = &string2.GetRawStringData())
            {
                return Interop.GlobalizationInterop.CompareString(_sortHandle, pString1, string1.Length, pString2, string2.Length, options);
            }
        }

        /// <summary>
        /// Compares two spans using this instance's sort handle and the given (non-ordinal)
        /// <paramref name="options"/>.
        /// </summary>
        private unsafe int CompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);
            Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

            fixed (char* pString1 = &MemoryMarshal.GetReference(string1))
            fixed (char* pString2 = &MemoryMarshal.GetReference(string2))
            {
                return Interop.GlobalizationInterop.CompareString(_sortHandle, pString1, string1.Length, pString2, string2.Length, options);
            }
        }

        /// <summary>
        /// Finds the first occurrence of <paramref name="target"/> inside the
        /// <paramref name="count"/> characters of <paramref name="source"/> that start at
        /// <paramref name="startIndex"/>.
        /// </summary>
        /// <param name="source">The string to search; must be non-null and non-empty.</param>
        /// <param name="target">The string to look for; must not be null.</param>
        /// <param name="startIndex">Index in <paramref name="source"/> of the first character of the search window.</param>
        /// <param name="count">Number of characters in the search window.</param>
        /// <param name="options">Comparison options; must not include <c>OrdinalIgnoreCase</c>.</param>
        /// <param name="matchLengthPtr">
        /// Optional (may be null). On the managed paths it receives 0 for an empty target and
        /// <c>target.Length</c> for a match; on the native path it is passed through to the
        /// interop call.
        /// </param>
        /// <returns>The index in <paramref name="source"/> of the match, or -1 when there is none.</returns>
        internal unsafe int IndexOfCore(string source, string target, int startIndex, int count, CompareOptions options, int* matchLengthPtr)
        {
            Debug.Assert(!_invariantMode);

            Debug.Assert(!string.IsNullOrEmpty(source));
            Debug.Assert(target != null);
            Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);

            int index;

            if (target.Length == 0)
            {
                if (matchLengthPtr != null)
                {
                    *matchLengthPtr = 0;
                }

                return startIndex;
            }

            if (options == CompareOptions.Ordinal)
            {
                index = IndexOfOrdinal(source, target, startIndex, count, ignoreCase: false);
                if (index != -1 && matchLengthPtr != null)
                {
                    *matchLengthPtr = target.Length;
                }

                return index;
            }

#if CORECLR
            if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort())
            {
                index = IndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
                if (index != -1 && matchLengthPtr != null)
                {
                    *matchLengthPtr = target.Length;
                }

                return index;
            }
#endif

            fixed (char* pSource = source)
            {
                // The native routine is handed a pointer that already starts at startIndex,
                // so a successful result is rebased onto the full string.
                index = Interop.GlobalizationInterop.IndexOf(_sortHandle, target, target.Length, pSource + startIndex, count, options, matchLengthPtr);

                return index != -1 ? index + startIndex : -1;
            }
        }

        /// <summary>
        /// Finds the last occurrence of <paramref name="target"/> inside the
        /// <paramref name="count"/> characters of <paramref name="source"/> that end at
        /// <paramref name="startIndex"/>.
        /// </summary>
        /// <param name="source">The string to search; must be non-null and non-empty.</param>
        /// <param name="target">The string to look for; must not be null.</param>
        /// <param name="startIndex">Index in <paramref name="source"/> from which the search proceeds backwards.</param>
        /// <param name="count">Number of characters in the search window.</param>
        /// <param name="options">Comparison options; must not include <c>OrdinalIgnoreCase</c>.</param>
        /// <returns>
        /// The index in <paramref name="source"/> of the match, <paramref name="startIndex"/> when
        /// <paramref name="target"/> is empty, or -1 when there is no match.
        /// </returns>
        private unsafe int LastIndexOfCore(string source, string target, int startIndex, int count, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);

            Debug.Assert(!string.IsNullOrEmpty(source));
            Debug.Assert(target != null);
            Debug.Assert((options & CompareOptions.OrdinalIgnoreCase) == 0);

            if (target.Length == 0)
            {
                return startIndex;
            }

            if (options == CompareOptions.Ordinal)
            {
                return LastIndexOfOrdinalCore(source, target, startIndex, count, ignoreCase: false);
            }

#if CORECLR
            if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && target.IsFastSort())
            {
                return LastIndexOf(source, target, startIndex, count, GetOrdinalCompareOptions(options));
            }
#endif

            // startIndex is the index into source where we start search backwards from. leftStartIndex is the index into source
            // of the start of the string that is count characters away from startIndex.
            int leftStartIndex = (startIndex - count + 1);

            fixed (char* pSource = source)
            {
                // The same offset is used for the pointer and for rebasing the result.
                int lastIndex = Interop.GlobalizationInterop.LastIndexOf(_sortHandle, target, target.Length, pSource + leftStartIndex, count, options);

                return lastIndex != -1 ? lastIndex + leftStartIndex : -1;
            }
        }

        /// <summary>
        /// Determines whether <paramref name="source"/> starts with <paramref name="prefix"/>
        /// under this instance's sort and the given (non-ordinal) <paramref name="options"/>.
        /// Both strings must be non-null and non-empty.
        /// </summary>
        private bool StartsWith(string source, string prefix, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);

            Debug.Assert(!string.IsNullOrEmpty(source));
            Debug.Assert(!string.IsNullOrEmpty(prefix));
            Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

#if CORECLR
            if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && prefix.IsFastSort())
            {
                return IsPrefix(source, prefix, GetOrdinalCompareOptions(options));
            }
#endif

            return Interop.GlobalizationInterop.StartsWith(_sortHandle, prefix, prefix.Length, source, source.Length, options);
        }

        /// <summary>
        /// Determines whether <paramref name="source"/> ends with <paramref name="suffix"/>
        /// under this instance's sort and the given (non-ordinal) <paramref name="options"/>.
        /// Both strings must be non-null and non-empty.
        /// </summary>
        private bool EndsWith(string source, string suffix, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);

            Debug.Assert(!string.IsNullOrEmpty(source));
            Debug.Assert(!string.IsNullOrEmpty(suffix));
            Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

#if CORECLR
            if (_isAsciiEqualityOrdinal && CanUseAsciiOrdinalForOptions(options) && source.IsFastSort() && suffix.IsFastSort())
            {
                return IsSuffix(source, suffix, GetOrdinalCompareOptions(options));
            }
#endif

            return Interop.GlobalizationInterop.EndsWith(_sortHandle, suffix, suffix.Length, source, source.Length, options);
        }

        /// <summary>
        /// Builds a <see cref="SortKey"/> for <paramref name="source"/> using this instance's sort handle.
        /// </summary>
        /// <param name="source">The string to produce a key for.</param>
        /// <param name="options">Comparison options; must not contain any flag in <c>ValidSortkeyCtorMaskOffFlags</c>.</param>
        /// <returns>A sort key whose data is empty when <paramref name="source"/> is empty.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="options"/> contains an invalid flag.</exception>
        private unsafe SortKey CreateSortKey(string source, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);

            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if ((options & ValidSortkeyCtorMaskOffFlags) != 0)
            {
                throw new ArgumentException(SR.Argument_InvalidFlag, nameof(options));
            }

            byte[] keyData;
            if (source.Length == 0)
            {
                keyData = Array.Empty<byte>();
            }
            else
            {
                // First call, with a null buffer, only asks for the required key length.
                int sortKeyLength = Interop.GlobalizationInterop.GetSortKey(_sortHandle, source, source.Length, null, 0, options);
                keyData = new byte[sortKeyLength];

                // Second call fills the buffer that was just sized.
                fixed (byte* pSortKey = keyData)
                {
                    Interop.GlobalizationInterop.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
                }
            }

            return new SortKey(Name, source, options, keyData);
        }

        /// <summary>
        /// Checks whether a UTF-16 buffer consists only of well-formed, assigned, non-private-use
        /// code points.
        /// </summary>
        /// <param name="text">Pointer to the first UTF-16 code unit.</param>
        /// <param name="length">Number of UTF-16 code units to examine.</param>
        /// <returns>
        /// False if the buffer contains an unpaired surrogate or any code point whose Unicode
        /// category is <c>PrivateUse</c> or <c>OtherNotAssigned</c>; otherwise true.
        /// </returns>
        private static unsafe bool IsSortable(char* text, int length)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            int index = 0;
            UnicodeCategory uc;

            while (index < length)
            {
                if (char.IsHighSurrogate(text[index]))
                {
                    if (index == length - 1 || !char.IsLowSurrogate(text[index + 1]))
                    {
                        return false; // unpaired surrogate
                    }

                    uc = CharUnicodeInfo.InternalGetUnicodeCategory(char.ConvertToUtf32(text[index], text[index + 1]));
                    if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned)
                    {
                        return false;
                    }

                    // Skip both halves of the surrogate pair.
                    index += 2;
                    continue;
                }

                if (char.IsLowSurrogate(text[index]))
                {
                    return false; // unpaired surrogate
                }

                uc = CharUnicodeInfo.GetUnicodeCategory(text[index]);
                if (uc == UnicodeCategory.PrivateUse || uc == UnicodeCategory.OtherNotAssigned)
                {
                    return false;
                }

                index++;
            }

            return true;
        }

        // -----------------------------
        // ---- PAL layer ends here ----
        // -----------------------------

        /// <summary>
        /// Computes a hash code for <paramref name="source"/> by hashing the sort key produced
        /// for it under the given (non-ordinal) <paramref name="options"/>.
        /// </summary>
        /// <param name="source">The string to hash; must not be null.</param>
        /// <param name="options">Comparison options used to build the sort key.</param>
        /// <returns>0 for an empty string; otherwise the hash of the sort key bytes.</returns>
        internal unsafe int GetHashCodeOfStringCore(string source, CompareOptions options)
        {
            Debug.Assert(!_invariantMode);

            Debug.Assert(source != null);
            Debug.Assert((options & (CompareOptions.Ordinal | CompareOptions.OrdinalIgnoreCase)) == 0);

            if (source.Length == 0)
            {
                return 0;
            }

            // First call, with a null buffer, only asks for the required key length.
            int sortKeyLength = Interop.GlobalizationInterop.GetSortKey(_sortHandle, source, source.Length, null, 0, options);

            // As an optimization, for small sort keys we allocate the buffer on the stack.
            if (sortKeyLength <= 256)
            {
                byte* pSortKey = stackalloc byte[sortKeyLength];
                Interop.GlobalizationInterop.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
                return InternalHashSortKey(pSortKey, sortKeyLength);
            }

            byte[] sortKey = new byte[sortKeyLength];

            fixed (byte* pSortKey = &sortKey[0])
            {
                Interop.GlobalizationInterop.GetSortKey(_sortHandle, source, source.Length, pSortKey, sortKeyLength, options);
                return InternalHashSortKey(pSortKey, sortKeyLength);
            }
        }

        /// <summary>
        /// Hashes a sort key buffer with two interleaved multiply-by-33-and-xor accumulators,
        /// both seeded with 5381.
        /// </summary>
        /// <param name="sortKey">Pointer to the sort key bytes.</param>
        /// <param name="sortKeyLength">Number of bytes in the sort key.</param>
        /// <returns>0 for an empty key; otherwise the combined hash.</returns>
        private static unsafe int InternalHashSortKey(byte* sortKey, int sortKeyLength)
        {
            // TODO: Random hashing is yet to be done
            // Active Issue: https://github.com/dotnet/corert/issues/2588

            int hash1 = 5381;
            int hash2 = hash1;
            if (sortKeyLength == 0)
            {
                return 0;
            }

            if (sortKeyLength == 1)
            {
                return (((hash1 << 5) + hash1) ^ sortKey[0]) + (hash2 * 1566083941);
            }

            // Bytes are consumed in pairs: even offsets feed hash1, odd offsets feed hash2.
            // The bound is rounded down to an even number, so for an odd length greater
            // than one the final byte is not mixed in.
            for (int i = 0; i < (sortKeyLength & ~1); i += 2)
            {
                hash1 = ((hash1 << 5) + hash1) ^ sortKey[i];
                hash2 = ((hash2 << 5) + hash2) ^ sortKey[i + 1];
            }

            return hash1 + (hash2 * 1566083941);
        }

        /// <summary>
        /// Maps culture-sensitive options to the ordinal option used by the fast paths:
        /// <c>OrdinalIgnoreCase</c> when <c>IgnoreCase</c> is set, otherwise <c>Ordinal</c>.
        /// </summary>
        private static CompareOptions GetOrdinalCompareOptions(CompareOptions options)
        {
            if ((options & CompareOptions.IgnoreCase) == CompareOptions.IgnoreCase)
            {
                return CompareOptions.OrdinalIgnoreCase;
            }
            else
            {
                return CompareOptions.Ordinal;
            }
        }

        /// <summary>
        /// Returns true when <paramref name="options"/> does not include <c>IgnoreSymbols</c>.
        /// </summary>
        private static bool CanUseAsciiOrdinalForOptions(CompareOptions options)
        {
            // Unlike the other Ignore options, IgnoreSymbols impacts ASCII characters (e.g. ').
            return (options & CompareOptions.IgnoreSymbols) == 0;
        }

        /// <summary>
        /// Encodes <paramref name="s"/> as UTF-8 into a new array that has one extra trailing
        /// zero byte.
        /// </summary>
        private static byte[] GetNullTerminatedUtf8String(string s)
        {
            int byteLen = System.Text.Encoding.UTF8.GetByteCount(s);

            // Allocate an extra byte (which defaults to 0) as the null terminator.
            byte[] buffer = new byte[byteLen + 1];

            int bytesWritten = System.Text.Encoding.UTF8.GetBytes(s, 0, s.Length, buffer, 0);

            Debug.Assert(bytesWritten == byteLen);

            return buffer;
        }

        /// <summary>
        /// Builds a <see cref="SortVersion"/> from the native sort version and this instance's LCID.
        /// The Guid carries the sort version in its first component and the four LCID bytes,
        /// most significant first, in its last four bytes.
        /// </summary>
        private SortVersion GetSortVersion()
        {
            Debug.Assert(!_invariantMode);

            int sortVersion = Interop.GlobalizationInterop.GetSortVersion(_sortHandle);
            return new SortVersion(sortVersion, LCID, new Guid(sortVersion, 0, 0, 0, 0, 0, 0,
                                                             (byte) (LCID >> 24),
                                                             (byte) ((LCID & 0x00FF0000) >> 16),
                                                             (byte) ((LCID & 0x0000FF00) >> 8),
                                                             (byte) (LCID & 0xFF)));
        }
    }
}