1 // Licensed to the .NET Foundation under one or more agreements. 2 // The .NET Foundation licenses this file to you under the MIT license. 3 // See the LICENSE file in the project root for more information. 4 5 namespace System.IO 6 { 7 internal static class PatternMatcher 8 { 9 /// <devdoc> 10 /// Private constants (directly from C header files) 11 /// </devdoc> 12 private const int MATCHES_ARRAY_SIZE = 16; 13 private const char ANSI_DOS_STAR = '>'; 14 private const char ANSI_DOS_QM = '<'; 15 private const char DOS_DOT = '"'; 16 17 /// <devdoc> 18 /// Tells whether a given name matches the expression given with a strict (i.e. UNIX like) 19 /// semantics. This code is a port of unmanaged code. Original code comment follows: 20 /// 21 /// Routine Description: 22 /// 23 /// This routine compares a Dbcs name and an expression and tells the caller 24 /// if the name is in the language defined by the expression. The input name 25 /// cannot contain wildcards, while the expression may contain wildcards. 26 /// 27 /// Expression wild cards are evaluated as shown in the nondeterministic 28 /// finite automatons below. Note that ~* and ~? are DOS_STAR and DOS_QM. 29 /// 30 /// 31 /// ~* is DOS_STAR, ~? is DOS_QM, and ~. is DOS_DOT 32 /// 33 /// 34 /// S 35 /// <-----< 36 /// X | | e Y 37 /// X * Y == (0)----->-(1)->-----(2)-----(3) 38 /// 39 /// 40 /// S-. 41 /// <-----< 42 /// X | | e Y 43 /// X ~* Y == (0)----->-(1)->-----(2)-----(3) 44 /// 45 /// 46 /// 47 /// X S S Y 48 /// X ?? Y == (0)---(1)---(2)---(3)---(4) 49 /// 50 /// 51 /// 52 /// X . . Y 53 /// X ~.~. Y == (0)---(1)----(2)------(3)---(4) 54 /// | |________| 55 /// | ^ | 56 /// |_______________| 57 /// ^EOF or .^ 58 /// 59 /// 60 /// X S-. S-. Y 61 /// X ~?~? Y == (0)---(1)-----(2)-----(3)---(4) 62 /// | |________| 63 /// | ^ | 64 /// |_______________| 65 /// ^EOF or .^ 66 /// 67 /// 68 /// 69 /// where S is any single character 70 /// 71 /// S-. is any single character except the final . 72 /// 73 /// e is a null character transition 74 /// 75 /// EOF is the end of the name string 76 /// 77 /// In words: 78 /// 79 /// * matches 0 or more characters. 80 /// 81 /// ? matches exactly 1 character. 82 /// 83 /// DOS_STAR matches 0 or more characters until encountering and matching 84 /// the final . in the name. 85 /// 86 /// DOS_QM matches any single character, or upon encountering a period or 87 /// end of name string, advances the expression to the end of the 88 /// set of contiguous DOS_QMs. 89 /// 90 /// DOS_DOT matches either a . or zero characters beyond name string. 91 /// 92 /// Arguments: 93 /// 94 /// Expression - Supplies the input expression to check against 95 /// 96 /// Name - Supplies the input name to check for. 97 /// 98 /// Return Value: 99 /// 100 /// BOOLEAN - TRUE if Name is an element in the set of strings denoted 101 /// by the input Expression and FALSE otherwise. 102 /// 103 /// </devdoc> StrictMatchPattern(string expression, string name)104 public static bool StrictMatchPattern(string expression, string name) 105 { 106 // 107 // The idea behind the algorithm is pretty simple. We keep track of 108 // all possible locations in the regular expression that are matching 109 // the name. If when the name has been exhausted one of the locations 110 // in the expression is also just exhausted, the name is in the language 111 // defined by the regular expression. 112 // 113 114 if (name == null || name.Length == 0 || expression == null || expression.Length == 0) 115 { 116 return false; 117 } 118 119 // 120 // Special case by far the most common wild card search of * or *.* 121 // 122 123 if (expression.Equals("*") || expression.Equals("*.*")) 124 { 125 return true; 126 } 127 128 // If this class is ever exposed for generic use, 129 // we need to make sure that name doesn't contain wildcards. Currently 130 // the only component that calls this method is FileSystemWatcher and 131 // it will never pass a name that contains a wildcard. 132 133 134 // 135 // Also special case expressions of the form *X. With this and the prior 136 // case we have covered virtually all normal queries. 137 // 138 if (expression[0] == '*' && expression.IndexOf('*', 1) == -1) 139 { 140 int rightLength = expression.Length - 1; 141 // if name is shorter that the stuff to the right of * in expression, we don't 142 // need to do the string compare, otherwise we compare rightlength characters 143 // and the end of both strings. 144 if (name.Length >= rightLength && 145 string.Compare(expression, 1, name, name.Length - rightLength, rightLength, PathInternal.StringComparison) == 0) 146 { 147 return true; 148 } 149 } 150 151 // 152 // Walk through the name string, picking off characters. We go one 153 // character beyond the end because some wild cards are able to match 154 // zero characters beyond the end of the string. 155 // 156 // With each new name character we determine a new set of states that 157 // match the name so far. We use two arrays that we swap back and forth 158 // for this purpose. One array lists the possible expression states for 159 // all name characters up to but not including the current one, and other 160 // array is used to build up the list of states considering the current 161 // name character as well. The arrays are then switched and the process 162 // repeated. 163 // 164 // There is not a one-to-one correspondence between state number and 165 // offset into the expression. This is evident from the NFAs in the 166 // initial comment to this function. State numbering is not continuous. 167 // This allows a simple conversion between state number and expression 168 // offset. Each character in the expression can represent one or two 169 // states. * and DOS_STAR generate two states: ExprOffset*2 and 170 // ExprOffset*2 + 1. All other expression characters can produce only 171 // a single state. Thus ExprOffset = State/2. 172 // 173 // 174 // Here is a short description of the variables involved: 175 // 176 // NameOffset - The offset of the current name char being processed. 177 // 178 // ExprOffset - The offset of the current expression char being processed. 179 // 180 // SrcCount - Prior match being investigated with current name char 181 // 182 // DestCount - Next location to put a matching assuming current name char 183 // 184 // NameFinished - Allows one more iteration through the Matches array 185 // after the name is exhausted (to come *s for example) 186 // 187 // PreviousDestCount - This is used to prevent entry duplication, see comment 188 // 189 // PreviousMatches - Holds the previous set of matches (the Src array) 190 // 191 // CurrentMatches - Holds the current set of matches (the Dest array) 192 // 193 // AuxBuffer, LocalBuffer - the storage for the Matches arrays 194 // 195 196 // 197 // Set up the initial variables 198 // 199 int nameOffset; 200 int exprOffset; 201 int length; 202 203 int srcCount; 204 int destCount; 205 int previousDestCount; 206 int matchesCount; 207 208 char nameChar = '\0'; 209 char exprChar = '\0'; 210 211 int[] previousMatches = new int[MATCHES_ARRAY_SIZE]; 212 int[] currentMatches = new int[MATCHES_ARRAY_SIZE]; 213 214 int maxState; 215 int currentState; 216 217 bool nameFinished = false; 218 219 previousMatches[0] = 0; 220 matchesCount = 1; 221 222 nameOffset = 0; 223 maxState = expression.Length * 2; 224 225 while (!nameFinished) 226 { 227 if (nameOffset < name.Length) 228 { 229 nameChar = name[nameOffset]; 230 nameOffset++; 231 } 232 else 233 { 234 nameFinished = true; 235 236 // 237 // if we have already exhausted the expression, C#. Don't 238 // continue. 239 // 240 if (previousMatches[matchesCount - 1] == maxState) 241 { 242 break; 243 } 244 } 245 246 // 247 // Now, for each of the previous stored expression matches, see what 248 // we can do with this name character. 249 // 250 srcCount = 0; 251 destCount = 0; 252 previousDestCount = 0; 253 254 while (srcCount < matchesCount) 255 { 256 // 257 // We have to carry on our expression analysis as far as possible 258 // for each character of name, so we loop here until the 259 // expression stops matching. A clue here is that expression 260 // cases that can match zero or more characters end with a 261 // continue, while those that can accept only a single character 262 // end with a break. 263 // 264 exprOffset = ((previousMatches[srcCount++] + 1) / 2); 265 length = 0; 266 267 while (true) 268 { 269 if (exprOffset == expression.Length) 270 { 271 break; 272 } 273 274 // 275 // The first time through the loop we don't want 276 // to increment ExprOffset. 277 // 278 279 exprOffset += length; 280 281 currentState = exprOffset * 2; 282 283 if (exprOffset == expression.Length) 284 { 285 currentMatches[destCount++] = maxState; 286 break; 287 } 288 289 exprChar = expression[exprOffset]; 290 length = 1; 291 292 // 293 // We may be about to exhaust the local 294 // space for ExpressionMatches[][], so we have to allocate 295 // some pool if this is the case. 296 // 297 298 if (destCount >= MATCHES_ARRAY_SIZE - 2) 299 { 300 int newSize = currentMatches.Length * 2; 301 int[] tmp = new int[newSize]; 302 Array.Copy(currentMatches, 0, tmp, 0, currentMatches.Length); 303 currentMatches = tmp; 304 305 tmp = new int[newSize]; 306 Array.Copy(previousMatches, 0, tmp, 0, previousMatches.Length); 307 previousMatches = tmp; 308 } 309 310 // 311 // * matches any character zero or more times. 312 // 313 314 if (exprChar == '*') 315 { 316 currentMatches[destCount++] = currentState; 317 currentMatches[destCount++] = (currentState + 1); 318 continue; 319 } 320 321 // 322 // DOS_STAR matches any character except . zero or more times. 323 // 324 325 if (exprChar == ANSI_DOS_STAR) 326 { 327 bool iCanEatADot = false; 328 329 // 330 // If we are at a period, determine if we are allowed to 331 // consume it, i.e. make sure it is not the last one. 332 // 333 if (!nameFinished && (nameChar == '.')) 334 { 335 char tmpChar; 336 int offset; 337 338 int nameLength = name.Length; 339 for (offset = nameOffset; offset < nameLength; offset++) 340 { 341 tmpChar = name[offset]; 342 length = 1; 343 344 if (tmpChar == '.') 345 { 346 iCanEatADot = true; 347 break; 348 } 349 } 350 } 351 352 if (nameFinished || (nameChar != '.') || iCanEatADot) 353 { 354 currentMatches[destCount++] = currentState; 355 currentMatches[destCount++] = (currentState + 1); 356 continue; 357 } 358 else 359 { 360 // 361 // We are at a period. We can only match zero 362 // characters (i.e. the epsilon transition). 363 // 364 currentMatches[destCount++] = (currentState + 1); 365 continue; 366 } 367 } 368 369 // 370 // The following expression characters all match by consuming 371 // a character, thus force the expression, and thus state 372 // forward. 373 // 374 currentState += length * 2; 375 376 // 377 // DOS_QM is the most complicated. If the name is finished, 378 // we can match zero characters. If this name is a '.', we 379 // don't match, but look at the next expression. Otherwise 380 // we match a single character. 381 // 382 if (exprChar == ANSI_DOS_QM) 383 { 384 if (nameFinished || (nameChar == '.')) 385 { 386 continue; 387 } 388 389 currentMatches[destCount++] = currentState; 390 break; 391 } 392 393 // 394 // A DOS_DOT can match either a period, or zero characters 395 // beyond the end of name. 396 // 397 if (exprChar == DOS_DOT) 398 { 399 if (nameFinished) 400 { 401 continue; 402 } 403 404 if (nameChar == '.') 405 { 406 currentMatches[destCount++] = currentState; 407 break; 408 } 409 } 410 411 // 412 // From this point on a name character is required to even 413 // continue, let alone make a match. 414 // 415 if (nameFinished) 416 { 417 break; 418 } 419 420 // 421 // If this expression was a '?' we can match it once. 422 // 423 if (exprChar == '?') 424 { 425 currentMatches[destCount++] = currentState; 426 break; 427 } 428 429 // 430 // Finally, check if the expression char matches the name char 431 // 432 433 if (PathInternal.IsCaseSensitive ? 434 (exprChar == nameChar) : 435 (char.ToUpperInvariant(exprChar) == char.ToUpperInvariant(nameChar))) 436 { 437 currentMatches[destCount++] = currentState; 438 break; 439 } 440 441 // 442 // The expression didn't match so go look at the next 443 // previous match. 444 // 445 446 break; 447 } 448 449 450 // 451 // Prevent duplication in the destination array. 452 // 453 // Each of the arrays is monotonically increasing and non- 454 // duplicating, thus we skip over any source element in the src 455 // array if we just added the same element to the destination 456 // array. This guarantees non-duplication in the dest. array. 457 // 458 459 if ((srcCount < matchesCount) && (previousDestCount < destCount)) 460 { 461 while (previousDestCount < destCount) 462 { 463 int previousLength = previousMatches.Length; 464 while ((srcCount < previousLength) && (previousMatches[srcCount] < currentMatches[previousDestCount])) 465 { 466 srcCount += 1; 467 } 468 previousDestCount += 1; 469 } 470 } 471 } 472 473 // 474 // If we found no matches in the just finished iteration, it's time 475 // to bail. 476 // 477 478 if (destCount == 0) 479 { 480 return false; 481 } 482 483 // 484 // Swap the meaning the two arrays 485 // 486 487 { 488 int[] tmp; 489 490 tmp = previousMatches; 491 492 previousMatches = currentMatches; 493 494 currentMatches = tmp; 495 } 496 497 matchesCount = destCount; 498 } 499 500 currentState = previousMatches[matchesCount - 1]; 501 502 return currentState == maxState; 503 } 504 } 505 } 506