1package doublestar 2 3import ( 4 "fmt" 5 "os" 6 "path" 7 "path/filepath" 8 "sort" 9 "strings" 10 "unicode/utf8" 11) 12 13// An OS abstracts functions in the standard library's os package. 14type OS interface { 15 Lstat(name string) (os.FileInfo, error) 16 Open(name string) (*os.File, error) 17 PathSeparator() rune 18 Stat(name string) (os.FileInfo, error) 19} 20 21// StandardOS is a value that implements the OS interface by calling functions 22// in the standard libray's os package. 23var StandardOS OS = standardOS{} 24 25// A standardOS implements OS by calling functions in the standard library's os 26// package. 27type standardOS struct{} 28 29func (standardOS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } 30func (standardOS) Open(name string) (*os.File, error) { return os.Open(name) } 31func (standardOS) PathSeparator() rune { return os.PathSeparator } 32func (standardOS) Stat(name string) (os.FileInfo, error) { return os.Stat(name) } 33 34// ErrBadPattern indicates a pattern was malformed. 35var ErrBadPattern = path.ErrBadPattern 36 37// Split a path on the given separator, respecting escaping. 38func splitPathOnSeparator(path string, separator rune) (ret []string) { 39 idx := 0 40 if separator == '\\' { 41 // if the separator is '\\', then we can just split... 42 ret = strings.Split(path, string(separator)) 43 idx = len(ret) 44 } else { 45 // otherwise, we need to be careful of situations where the separator was escaped 46 cnt := strings.Count(path, string(separator)) 47 if cnt == 0 { 48 return []string{path} 49 } 50 51 ret = make([]string, cnt+1) 52 pathlen := len(path) 53 separatorLen := utf8.RuneLen(separator) 54 emptyEnd := false 55 for start := 0; start < pathlen; { 56 end := indexRuneWithEscaping(path[start:], separator) 57 if end == -1 { 58 emptyEnd = false 59 end = pathlen 60 } else { 61 emptyEnd = true 62 end += start 63 } 64 ret[idx] = path[start:end] 65 start = end + separatorLen 66 idx++ 67 } 68 69 // If the last rune is a path separator, we need to append an empty string to 70 // represent the last, empty path component. By default, the strings from 71 // make([]string, ...) will be empty, so we just need to icrement the count 72 if emptyEnd { 73 idx++ 74 } 75 } 76 77 return ret[:idx] 78} 79 80// Find the first index of a rune in a string, 81// ignoring any times the rune is escaped using "\". 82func indexRuneWithEscaping(s string, r rune) int { 83 end := strings.IndexRune(s, r) 84 if end == -1 { 85 return -1 86 } 87 if end > 0 && s[end-1] == '\\' { 88 start := end + utf8.RuneLen(r) 89 end = indexRuneWithEscaping(s[start:], r) 90 if end != -1 { 91 end += start 92 } 93 } 94 return end 95} 96 97// Find the last index of a rune in a string, 98// ignoring any times the rune is escaped using "\". 99func lastIndexRuneWithEscaping(s string, r rune) int { 100 end := strings.LastIndex(s, string(r)) 101 if end == -1 { 102 return -1 103 } 104 if end > 0 && s[end-1] == '\\' { 105 end = lastIndexRuneWithEscaping(s[:end-1], r) 106 } 107 return end 108} 109 110// Find the index of the first instance of one of the unicode characters in 111// chars, ignoring any times those characters are escaped using "\". 112func indexAnyWithEscaping(s, chars string) int { 113 end := strings.IndexAny(s, chars) 114 if end == -1 { 115 return -1 116 } 117 if end > 0 && s[end-1] == '\\' { 118 _, adj := utf8.DecodeRuneInString(s[end:]) 119 start := end + adj 120 end = indexAnyWithEscaping(s[start:], chars) 121 if end != -1 { 122 end += start 123 } 124 } 125 return end 126} 127 128// Split a set of alternatives such as {alt1,alt2,...} and returns the index of 129// the rune after the closing curly brace. Respects nested alternatives and 130// escaped runes. 131func splitAlternatives(s string) (ret []string, idx int) { 132 ret = make([]string, 0, 2) 133 idx = 0 134 slen := len(s) 135 braceCnt := 1 136 esc := false 137 start := 0 138 for braceCnt > 0 { 139 if idx >= slen { 140 return nil, -1 141 } 142 143 sRune, adj := utf8.DecodeRuneInString(s[idx:]) 144 if esc { 145 esc = false 146 } else if sRune == '\\' { 147 esc = true 148 } else if sRune == '{' { 149 braceCnt++ 150 } else if sRune == '}' { 151 braceCnt-- 152 } else if sRune == ',' && braceCnt == 1 { 153 ret = append(ret, s[start:idx]) 154 start = idx + adj 155 } 156 157 idx += adj 158 } 159 ret = append(ret, s[start:idx-1]) 160 return 161} 162 163// Returns true if the pattern is "zero length", meaning 164// it could match zero or more characters. 165func isZeroLengthPattern(pattern string) (ret bool, err error) { 166 // * can match zero 167 if pattern == "" || pattern == "*" || pattern == "**" { 168 return true, nil 169 } 170 171 // an alternative with zero length can match zero, for example {,x} - the 172 // first alternative has zero length 173 r, adj := utf8.DecodeRuneInString(pattern) 174 if r == '{' { 175 options, endOptions := splitAlternatives(pattern[adj:]) 176 if endOptions == -1 { 177 return false, ErrBadPattern 178 } 179 if ret, err = isZeroLengthPattern(pattern[adj+endOptions:]); !ret || err != nil { 180 return 181 } 182 for _, o := range options { 183 if ret, err = isZeroLengthPattern(o); ret || err != nil { 184 return 185 } 186 } 187 } 188 189 return false, nil 190} 191 192// Match returns true if name matches the shell file name pattern. 193// The pattern syntax is: 194// 195// pattern: 196// { term } 197// term: 198// '*' matches any sequence of non-path-separators 199// '**' matches any sequence of characters, including 200// path separators. 201// '?' matches any single non-path-separator character 202// '[' [ '^' ] { character-range } ']' 203// character class (must be non-empty) 204// '{' { term } [ ',' { term } ... ] '}' 205// c matches character c (c != '*', '?', '\\', '[') 206// '\\' c matches character c 207// 208// character-range: 209// c matches character c (c != '\\', '-', ']') 210// '\\' c matches character c 211// lo '-' hi matches character c for lo <= c <= hi 212// 213// Match requires pattern to match all of name, not just a substring. 214// The path-separator defaults to the '/' character. The only possible 215// returned error is ErrBadPattern, when pattern is malformed. 216// 217// Note: this is meant as a drop-in replacement for path.Match() which 218// always uses '/' as the path separator. If you want to support systems 219// which use a different path separator (such as Windows), what you want 220// is the PathMatch() function below. 221// 222func Match(pattern, name string) (bool, error) { 223 return matchWithSeparator(pattern, name, '/') 224} 225 226// PathMatch is like Match except that it uses your system's path separator. 227// For most systems, this will be '/'. However, for Windows, it would be '\\'. 228// Note that for systems where the path separator is '\\', escaping is 229// disabled. 230// 231// Note: this is meant as a drop-in replacement for filepath.Match(). 232// 233func PathMatch(pattern, name string) (bool, error) { 234 return PathMatchOS(StandardOS, pattern, name) 235} 236 237// PathMatchOS is like PathMatch except that it uses vos's path separator. 238func PathMatchOS(vos OS, pattern, name string) (bool, error) { 239 pattern = filepath.ToSlash(pattern) 240 return matchWithSeparator(pattern, name, vos.PathSeparator()) 241} 242 243// Match returns true if name matches the shell file name pattern. 244// The pattern syntax is: 245// 246// pattern: 247// { term } 248// term: 249// '*' matches any sequence of non-path-separators 250// '**' matches any sequence of characters, including 251// path separators. 252// '?' matches any single non-path-separator character 253// '[' [ '^' ] { character-range } ']' 254// character class (must be non-empty) 255// '{' { term } [ ',' { term } ... ] '}' 256// c matches character c (c != '*', '?', '\\', '[') 257// '\\' c matches character c 258// 259// character-range: 260// c matches character c (c != '\\', '-', ']') 261// '\\' c matches character c, unless separator is '\\' 262// lo '-' hi matches character c for lo <= c <= hi 263// 264// Match requires pattern to match all of name, not just a substring. 265// The only possible returned error is ErrBadPattern, when pattern 266// is malformed. 267// 268func matchWithSeparator(pattern, name string, separator rune) (bool, error) { 269 nameComponents := splitPathOnSeparator(name, separator) 270 return doMatching(pattern, nameComponents) 271} 272 273func doMatching(pattern string, nameComponents []string) (matched bool, err error) { 274 // check for some base-cases 275 patternLen, nameLen := len(pattern), len(nameComponents) 276 if patternLen == 0 && nameLen == 0 { 277 return true, nil 278 } 279 if patternLen == 0 { 280 if nameLen == 1 && nameComponents[0] == "" { 281 return true, nil 282 } else if nameLen == 0 { 283 return false, nil 284 } 285 } 286 287 slashIdx := indexRuneWithEscaping(pattern, '/') 288 lastComponent := slashIdx == -1 289 if lastComponent { 290 slashIdx = len(pattern) 291 } 292 if pattern[:slashIdx] == "**" { 293 // if our last pattern component is a doublestar, we're done - 294 // doublestar will match any remaining name components, if any. 295 if lastComponent { 296 return true, nil 297 } 298 299 // otherwise, try matching remaining components 300 for nameIdx := 0; nameIdx < nameLen; nameIdx++ { 301 if m, _ := doMatching(pattern[slashIdx+1:], nameComponents[nameIdx:]); m { 302 return true, nil 303 } 304 } 305 return false, nil 306 } 307 308 var matches []string 309 matches, err = matchComponent(pattern, nameComponents[0]) 310 if matches == nil || err != nil { 311 return 312 } 313 if len(matches) == 0 && nameLen == 1 { 314 return true, nil 315 } 316 317 if nameLen > 1 { 318 for _, alt := range matches { 319 matched, err = doMatching(alt, nameComponents[1:]) 320 if matched || err != nil { 321 return 322 } 323 } 324 } 325 326 return false, nil 327} 328 329// Glob returns the names of all files matching pattern or nil 330// if there is no matching file. The syntax of pattern is the same 331// as in Match. The pattern may describe hierarchical names such as 332// /usr/*/bin/ed (assuming the Separator is '/'). 333// 334// Glob ignores file system errors such as I/O errors reading directories. 335// The only possible returned error is ErrBadPattern, when pattern 336// is malformed. 337// 338// Your system path separator is automatically used. This means on 339// systems where the separator is '\\' (Windows), escaping will be 340// disabled. 341// 342// Note: this is meant as a drop-in replacement for filepath.Glob(). 343// 344func Glob(pattern string) (matches []string, err error) { 345 return GlobOS(StandardOS, pattern) 346} 347 348// GlobOS is like Glob except that it operates on vos. 349func GlobOS(vos OS, pattern string) (matches []string, err error) { 350 if len(pattern) == 0 { 351 return nil, nil 352 } 353 354 // if the pattern starts with alternatives, we need to handle that here - the 355 // alternatives may be a mix of relative and absolute 356 if pattern[0] == '{' { 357 options, endOptions := splitAlternatives(pattern[1:]) 358 if endOptions == -1 { 359 return nil, ErrBadPattern 360 } 361 for _, o := range options { 362 m, e := Glob(o + pattern[endOptions+1:]) 363 if e != nil { 364 return nil, e 365 } 366 matches = append(matches, m...) 367 } 368 return matches, nil 369 } 370 371 // If the pattern is relative or absolute and we're on a non-Windows machine, 372 // volumeName will be an empty string. If it is absolute and we're on a 373 // Windows machine, volumeName will be a drive letter ("C:") for filesystem 374 // paths or \\<server>\<share> for UNC paths. 375 isAbs := filepath.IsAbs(pattern) || pattern[0] == '\\' || pattern[0] == '/' 376 volumeName := filepath.VolumeName(pattern) 377 isWindowsUNC := strings.HasPrefix(volumeName, `\\`) 378 if isWindowsUNC || isAbs { 379 startIdx := len(volumeName) + 1 380 return doGlob(vos, fmt.Sprintf("%s%s", volumeName, string(vos.PathSeparator())), filepath.ToSlash(pattern[startIdx:]), matches) 381 } 382 383 // otherwise, it's a relative pattern 384 return doGlob(vos, ".", filepath.ToSlash(pattern), matches) 385} 386 387// Perform a glob 388func doGlob(vos OS, basedir, pattern string, matches []string) (m []string, e error) { 389 m = matches 390 e = nil 391 392 // if the pattern starts with any path components that aren't globbed (ie, 393 // `path/to/glob*`), we can skip over the un-globbed components (`path/to` in 394 // our example). 395 globIdx := indexAnyWithEscaping(pattern, "*?[{\\") 396 if globIdx > 0 { 397 globIdx = lastIndexRuneWithEscaping(pattern[:globIdx], '/') 398 } else if globIdx == -1 { 399 globIdx = lastIndexRuneWithEscaping(pattern, '/') 400 } 401 if globIdx > 0 { 402 basedir = filepath.Join(basedir, pattern[:globIdx]) 403 pattern = pattern[globIdx+1:] 404 } 405 406 // Lstat will return an error if the file/directory doesn't exist 407 fi, err := vos.Lstat(basedir) 408 if err != nil { 409 return 410 } 411 412 // if the pattern is empty, we've found a match 413 if len(pattern) == 0 { 414 m = append(m, basedir) 415 return 416 } 417 418 // otherwise, we need to check each item in the directory... 419 // first, if basedir is a symlink, follow it... 420 if (fi.Mode() & os.ModeSymlink) != 0 { 421 fi, err = vos.Stat(basedir) 422 if err != nil { 423 return 424 } 425 } 426 427 // confirm it's a directory... 428 if !fi.IsDir() { 429 return 430 } 431 432 // read directory 433 dir, err := vos.Open(basedir) 434 if err != nil { 435 return 436 } 437 defer func() { 438 if err := dir.Close(); e == nil { 439 e = err 440 } 441 }() 442 443 files, err := dir.Readdir(-1) 444 if err != nil { 445 return nil, err 446 } 447 sort.Slice(files, func(i, j int) bool { return files[i].Name() < files[j].Name() }) 448 449 slashIdx := indexRuneWithEscaping(pattern, '/') 450 lastComponent := slashIdx == -1 451 if lastComponent { 452 slashIdx = len(pattern) 453 } 454 if pattern[:slashIdx] == "**" { 455 // if the current component is a doublestar, we'll try depth-first 456 for _, file := range files { 457 // if symlink, we may want to follow 458 if (file.Mode() & os.ModeSymlink) != 0 { 459 file, err = vos.Stat(filepath.Join(basedir, file.Name())) 460 if err != nil { 461 continue 462 } 463 } 464 465 if file.IsDir() { 466 // recurse into directories 467 if lastComponent { 468 m = append(m, filepath.Join(basedir, file.Name())) 469 } 470 m, e = doGlob(vos, filepath.Join(basedir, file.Name()), pattern, m) 471 } else if lastComponent { 472 // if the pattern's last component is a doublestar, we match filenames, too 473 m = append(m, filepath.Join(basedir, file.Name())) 474 } 475 } 476 if lastComponent { 477 return // we're done 478 } 479 480 pattern = pattern[slashIdx+1:] 481 } 482 483 // check items in current directory and recurse 484 var match []string 485 for _, file := range files { 486 match, e = matchComponent(pattern, file.Name()) 487 if e != nil { 488 return 489 } 490 if match != nil { 491 if len(match) == 0 { 492 m = append(m, filepath.Join(basedir, file.Name())) 493 } else { 494 for _, alt := range match { 495 m, e = doGlob(vos, filepath.Join(basedir, file.Name()), alt, m) 496 } 497 } 498 } 499 } 500 return 501} 502 503// Attempt to match a single path component with a pattern. Note that the 504// pattern may include multiple components but that the "name" is just a single 505// path component. The return value is a slice of patterns that should be 506// checked against subsequent path components or nil, indicating that the 507// pattern does not match this path. It is assumed that pattern components are 508// separated by '/' 509func matchComponent(pattern, name string) ([]string, error) { 510 // check for matches one rune at a time 511 patternLen, nameLen := len(pattern), len(name) 512 patIdx, nameIdx := 0, 0 513 for patIdx < patternLen && nameIdx < nameLen { 514 patRune, patAdj := utf8.DecodeRuneInString(pattern[patIdx:]) 515 nameRune, nameAdj := utf8.DecodeRuneInString(name[nameIdx:]) 516 if patRune == '/' { 517 patIdx++ 518 break 519 } else if patRune == '\\' { 520 // handle escaped runes, only if separator isn't '\\' 521 patIdx += patAdj 522 patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:]) 523 if patRune == utf8.RuneError { 524 return nil, ErrBadPattern 525 } else if patRune == nameRune { 526 patIdx += patAdj 527 nameIdx += nameAdj 528 } else { 529 return nil, nil 530 } 531 } else if patRune == '*' { 532 // handle stars - a star at the end of the pattern or before a separator 533 // will always match the rest of the path component 534 if patIdx += patAdj; patIdx >= patternLen { 535 return []string{}, nil 536 } 537 if patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:]); patRune == '/' { 538 return []string{pattern[patIdx+patAdj:]}, nil 539 } 540 541 // check if we can make any matches 542 for ; nameIdx < nameLen; nameIdx += nameAdj { 543 if m, e := matchComponent(pattern[patIdx:], name[nameIdx:]); m != nil || e != nil { 544 return m, e 545 } 546 } 547 return nil, nil 548 } else if patRune == '[' { 549 // handle character sets 550 patIdx += patAdj 551 endClass := indexRuneWithEscaping(pattern[patIdx:], ']') 552 if endClass == -1 { 553 return nil, ErrBadPattern 554 } 555 endClass += patIdx 556 classRunes := []rune(pattern[patIdx:endClass]) 557 classRunesLen := len(classRunes) 558 if classRunesLen > 0 { 559 classIdx := 0 560 matchClass := false 561 if classRunes[0] == '^' { 562 classIdx++ 563 } 564 for classIdx < classRunesLen { 565 low := classRunes[classIdx] 566 if low == '-' { 567 return nil, ErrBadPattern 568 } 569 classIdx++ 570 if low == '\\' { 571 if classIdx < classRunesLen { 572 low = classRunes[classIdx] 573 classIdx++ 574 } else { 575 return nil, ErrBadPattern 576 } 577 } 578 high := low 579 if classIdx < classRunesLen && classRunes[classIdx] == '-' { 580 // we have a range of runes 581 if classIdx++; classIdx >= classRunesLen { 582 return nil, ErrBadPattern 583 } 584 high = classRunes[classIdx] 585 if high == '-' { 586 return nil, ErrBadPattern 587 } 588 classIdx++ 589 if high == '\\' { 590 if classIdx < classRunesLen { 591 high = classRunes[classIdx] 592 classIdx++ 593 } else { 594 return nil, ErrBadPattern 595 } 596 } 597 } 598 if low <= nameRune && nameRune <= high { 599 matchClass = true 600 } 601 } 602 if matchClass == (classRunes[0] == '^') { 603 return nil, nil 604 } 605 } else { 606 return nil, ErrBadPattern 607 } 608 patIdx = endClass + 1 609 nameIdx += nameAdj 610 } else if patRune == '{' { 611 // handle alternatives such as {alt1,alt2,...} 612 patIdx += patAdj 613 options, endOptions := splitAlternatives(pattern[patIdx:]) 614 if endOptions == -1 { 615 return nil, ErrBadPattern 616 } 617 patIdx += endOptions 618 619 results := make([][]string, 0, len(options)) 620 totalResults := 0 621 for _, o := range options { 622 m, e := matchComponent(o+pattern[patIdx:], name[nameIdx:]) 623 if e != nil { 624 return nil, e 625 } 626 if m != nil { 627 results = append(results, m) 628 totalResults += len(m) 629 } 630 } 631 if len(results) > 0 { 632 lst := make([]string, 0, totalResults) 633 for _, m := range results { 634 lst = append(lst, m...) 635 } 636 return lst, nil 637 } 638 639 return nil, nil 640 } else if patRune == '?' || patRune == nameRune { 641 // handle single-rune wildcard 642 patIdx += patAdj 643 nameIdx += nameAdj 644 } else { 645 return nil, nil 646 } 647 } 648 if nameIdx >= nameLen { 649 if patIdx >= patternLen { 650 return []string{}, nil 651 } 652 653 pattern = pattern[patIdx:] 654 slashIdx := indexRuneWithEscaping(pattern, '/') 655 testPattern := pattern 656 if slashIdx >= 0 { 657 testPattern = pattern[:slashIdx] 658 } 659 660 zeroLength, err := isZeroLengthPattern(testPattern) 661 if err != nil { 662 return nil, err 663 } 664 if zeroLength { 665 if slashIdx == -1 { 666 return []string{}, nil 667 } else { 668 return []string{pattern[slashIdx+1:]}, nil 669 } 670 } 671 } 672 return nil, nil 673} 674