1package doublestar
2
3import (
4	"fmt"
5	"os"
6	"path"
7	"path/filepath"
8	"sort"
9	"strings"
10	"unicode/utf8"
11)
12
13// An OS abstracts functions in the standard library's os package.
14type OS interface {
15	Lstat(name string) (os.FileInfo, error)
16	Open(name string) (*os.File, error)
17	PathSeparator() rune
18	Stat(name string) (os.FileInfo, error)
19}
20
21// StandardOS is a value that implements the OS interface by calling functions
22// in the standard libray's os package.
23var StandardOS OS = standardOS{}
24
25// A standardOS implements OS by calling functions in the standard library's os
26// package.
27type standardOS struct{}
28
29func (standardOS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) }
30func (standardOS) Open(name string) (*os.File, error)     { return os.Open(name) }
31func (standardOS) PathSeparator() rune                    { return os.PathSeparator }
32func (standardOS) Stat(name string) (os.FileInfo, error)  { return os.Stat(name) }
33
34// ErrBadPattern indicates a pattern was malformed.
35var ErrBadPattern = path.ErrBadPattern
36
37// Split a path on the given separator, respecting escaping.
38func splitPathOnSeparator(path string, separator rune) (ret []string) {
39	idx := 0
40	if separator == '\\' {
41		// if the separator is '\\', then we can just split...
42		ret = strings.Split(path, string(separator))
43		idx = len(ret)
44	} else {
45		// otherwise, we need to be careful of situations where the separator was escaped
46		cnt := strings.Count(path, string(separator))
47		if cnt == 0 {
48			return []string{path}
49		}
50
51		ret = make([]string, cnt+1)
52		pathlen := len(path)
53		separatorLen := utf8.RuneLen(separator)
54		emptyEnd := false
55		for start := 0; start < pathlen; {
56			end := indexRuneWithEscaping(path[start:], separator)
57			if end == -1 {
58				emptyEnd = false
59				end = pathlen
60			} else {
61				emptyEnd = true
62				end += start
63			}
64			ret[idx] = path[start:end]
65			start = end + separatorLen
66			idx++
67		}
68
69		// If the last rune is a path separator, we need to append an empty string to
70		// represent the last, empty path component. By default, the strings from
71		// make([]string, ...) will be empty, so we just need to icrement the count
72		if emptyEnd {
73			idx++
74		}
75	}
76
77	return ret[:idx]
78}
79
80// Find the first index of a rune in a string,
81// ignoring any times the rune is escaped using "\".
82func indexRuneWithEscaping(s string, r rune) int {
83	end := strings.IndexRune(s, r)
84	if end == -1 {
85		return -1
86	}
87	if end > 0 && s[end-1] == '\\' {
88		start := end + utf8.RuneLen(r)
89		end = indexRuneWithEscaping(s[start:], r)
90		if end != -1 {
91			end += start
92		}
93	}
94	return end
95}
96
97// Find the last index of a rune in a string,
98// ignoring any times the rune is escaped using "\".
99func lastIndexRuneWithEscaping(s string, r rune) int {
100	end := strings.LastIndex(s, string(r))
101	if end == -1 {
102		return -1
103	}
104	if end > 0 && s[end-1] == '\\' {
105		end = lastIndexRuneWithEscaping(s[:end-1], r)
106	}
107	return end
108}
109
110// Find the index of the first instance of one of the unicode characters in
111// chars, ignoring any times those characters are escaped using "\".
112func indexAnyWithEscaping(s, chars string) int {
113	end := strings.IndexAny(s, chars)
114	if end == -1 {
115		return -1
116	}
117	if end > 0 && s[end-1] == '\\' {
118		_, adj := utf8.DecodeRuneInString(s[end:])
119		start := end + adj
120		end = indexAnyWithEscaping(s[start:], chars)
121		if end != -1 {
122			end += start
123		}
124	}
125	return end
126}
127
128// Split a set of alternatives such as {alt1,alt2,...} and returns the index of
129// the rune after the closing curly brace. Respects nested alternatives and
130// escaped runes.
131func splitAlternatives(s string) (ret []string, idx int) {
132	ret = make([]string, 0, 2)
133	idx = 0
134	slen := len(s)
135	braceCnt := 1
136	esc := false
137	start := 0
138	for braceCnt > 0 {
139		if idx >= slen {
140			return nil, -1
141		}
142
143		sRune, adj := utf8.DecodeRuneInString(s[idx:])
144		if esc {
145			esc = false
146		} else if sRune == '\\' {
147			esc = true
148		} else if sRune == '{' {
149			braceCnt++
150		} else if sRune == '}' {
151			braceCnt--
152		} else if sRune == ',' && braceCnt == 1 {
153			ret = append(ret, s[start:idx])
154			start = idx + adj
155		}
156
157		idx += adj
158	}
159	ret = append(ret, s[start:idx-1])
160	return
161}
162
163// Returns true if the pattern is "zero length", meaning
164// it could match zero or more characters.
165func isZeroLengthPattern(pattern string) (ret bool, err error) {
166	// * can match zero
167	if pattern == "" || pattern == "*" || pattern == "**" {
168		return true, nil
169	}
170
171	// an alternative with zero length can match zero, for example {,x} - the
172	// first alternative has zero length
173	r, adj := utf8.DecodeRuneInString(pattern)
174	if r == '{' {
175		options, endOptions := splitAlternatives(pattern[adj:])
176		if endOptions == -1 {
177			return false, ErrBadPattern
178		}
179		if ret, err = isZeroLengthPattern(pattern[adj+endOptions:]); !ret || err != nil {
180			return
181		}
182		for _, o := range options {
183			if ret, err = isZeroLengthPattern(o); ret || err != nil {
184				return
185			}
186		}
187	}
188
189	return false, nil
190}
191
192// Match returns true if name matches the shell file name pattern.
193// The pattern syntax is:
194//
195//  pattern:
196//    { term }
197//  term:
198//    '*'         matches any sequence of non-path-separators
199//    '**'        matches any sequence of characters, including
200//                path separators.
201//    '?'         matches any single non-path-separator character
202//    '[' [ '^' ] { character-range } ']'
203//          character class (must be non-empty)
204//    '{' { term } [ ',' { term } ... ] '}'
205//    c           matches character c (c != '*', '?', '\\', '[')
206//    '\\' c      matches character c
207//
208//  character-range:
209//    c           matches character c (c != '\\', '-', ']')
210//    '\\' c      matches character c
211//    lo '-' hi   matches character c for lo <= c <= hi
212//
213// Match requires pattern to match all of name, not just a substring.
214// The path-separator defaults to the '/' character. The only possible
215// returned error is ErrBadPattern, when pattern is malformed.
216//
217// Note: this is meant as a drop-in replacement for path.Match() which
218// always uses '/' as the path separator. If you want to support systems
219// which use a different path separator (such as Windows), what you want
220// is the PathMatch() function below.
221//
222func Match(pattern, name string) (bool, error) {
223	return matchWithSeparator(pattern, name, '/')
224}
225
226// PathMatch is like Match except that it uses your system's path separator.
227// For most systems, this will be '/'. However, for Windows, it would be '\\'.
228// Note that for systems where the path separator is '\\', escaping is
229// disabled.
230//
231// Note: this is meant as a drop-in replacement for filepath.Match().
232//
233func PathMatch(pattern, name string) (bool, error) {
234	return PathMatchOS(StandardOS, pattern, name)
235}
236
237// PathMatchOS is like PathMatch except that it uses vos's path separator.
238func PathMatchOS(vos OS, pattern, name string) (bool, error) {
239	pattern = filepath.ToSlash(pattern)
240	return matchWithSeparator(pattern, name, vos.PathSeparator())
241}
242
243// Match returns true if name matches the shell file name pattern.
244// The pattern syntax is:
245//
246//  pattern:
247//    { term }
248//  term:
249//    '*'         matches any sequence of non-path-separators
250//              '**'        matches any sequence of characters, including
251//                          path separators.
252//    '?'         matches any single non-path-separator character
253//    '[' [ '^' ] { character-range } ']'
254//          character class (must be non-empty)
255//    '{' { term } [ ',' { term } ... ] '}'
256//    c           matches character c (c != '*', '?', '\\', '[')
257//    '\\' c      matches character c
258//
259//  character-range:
260//    c           matches character c (c != '\\', '-', ']')
261//    '\\' c      matches character c, unless separator is '\\'
262//    lo '-' hi   matches character c for lo <= c <= hi
263//
264// Match requires pattern to match all of name, not just a substring.
265// The only possible returned error is ErrBadPattern, when pattern
266// is malformed.
267//
268func matchWithSeparator(pattern, name string, separator rune) (bool, error) {
269	nameComponents := splitPathOnSeparator(name, separator)
270	return doMatching(pattern, nameComponents)
271}
272
273func doMatching(pattern string, nameComponents []string) (matched bool, err error) {
274	// check for some base-cases
275	patternLen, nameLen := len(pattern), len(nameComponents)
276	if patternLen == 0 && nameLen == 0 {
277		return true, nil
278	}
279	if patternLen == 0 {
280		if nameLen == 1 && nameComponents[0] == "" {
281			return true, nil
282		} else if nameLen == 0 {
283			return false, nil
284		}
285	}
286
287	slashIdx := indexRuneWithEscaping(pattern, '/')
288	lastComponent := slashIdx == -1
289	if lastComponent {
290		slashIdx = len(pattern)
291	}
292	if pattern[:slashIdx] == "**" {
293		// if our last pattern component is a doublestar, we're done -
294		// doublestar will match any remaining name components, if any.
295		if lastComponent {
296			return true, nil
297		}
298
299		// otherwise, try matching remaining components
300		for nameIdx := 0; nameIdx < nameLen; nameIdx++ {
301			if m, _ := doMatching(pattern[slashIdx+1:], nameComponents[nameIdx:]); m {
302				return true, nil
303			}
304		}
305		return false, nil
306	}
307
308	var matches []string
309	matches, err = matchComponent(pattern, nameComponents[0])
310	if matches == nil || err != nil {
311		return
312	}
313	if len(matches) == 0 && nameLen == 1 {
314		return true, nil
315	}
316
317	if nameLen > 1 {
318		for _, alt := range matches {
319			matched, err = doMatching(alt, nameComponents[1:])
320			if matched || err != nil {
321				return
322			}
323		}
324	}
325
326	return false, nil
327}
328
329// Glob returns the names of all files matching pattern or nil
330// if there is no matching file. The syntax of pattern is the same
331// as in Match. The pattern may describe hierarchical names such as
332// /usr/*/bin/ed (assuming the Separator is '/').
333//
334// Glob ignores file system errors such as I/O errors reading directories.
335// The only possible returned error is ErrBadPattern, when pattern
336// is malformed.
337//
338// Your system path separator is automatically used. This means on
339// systems where the separator is '\\' (Windows), escaping will be
340// disabled.
341//
342// Note: this is meant as a drop-in replacement for filepath.Glob().
343//
344func Glob(pattern string) (matches []string, err error) {
345	return GlobOS(StandardOS, pattern)
346}
347
348// GlobOS is like Glob except that it operates on vos.
349func GlobOS(vos OS, pattern string) (matches []string, err error) {
350	if len(pattern) == 0 {
351		return nil, nil
352	}
353
354	// if the pattern starts with alternatives, we need to handle that here - the
355	// alternatives may be a mix of relative and absolute
356	if pattern[0] == '{' {
357		options, endOptions := splitAlternatives(pattern[1:])
358		if endOptions == -1 {
359			return nil, ErrBadPattern
360		}
361		for _, o := range options {
362			m, e := Glob(o + pattern[endOptions+1:])
363			if e != nil {
364				return nil, e
365			}
366			matches = append(matches, m...)
367		}
368		return matches, nil
369	}
370
371	// If the pattern is relative or absolute and we're on a non-Windows machine,
372	// volumeName will be an empty string. If it is absolute and we're on a
373	// Windows machine, volumeName will be a drive letter ("C:") for filesystem
374	// paths or \\<server>\<share> for UNC paths.
375	isAbs := filepath.IsAbs(pattern) || pattern[0] == '\\' || pattern[0] == '/'
376	volumeName := filepath.VolumeName(pattern)
377	isWindowsUNC := strings.HasPrefix(volumeName, `\\`)
378	if isWindowsUNC || isAbs {
379		startIdx := len(volumeName) + 1
380		return doGlob(vos, fmt.Sprintf("%s%s", volumeName, string(vos.PathSeparator())), filepath.ToSlash(pattern[startIdx:]), matches)
381	}
382
383	// otherwise, it's a relative pattern
384	return doGlob(vos, ".", filepath.ToSlash(pattern), matches)
385}
386
387// Perform a glob
388func doGlob(vos OS, basedir, pattern string, matches []string) (m []string, e error) {
389	m = matches
390	e = nil
391
392	// if the pattern starts with any path components that aren't globbed (ie,
393	// `path/to/glob*`), we can skip over the un-globbed components (`path/to` in
394	// our example).
395	globIdx := indexAnyWithEscaping(pattern, "*?[{\\")
396	if globIdx > 0 {
397		globIdx = lastIndexRuneWithEscaping(pattern[:globIdx], '/')
398	} else if globIdx == -1 {
399		globIdx = lastIndexRuneWithEscaping(pattern, '/')
400	}
401	if globIdx > 0 {
402		basedir = filepath.Join(basedir, pattern[:globIdx])
403		pattern = pattern[globIdx+1:]
404	}
405
406	// Lstat will return an error if the file/directory doesn't exist
407	fi, err := vos.Lstat(basedir)
408	if err != nil {
409		return
410	}
411
412	// if the pattern is empty, we've found a match
413	if len(pattern) == 0 {
414		m = append(m, basedir)
415		return
416	}
417
418	// otherwise, we need to check each item in the directory...
419	// first, if basedir is a symlink, follow it...
420	if (fi.Mode() & os.ModeSymlink) != 0 {
421		fi, err = vos.Stat(basedir)
422		if err != nil {
423			return
424		}
425	}
426
427	// confirm it's a directory...
428	if !fi.IsDir() {
429		return
430	}
431
432	// read directory
433	dir, err := vos.Open(basedir)
434	if err != nil {
435		return
436	}
437	defer func() {
438		if err := dir.Close(); e == nil {
439			e = err
440		}
441	}()
442
443	files, err := dir.Readdir(-1)
444	if err != nil {
445		return nil, err
446	}
447	sort.Slice(files, func(i, j int) bool { return files[i].Name() < files[j].Name() })
448
449	slashIdx := indexRuneWithEscaping(pattern, '/')
450	lastComponent := slashIdx == -1
451	if lastComponent {
452		slashIdx = len(pattern)
453	}
454	if pattern[:slashIdx] == "**" {
455		// if the current component is a doublestar, we'll try depth-first
456		for _, file := range files {
457			// if symlink, we may want to follow
458			if (file.Mode() & os.ModeSymlink) != 0 {
459				file, err = vos.Stat(filepath.Join(basedir, file.Name()))
460				if err != nil {
461					continue
462				}
463			}
464
465			if file.IsDir() {
466				// recurse into directories
467				if lastComponent {
468					m = append(m, filepath.Join(basedir, file.Name()))
469				}
470				m, e = doGlob(vos, filepath.Join(basedir, file.Name()), pattern, m)
471			} else if lastComponent {
472				// if the pattern's last component is a doublestar, we match filenames, too
473				m = append(m, filepath.Join(basedir, file.Name()))
474			}
475		}
476		if lastComponent {
477			return // we're done
478		}
479
480		pattern = pattern[slashIdx+1:]
481	}
482
483	// check items in current directory and recurse
484	var match []string
485	for _, file := range files {
486		match, e = matchComponent(pattern, file.Name())
487		if e != nil {
488			return
489		}
490		if match != nil {
491			if len(match) == 0 {
492				m = append(m, filepath.Join(basedir, file.Name()))
493			} else {
494				for _, alt := range match {
495					m, e = doGlob(vos, filepath.Join(basedir, file.Name()), alt, m)
496				}
497			}
498		}
499	}
500	return
501}
502
503// Attempt to match a single path component with a pattern. Note that the
504// pattern may include multiple components but that the "name" is just a single
505// path component. The return value is a slice of patterns that should be
506// checked against subsequent path components or nil, indicating that the
507// pattern does not match this path. It is assumed that pattern components are
508// separated by '/'
509func matchComponent(pattern, name string) ([]string, error) {
510	// check for matches one rune at a time
511	patternLen, nameLen := len(pattern), len(name)
512	patIdx, nameIdx := 0, 0
513	for patIdx < patternLen && nameIdx < nameLen {
514		patRune, patAdj := utf8.DecodeRuneInString(pattern[patIdx:])
515		nameRune, nameAdj := utf8.DecodeRuneInString(name[nameIdx:])
516		if patRune == '/' {
517			patIdx++
518			break
519		} else if patRune == '\\' {
520			// handle escaped runes, only if separator isn't '\\'
521			patIdx += patAdj
522			patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:])
523			if patRune == utf8.RuneError {
524				return nil, ErrBadPattern
525			} else if patRune == nameRune {
526				patIdx += patAdj
527				nameIdx += nameAdj
528			} else {
529				return nil, nil
530			}
531		} else if patRune == '*' {
532			// handle stars - a star at the end of the pattern or before a separator
533			// will always match the rest of the path component
534			if patIdx += patAdj; patIdx >= patternLen {
535				return []string{}, nil
536			}
537			if patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:]); patRune == '/' {
538				return []string{pattern[patIdx+patAdj:]}, nil
539			}
540
541			// check if we can make any matches
542			for ; nameIdx < nameLen; nameIdx += nameAdj {
543				if m, e := matchComponent(pattern[patIdx:], name[nameIdx:]); m != nil || e != nil {
544					return m, e
545				}
546			}
547			return nil, nil
548		} else if patRune == '[' {
549			// handle character sets
550			patIdx += patAdj
551			endClass := indexRuneWithEscaping(pattern[patIdx:], ']')
552			if endClass == -1 {
553				return nil, ErrBadPattern
554			}
555			endClass += patIdx
556			classRunes := []rune(pattern[patIdx:endClass])
557			classRunesLen := len(classRunes)
558			if classRunesLen > 0 {
559				classIdx := 0
560				matchClass := false
561				if classRunes[0] == '^' {
562					classIdx++
563				}
564				for classIdx < classRunesLen {
565					low := classRunes[classIdx]
566					if low == '-' {
567						return nil, ErrBadPattern
568					}
569					classIdx++
570					if low == '\\' {
571						if classIdx < classRunesLen {
572							low = classRunes[classIdx]
573							classIdx++
574						} else {
575							return nil, ErrBadPattern
576						}
577					}
578					high := low
579					if classIdx < classRunesLen && classRunes[classIdx] == '-' {
580						// we have a range of runes
581						if classIdx++; classIdx >= classRunesLen {
582							return nil, ErrBadPattern
583						}
584						high = classRunes[classIdx]
585						if high == '-' {
586							return nil, ErrBadPattern
587						}
588						classIdx++
589						if high == '\\' {
590							if classIdx < classRunesLen {
591								high = classRunes[classIdx]
592								classIdx++
593							} else {
594								return nil, ErrBadPattern
595							}
596						}
597					}
598					if low <= nameRune && nameRune <= high {
599						matchClass = true
600					}
601				}
602				if matchClass == (classRunes[0] == '^') {
603					return nil, nil
604				}
605			} else {
606				return nil, ErrBadPattern
607			}
608			patIdx = endClass + 1
609			nameIdx += nameAdj
610		} else if patRune == '{' {
611			// handle alternatives such as {alt1,alt2,...}
612			patIdx += patAdj
613			options, endOptions := splitAlternatives(pattern[patIdx:])
614			if endOptions == -1 {
615				return nil, ErrBadPattern
616			}
617			patIdx += endOptions
618
619			results := make([][]string, 0, len(options))
620			totalResults := 0
621			for _, o := range options {
622				m, e := matchComponent(o+pattern[patIdx:], name[nameIdx:])
623				if e != nil {
624					return nil, e
625				}
626				if m != nil {
627					results = append(results, m)
628					totalResults += len(m)
629				}
630			}
631			if len(results) > 0 {
632				lst := make([]string, 0, totalResults)
633				for _, m := range results {
634					lst = append(lst, m...)
635				}
636				return lst, nil
637			}
638
639			return nil, nil
640		} else if patRune == '?' || patRune == nameRune {
641			// handle single-rune wildcard
642			patIdx += patAdj
643			nameIdx += nameAdj
644		} else {
645			return nil, nil
646		}
647	}
648	if nameIdx >= nameLen {
649		if patIdx >= patternLen {
650			return []string{}, nil
651		}
652
653		pattern = pattern[patIdx:]
654		slashIdx := indexRuneWithEscaping(pattern, '/')
655		testPattern := pattern
656		if slashIdx >= 0 {
657			testPattern = pattern[:slashIdx]
658		}
659
660		zeroLength, err := isZeroLengthPattern(testPattern)
661		if err != nil {
662			return nil, err
663		}
664		if zeroLength {
665			if slashIdx == -1 {
666				return []string{}, nil
667			} else {
668				return []string{pattern[slashIdx+1:]}, nil
669			}
670		}
671	}
672	return nil, nil
673}
674