1// Package match provides a simple pattern matcher with unicode support.
2package match
3
4import (
5	"unicode/utf8"
6)
7
8// Match returns true if str matches pattern. This is a very
9// simple wildcard match where '*' matches on any number characters
10// and '?' matches on any one character.
11//
12// pattern:
13// 	{ term }
14// term:
15// 	'*'         matches any sequence of non-Separator characters
16// 	'?'         matches any single non-Separator character
17// 	c           matches character c (c != '*', '?', '\\')
18// 	'\\' c      matches character c
19//
20func Match(str, pattern string) bool {
21	if pattern == "*" {
22		return true
23	}
24	return match(str, pattern, 0, nil, -1) == rMatch
25}
26
27// MatchLimit is the same as Match but will limit the complexity of the match
28// operation. This is to avoid long running matches, specifically to avoid ReDos
29// attacks from arbritary inputs.
30//
31// How it works:
32// The underlying match routine is recursive and may call itself when it
33// encounters a sandwiched wildcard pattern, such as: `user:*:name`.
34// Everytime it calls itself a counter is incremented.
35// The operation is stopped when counter > maxcomp*len(str).
36func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) {
37	if pattern == "*" {
38		return true, false
39	}
40	counter := 0
41	r := match(str, pattern, len(str), &counter, maxcomp)
42	if r == rStop {
43		return false, true
44	}
45	return r == rMatch, false
46}
47
// result is the outcome reported by the internal match routine.
type result int

const (
	// rNoMatch means the string does not match the pattern. It is the zero
	// value of result.
	rNoMatch = result(iota)
	// rMatch means the string matches the pattern.
	rMatch
	// rStop means the complexity limit was exceeded before a verdict was
	// reached.
	rStop
)
55
56func match(str, pat string, slen int, counter *int, maxcomp int) result {
57	// check complexity limit
58	if maxcomp > -1 {
59		if *counter > slen*maxcomp {
60			return rStop
61		}
62		*counter++
63	}
64
65	for len(pat) > 0 {
66		var wild bool
67		pc, ps := rune(pat[0]), 1
68		if pc > 0x7f {
69			pc, ps = utf8.DecodeRuneInString(pat)
70		}
71		var sc rune
72		var ss int
73		if len(str) > 0 {
74			sc, ss = rune(str[0]), 1
75			if sc > 0x7f {
76				sc, ss = utf8.DecodeRuneInString(str)
77			}
78		}
79		switch pc {
80		case '?':
81			if ss == 0 {
82				return rNoMatch
83			}
84		case '*':
85			// Ignore repeating stars.
86			for len(pat) > 1 && pat[1] == '*' {
87				pat = pat[1:]
88			}
89
90			// If this star is the last character then it must be a match.
91			if len(pat) == 1 {
92				return rMatch
93			}
94
95			// Match and trim any non-wildcard suffix characters.
96			var ok bool
97			str, pat, ok = matchTrimSuffix(str, pat)
98			if !ok {
99				return rNoMatch
100			}
101
102			// Check for single star again.
103			if len(pat) == 1 {
104				return rMatch
105			}
106
107			// Perform recursive wildcard search.
108			r := match(str, pat[1:], slen, counter, maxcomp)
109			if r != rNoMatch {
110				return r
111			}
112			if len(str) == 0 {
113				return rNoMatch
114			}
115			wild = true
116		default:
117			if ss == 0 {
118				return rNoMatch
119			}
120			if pc == '\\' {
121				pat = pat[ps:]
122				pc, ps = utf8.DecodeRuneInString(pat)
123				if ps == 0 {
124					return rNoMatch
125				}
126			}
127			if sc != pc {
128				return rNoMatch
129			}
130		}
131		str = str[ss:]
132		if !wild {
133			pat = pat[ps:]
134		}
135	}
136	if len(str) == 0 {
137		return rMatch
138	}
139	return rNoMatch
140}
141
// matchTrimSuffix matches and trims any non-wildcard suffix characters.
// It returns the trimmed string, the trimmed pattern, and false only when a
// suffix character of the pattern fails to match the string.
//
// It is used when the pattern has more terms after a wildcard star. The
// trailing terms of the pattern are compared with the trailing runes of str,
// walking backwards until an unescaped star is reached or either input runs
// short. Walking backwards requires recognising an escaped character from its
// end, which is done by counting the backslashes in front of it.
//
// Every matched rune is removed from both str and pat.
//
// The pattern is expected to have at least two bytes with a star as the first
// byte; that star is what ends the backslash scan.
func matchTrimSuffix(str, pat string) (string, string, bool) {
	for str != "" && len(pat) > 1 {
		patRune, patSize := utf8.DecodeLastRuneInString(pat)

		// Count the backslashes directly in front of the rune. An odd count
		// means the rune is escaped and one backslash belongs to it.
		slashes := 0
		for pat[len(pat)-patSize-1-slashes] == '\\' {
			slashes++
		}
		escaped := slashes%2 == 1
		if escaped {
			patSize++
		}

		// An unescaped star ends the literal suffix.
		if patRune == '*' && !escaped {
			return str, pat, true
		}

		strRune, strSize := utf8.DecodeLastRuneInString(str)
		anyRune := patRune == '?' && !escaped
		if !anyRune && patRune != strRune {
			return str, pat, false
		}

		str = str[:len(str)-strSize]
		pat = pat[:len(pat)-patSize]
	}
	return str, pat, true
}
183
// maxRuneBytes is the UTF-8 encoding of utf8.MaxRune (U+10FFFF). Allowable
// uses it as the upper bound contributed by a '?' wildcard.
var maxRuneBytes = [...]byte{244, 143, 191, 191}

// Allowable parses the pattern and determines the minimum and maximum
// allowable values that the pattern can represent. Only the part of the
// pattern before the first unescaped '*' is considered.
//
// Within that prefix a '?' contributes a zero byte to min and the encoding of
// utf8.MaxRune to max, an escaped character ('\\' c) contributes c itself,
// and any other character is copied as is. A trailing backslash with nothing
// to escape is copied as is.
//
// When the prefix is followed by a '*', the last rune of max is incremented
// so that max sorts above the strings sharing that prefix. The increment is
// skipped when that rune decodes as utf8.RuneError or is utf8.MaxRune, in
// which case max is the prefix itself.
//
// An empty pattern, or one that starts with '*', yields two empty strings:
// no bounds can be derived from it.
func Allowable(pattern string) (min, max string) {
	if pattern == "" || pattern[0] == '*' {
		return "", ""
	}

	minb := make([]byte, 0, len(pattern))
	maxb := make([]byte, 0, len(pattern))
	var wild bool
	// Scanning by byte is safe: the bytes of a multi-byte rune are never
	// '*', '?' or '\\'.
	for i := 0; i < len(pattern) && !wild; i++ {
		switch c := pattern[i]; c {
		case '*':
			wild = true
		case '?':
			minb = append(minb, 0)
			maxb = append(maxb, maxRuneBytes[:]...)
		case '\\':
			// Escape: drop the backslash and take the next byte literally,
			// the same way Match treats '\\' c. Without this an escaped
			// star would be read as a wildcard.
			if i+1 < len(pattern) {
				i++
				c = pattern[i]
			}
			fallthrough
		default:
			minb = append(minb, c)
			maxb = append(maxb, c)
		}
	}

	if wild {
		// Bump the last rune so that max bounds every continuation of the
		// prefix.
		if r, n := utf8.DecodeLastRune(maxb); r != utf8.RuneError && r < utf8.MaxRune {
			var buf [utf8.UTFMax]byte
			nn := utf8.EncodeRune(buf[:], r+1)
			maxb = append(maxb[:len(maxb)-n], buf[:nn]...)
		}
	}
	return string(minb), string(maxb)
}
228
// IsPattern reports whether str contains a '*' or '?' wildcard character.
// Escapes are not considered: a wildcard preceded by a backslash still
// counts.
func IsPattern(str string) bool {
	for _, c := range []byte(str) {
		switch c {
		case '*', '?':
			return true
		}
	}
	return false
}
238