1// Package match provides a simple pattern matcher with unicode support. 2package match 3 4import ( 5 "unicode/utf8" 6) 7 8// Match returns true if str matches pattern. This is a very 9// simple wildcard match where '*' matches on any number characters 10// and '?' matches on any one character. 11// 12// pattern: 13// { term } 14// term: 15// '*' matches any sequence of non-Separator characters 16// '?' matches any single non-Separator character 17// c matches character c (c != '*', '?', '\\') 18// '\\' c matches character c 19// 20func Match(str, pattern string) bool { 21 if pattern == "*" { 22 return true 23 } 24 return match(str, pattern, 0, nil, -1) == rMatch 25} 26 27// MatchLimit is the same as Match but will limit the complexity of the match 28// operation. This is to avoid long running matches, specifically to avoid ReDos 29// attacks from arbritary inputs. 30// 31// How it works: 32// The underlying match routine is recursive and may call itself when it 33// encounters a sandwiched wildcard pattern, such as: `user:*:name`. 34// Everytime it calls itself a counter is incremented. 35// The operation is stopped when counter > maxcomp*len(str). 36func MatchLimit(str, pattern string, maxcomp int) (matched, stopped bool) { 37 if pattern == "*" { 38 return true, false 39 } 40 counter := 0 41 r := match(str, pattern, len(str), &counter, maxcomp) 42 if r == rStop { 43 return false, true 44 } 45 return r == rMatch, false 46} 47 48type result int 49 50const ( 51 rNoMatch result = iota 52 rMatch 53 rStop 54) 55 56func match(str, pat string, slen int, counter *int, maxcomp int) result { 57 // check complexity limit 58 if maxcomp > -1 { 59 if *counter > slen*maxcomp { 60 return rStop 61 } 62 *counter++ 63 } 64 65 for len(pat) > 0 { 66 var wild bool 67 pc, ps := rune(pat[0]), 1 68 if pc > 0x7f { 69 pc, ps = utf8.DecodeRuneInString(pat) 70 } 71 var sc rune 72 var ss int 73 if len(str) > 0 { 74 sc, ss = rune(str[0]), 1 75 if sc > 0x7f { 76 sc, ss = utf8.DecodeRuneInString(str) 77 } 78 } 79 switch pc { 80 case '?': 81 if ss == 0 { 82 return rNoMatch 83 } 84 case '*': 85 // Ignore repeating stars. 86 for len(pat) > 1 && pat[1] == '*' { 87 pat = pat[1:] 88 } 89 90 // If this star is the last character then it must be a match. 91 if len(pat) == 1 { 92 return rMatch 93 } 94 95 // Match and trim any non-wildcard suffix characters. 96 var ok bool 97 str, pat, ok = matchTrimSuffix(str, pat) 98 if !ok { 99 return rNoMatch 100 } 101 102 // Check for single star again. 103 if len(pat) == 1 { 104 return rMatch 105 } 106 107 // Perform recursive wildcard search. 108 r := match(str, pat[1:], slen, counter, maxcomp) 109 if r != rNoMatch { 110 return r 111 } 112 if len(str) == 0 { 113 return rNoMatch 114 } 115 wild = true 116 default: 117 if ss == 0 { 118 return rNoMatch 119 } 120 if pc == '\\' { 121 pat = pat[ps:] 122 pc, ps = utf8.DecodeRuneInString(pat) 123 if ps == 0 { 124 return rNoMatch 125 } 126 } 127 if sc != pc { 128 return rNoMatch 129 } 130 } 131 str = str[ss:] 132 if !wild { 133 pat = pat[ps:] 134 } 135 } 136 if len(str) == 0 { 137 return rMatch 138 } 139 return rNoMatch 140} 141 142// matchTrimSuffix matches and trims any non-wildcard suffix characters. 143// Returns the trimed string and pattern. 144// 145// This is called because the pattern contains extra data after the wildcard 146// star. Here we compare any suffix characters in the pattern to the suffix of 147// the target string. Basically a reverse match that stops when a wildcard 148// character is reached. This is a little trickier than a forward match because 149// we need to evaluate an escaped character in reverse. 150// 151// Any matched characters will be trimmed from both the target 152// string and the pattern. 153func matchTrimSuffix(str, pat string) (string, string, bool) { 154 // It's expected that the pattern has at least two bytes and the first byte 155 // is a wildcard star '*' 156 match := true 157 for len(str) > 0 && len(pat) > 1 { 158 pc, ps := utf8.DecodeLastRuneInString(pat) 159 var esc bool 160 for i := 0; ; i++ { 161 if pat[len(pat)-ps-i-1] != '\\' { 162 if i&1 == 1 { 163 esc = true 164 ps++ 165 } 166 break 167 } 168 } 169 if pc == '*' && !esc { 170 match = true 171 break 172 } 173 sc, ss := utf8.DecodeLastRuneInString(str) 174 if !((pc == '?' && !esc) || pc == sc) { 175 match = false 176 break 177 } 178 str = str[:len(str)-ss] 179 pat = pat[:len(pat)-ps] 180 } 181 return str, pat, match 182} 183 184var maxRuneBytes = [...]byte{244, 143, 191, 191} 185 186// Allowable parses the pattern and determines the minimum and maximum allowable 187// values that the pattern can represent. 188// When the max cannot be determined, 'true' will be returned 189// for infinite. 190func Allowable(pattern string) (min, max string) { 191 if pattern == "" || pattern[0] == '*' { 192 return "", "" 193 } 194 195 minb := make([]byte, 0, len(pattern)) 196 maxb := make([]byte, 0, len(pattern)) 197 var wild bool 198 for i := 0; i < len(pattern); i++ { 199 if pattern[i] == '*' { 200 wild = true 201 break 202 } 203 if pattern[i] == '?' { 204 minb = append(minb, 0) 205 maxb = append(maxb, maxRuneBytes[:]...) 206 } else { 207 minb = append(minb, pattern[i]) 208 maxb = append(maxb, pattern[i]) 209 } 210 } 211 if wild { 212 r, n := utf8.DecodeLastRune(maxb) 213 if r != utf8.RuneError { 214 if r < utf8.MaxRune { 215 r++ 216 if r > 0x7f { 217 b := make([]byte, 4) 218 nn := utf8.EncodeRune(b, r) 219 maxb = append(maxb[:len(maxb)-n], b[:nn]...) 220 } else { 221 maxb = append(maxb[:len(maxb)-n], byte(r)) 222 } 223 } 224 } 225 } 226 return string(minb), string(maxb) 227} 228 229// IsPattern returns true if the string is a pattern. 230func IsPattern(str string) bool { 231 for i := 0; i < len(str); i++ { 232 if str[i] == '*' || str[i] == '?' { 233 return true 234 } 235 } 236 return false 237} 238