1package stdlib 2 3import ( 4 "fmt" 5 "regexp" 6 resyntax "regexp/syntax" 7 8 "github.com/zclconf/go-cty/cty" 9 "github.com/zclconf/go-cty/cty/function" 10) 11 12var RegexFunc = function.New(&function.Spec{ 13 Params: []function.Parameter{ 14 { 15 Name: "pattern", 16 Type: cty.String, 17 }, 18 { 19 Name: "string", 20 Type: cty.String, 21 }, 22 }, 23 Type: func(args []cty.Value) (cty.Type, error) { 24 if !args[0].IsKnown() { 25 // We can't predict our type without seeing our pattern 26 return cty.DynamicPseudoType, nil 27 } 28 29 retTy, err := regexPatternResultType(args[0].AsString()) 30 if err != nil { 31 err = function.NewArgError(0, err) 32 } 33 return retTy, err 34 }, 35 Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { 36 if retType == cty.DynamicPseudoType { 37 return cty.DynamicVal, nil 38 } 39 40 re, err := regexp.Compile(args[0].AsString()) 41 if err != nil { 42 // Should never happen, since we checked this in the Type function above. 43 return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err) 44 } 45 str := args[1].AsString() 46 47 captureIdxs := re.FindStringSubmatchIndex(str) 48 if captureIdxs == nil { 49 return cty.NilVal, fmt.Errorf("pattern did not match any part of the given string") 50 } 51 52 return regexPatternResult(re, str, captureIdxs, retType), nil 53 }, 54}) 55 56var RegexAllFunc = function.New(&function.Spec{ 57 Params: []function.Parameter{ 58 { 59 Name: "pattern", 60 Type: cty.String, 61 }, 62 { 63 Name: "string", 64 Type: cty.String, 65 }, 66 }, 67 Type: func(args []cty.Value) (cty.Type, error) { 68 if !args[0].IsKnown() { 69 // We can't predict our type without seeing our pattern, 70 // but we do know it'll always be a list of something. 71 return cty.List(cty.DynamicPseudoType), nil 72 } 73 74 retTy, err := regexPatternResultType(args[0].AsString()) 75 if err != nil { 76 err = function.NewArgError(0, err) 77 } 78 return cty.List(retTy), err 79 }, 80 Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) { 81 ety := retType.ElementType() 82 if ety == cty.DynamicPseudoType { 83 return cty.DynamicVal, nil 84 } 85 86 re, err := regexp.Compile(args[0].AsString()) 87 if err != nil { 88 // Should never happen, since we checked this in the Type function above. 89 return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err) 90 } 91 str := args[1].AsString() 92 93 captureIdxsEach := re.FindAllStringSubmatchIndex(str, -1) 94 if len(captureIdxsEach) == 0 { 95 return cty.ListValEmpty(ety), nil 96 } 97 98 elems := make([]cty.Value, len(captureIdxsEach)) 99 for i, captureIdxs := range captureIdxsEach { 100 elems[i] = regexPatternResult(re, str, captureIdxs, ety) 101 } 102 return cty.ListVal(elems), nil 103 }, 104}) 105 106// Regex is a function that extracts one or more substrings from a given 107// string by applying a regular expression pattern, describing the first 108// match. 109// 110// The return type depends on the composition of the capture groups (if any) 111// in the pattern: 112// 113// - If there are no capture groups at all, the result is a single string 114// representing the entire matched pattern. 115// - If all of the capture groups are named, the result is an object whose 116// keys are the named groups and whose values are their sub-matches, or 117// null if a particular sub-group was inside another group that didn't 118// match. 119// - If none of the capture groups are named, the result is a tuple whose 120// elements are the sub-groups in order and whose values are their 121// sub-matches, or null if a particular sub-group was inside another group 122// that didn't match. 123// - It is invalid to use both named and un-named capture groups together in 124// the same pattern. 125// 126// If the pattern doesn't match, this function returns an error. To test for 127// a match, call RegexAll and check if the length of the result is greater 128// than zero. 129func Regex(pattern, str cty.Value) (cty.Value, error) { 130 return RegexFunc.Call([]cty.Value{pattern, str}) 131} 132 133// RegexAll is similar to Regex but it finds all of the non-overlapping matches 134// in the given string and returns a list of them. 135// 136// The result type is always a list, whose element type is deduced from the 137// pattern in the same way as the return type for Regex is decided. 138// 139// If the pattern doesn't match at all, this function returns an empty list. 140func RegexAll(pattern, str cty.Value) (cty.Value, error) { 141 return RegexAllFunc.Call([]cty.Value{pattern, str}) 142} 143 144// regexPatternResultType parses the given regular expression pattern and 145// returns the structural type that would be returned to represent its 146// capture groups. 147// 148// Returns an error if parsing fails or if the pattern uses a mixture of 149// named and unnamed capture groups, which is not permitted. 150func regexPatternResultType(pattern string) (cty.Type, error) { 151 re, rawErr := regexp.Compile(pattern) 152 switch err := rawErr.(type) { 153 case *resyntax.Error: 154 return cty.NilType, fmt.Errorf("invalid regexp pattern: %s in %s", err.Code, err.Expr) 155 case error: 156 // Should never happen, since all regexp compile errors should 157 // be resyntax.Error, but just in case... 158 return cty.NilType, fmt.Errorf("error parsing pattern: %s", err) 159 } 160 161 allNames := re.SubexpNames()[1:] 162 var names []string 163 unnamed := 0 164 for _, name := range allNames { 165 if name == "" { 166 unnamed++ 167 } else { 168 if names == nil { 169 names = make([]string, 0, len(allNames)) 170 } 171 names = append(names, name) 172 } 173 } 174 switch { 175 case unnamed == 0 && len(names) == 0: 176 // If there are no capture groups at all then we'll return just a 177 // single string for the whole match. 178 return cty.String, nil 179 case unnamed > 0 && len(names) > 0: 180 return cty.NilType, fmt.Errorf("invalid regexp pattern: cannot mix both named and unnamed capture groups") 181 case unnamed > 0: 182 // For unnamed captures, we return a tuple of them all in order. 183 etys := make([]cty.Type, unnamed) 184 for i := range etys { 185 etys[i] = cty.String 186 } 187 return cty.Tuple(etys), nil 188 default: 189 // For named captures, we return an object using the capture names 190 // as keys. 191 atys := make(map[string]cty.Type, len(names)) 192 for _, name := range names { 193 atys[name] = cty.String 194 } 195 return cty.Object(atys), nil 196 } 197} 198 199func regexPatternResult(re *regexp.Regexp, str string, captureIdxs []int, retType cty.Type) cty.Value { 200 switch { 201 case retType == cty.String: 202 start, end := captureIdxs[0], captureIdxs[1] 203 return cty.StringVal(str[start:end]) 204 case retType.IsTupleType(): 205 captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair 206 vals := make([]cty.Value, len(captureIdxs)/2) 207 for i := range vals { 208 start, end := captureIdxs[i*2], captureIdxs[i*2+1] 209 if start < 0 || end < 0 { 210 vals[i] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match 211 continue 212 } 213 vals[i] = cty.StringVal(str[start:end]) 214 } 215 return cty.TupleVal(vals) 216 case retType.IsObjectType(): 217 captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair 218 vals := make(map[string]cty.Value, len(captureIdxs)/2) 219 names := re.SubexpNames()[1:] 220 for i, name := range names { 221 start, end := captureIdxs[i*2], captureIdxs[i*2+1] 222 if start < 0 || end < 0 { 223 vals[name] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match 224 continue 225 } 226 vals[name] = cty.StringVal(str[start:end]) 227 } 228 return cty.ObjectVal(vals) 229 default: 230 // Should never happen 231 panic(fmt.Sprintf("invalid return type %#v", retType)) 232 } 233} 234