1package stdlib
2
3import (
4	"fmt"
5	"regexp"
6	resyntax "regexp/syntax"
7
8	"github.com/zclconf/go-cty/cty"
9	"github.com/zclconf/go-cty/cty/function"
10)
11
12var RegexFunc = function.New(&function.Spec{
13	Params: []function.Parameter{
14		{
15			Name: "pattern",
16			Type: cty.String,
17		},
18		{
19			Name: "string",
20			Type: cty.String,
21		},
22	},
23	Type: func(args []cty.Value) (cty.Type, error) {
24		if !args[0].IsKnown() {
25			// We can't predict our type without seeing our pattern
26			return cty.DynamicPseudoType, nil
27		}
28
29		retTy, err := regexPatternResultType(args[0].AsString())
30		if err != nil {
31			err = function.NewArgError(0, err)
32		}
33		return retTy, err
34	},
35	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
36		if retType == cty.DynamicPseudoType {
37			return cty.DynamicVal, nil
38		}
39
40		re, err := regexp.Compile(args[0].AsString())
41		if err != nil {
42			// Should never happen, since we checked this in the Type function above.
43			return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err)
44		}
45		str := args[1].AsString()
46
47		captureIdxs := re.FindStringSubmatchIndex(str)
48		if captureIdxs == nil {
49			return cty.NilVal, fmt.Errorf("pattern did not match any part of the given string")
50		}
51
52		return regexPatternResult(re, str, captureIdxs, retType), nil
53	},
54})
55
56var RegexAllFunc = function.New(&function.Spec{
57	Params: []function.Parameter{
58		{
59			Name: "pattern",
60			Type: cty.String,
61		},
62		{
63			Name: "string",
64			Type: cty.String,
65		},
66	},
67	Type: func(args []cty.Value) (cty.Type, error) {
68		if !args[0].IsKnown() {
69			// We can't predict our type without seeing our pattern,
70			// but we do know it'll always be a list of something.
71			return cty.List(cty.DynamicPseudoType), nil
72		}
73
74		retTy, err := regexPatternResultType(args[0].AsString())
75		if err != nil {
76			err = function.NewArgError(0, err)
77		}
78		return cty.List(retTy), err
79	},
80	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
81		ety := retType.ElementType()
82		if ety == cty.DynamicPseudoType {
83			return cty.DynamicVal, nil
84		}
85
86		re, err := regexp.Compile(args[0].AsString())
87		if err != nil {
88			// Should never happen, since we checked this in the Type function above.
89			return cty.NilVal, function.NewArgErrorf(0, "error parsing pattern: %s", err)
90		}
91		str := args[1].AsString()
92
93		captureIdxsEach := re.FindAllStringSubmatchIndex(str, -1)
94		if len(captureIdxsEach) == 0 {
95			return cty.ListValEmpty(ety), nil
96		}
97
98		elems := make([]cty.Value, len(captureIdxsEach))
99		for i, captureIdxs := range captureIdxsEach {
100			elems[i] = regexPatternResult(re, str, captureIdxs, ety)
101		}
102		return cty.ListVal(elems), nil
103	},
104})
105
106// Regex is a function that extracts one or more substrings from a given
107// string by applying a regular expression pattern, describing the first
108// match.
109//
110// The return type depends on the composition of the capture groups (if any)
111// in the pattern:
112//
113//   - If there are no capture groups at all, the result is a single string
114//     representing the entire matched pattern.
115//   - If all of the capture groups are named, the result is an object whose
116//     keys are the named groups and whose values are their sub-matches, or
117//     null if a particular sub-group was inside another group that didn't
118//     match.
119//   - If none of the capture groups are named, the result is a tuple whose
120//     elements are the sub-groups in order and whose values are their
121//     sub-matches, or null if a particular sub-group was inside another group
122//     that didn't match.
123//   - It is invalid to use both named and un-named capture groups together in
124//     the same pattern.
125//
126// If the pattern doesn't match, this function returns an error. To test for
127// a match, call RegexAll and check if the length of the result is greater
128// than zero.
129func Regex(pattern, str cty.Value) (cty.Value, error) {
130	return RegexFunc.Call([]cty.Value{pattern, str})
131}
132
133// RegexAll is similar to Regex but it finds all of the non-overlapping matches
134// in the given string and returns a list of them.
135//
136// The result type is always a list, whose element type is deduced from the
137// pattern in the same way as the return type for Regex is decided.
138//
139// If the pattern doesn't match at all, this function returns an empty list.
140func RegexAll(pattern, str cty.Value) (cty.Value, error) {
141	return RegexAllFunc.Call([]cty.Value{pattern, str})
142}
143
144// regexPatternResultType parses the given regular expression pattern and
145// returns the structural type that would be returned to represent its
146// capture groups.
147//
148// Returns an error if parsing fails or if the pattern uses a mixture of
149// named and unnamed capture groups, which is not permitted.
150func regexPatternResultType(pattern string) (cty.Type, error) {
151	re, rawErr := regexp.Compile(pattern)
152	switch err := rawErr.(type) {
153	case *resyntax.Error:
154		return cty.NilType, fmt.Errorf("invalid regexp pattern: %s in %s", err.Code, err.Expr)
155	case error:
156		// Should never happen, since all regexp compile errors should
157		// be resyntax.Error, but just in case...
158		return cty.NilType, fmt.Errorf("error parsing pattern: %s", err)
159	}
160
161	allNames := re.SubexpNames()[1:]
162	var names []string
163	unnamed := 0
164	for _, name := range allNames {
165		if name == "" {
166			unnamed++
167		} else {
168			if names == nil {
169				names = make([]string, 0, len(allNames))
170			}
171			names = append(names, name)
172		}
173	}
174	switch {
175	case unnamed == 0 && len(names) == 0:
176		// If there are no capture groups at all then we'll return just a
177		// single string for the whole match.
178		return cty.String, nil
179	case unnamed > 0 && len(names) > 0:
180		return cty.NilType, fmt.Errorf("invalid regexp pattern: cannot mix both named and unnamed capture groups")
181	case unnamed > 0:
182		// For unnamed captures, we return a tuple of them all in order.
183		etys := make([]cty.Type, unnamed)
184		for i := range etys {
185			etys[i] = cty.String
186		}
187		return cty.Tuple(etys), nil
188	default:
189		// For named captures, we return an object using the capture names
190		// as keys.
191		atys := make(map[string]cty.Type, len(names))
192		for _, name := range names {
193			atys[name] = cty.String
194		}
195		return cty.Object(atys), nil
196	}
197}
198
199func regexPatternResult(re *regexp.Regexp, str string, captureIdxs []int, retType cty.Type) cty.Value {
200	switch {
201	case retType == cty.String:
202		start, end := captureIdxs[0], captureIdxs[1]
203		return cty.StringVal(str[start:end])
204	case retType.IsTupleType():
205		captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair
206		vals := make([]cty.Value, len(captureIdxs)/2)
207		for i := range vals {
208			start, end := captureIdxs[i*2], captureIdxs[i*2+1]
209			if start < 0 || end < 0 {
210				vals[i] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match
211				continue
212			}
213			vals[i] = cty.StringVal(str[start:end])
214		}
215		return cty.TupleVal(vals)
216	case retType.IsObjectType():
217		captureIdxs = captureIdxs[2:] // index 0 is the whole pattern span, which we ignore by skipping one pair
218		vals := make(map[string]cty.Value, len(captureIdxs)/2)
219		names := re.SubexpNames()[1:]
220		for i, name := range names {
221			start, end := captureIdxs[i*2], captureIdxs[i*2+1]
222			if start < 0 || end < 0 {
223				vals[name] = cty.NullVal(cty.String) // Did not match anything because containing group didn't match
224				continue
225			}
226			vals[name] = cty.StringVal(str[start:end])
227		}
228		return cty.ObjectVal(vals)
229	default:
230		// Should never happen
231		panic(fmt.Sprintf("invalid return type %#v", retType))
232	}
233}
234