1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package regexp
6
7import (
8	"fmt"
9	"strings"
10	"testing"
11)
12
13// For each pattern/text pair, what is the expected output of each function?
14// We can derive the textual results from the indexed results, the non-submatch
15// results from the submatched results, the single results from the 'all' results,
16// and the byte results from the string results. Therefore the table includes
17// only the FindAllStringSubmatchIndex result.
18type FindTest struct {
19	pat     string
20	text    string
21	matches [][]int
22}
23
24func (t FindTest) String() string {
25	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
26}
27
28var findTests = []FindTest{
29	{``, ``, build(1, 0, 0)},
30	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
31	{`a+`, "baaab", build(1, 1, 4)},
32	{"abcd..", "abcdef", build(1, 0, 6)},
33	{`a`, "a", build(1, 0, 1)},
34	{`x`, "y", nil},
35	{`b`, "abc", build(1, 1, 2)},
36	{`.`, "a", build(1, 0, 1)},
37	{`.*`, "abcdef", build(1, 0, 6)},
38	{`^`, "abcde", build(1, 0, 0)},
39	{`$`, "abcde", build(1, 5, 5)},
40	{`^abcd$`, "abcd", build(1, 0, 4)},
41	{`^bcd'`, "abcdef", nil},
42	{`^abcd$`, "abcde", nil},
43	{`a+`, "baaab", build(1, 1, 4)},
44	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
45	{`[a-z]+`, "abcd", build(1, 0, 4)},
46	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
47	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
48	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
49	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
50	{`日本語+`, "日本語", build(1, 0, 9)},
51	{`日本語+`, "日本語語語語", build(1, 0, 18)},
52	{`()`, "", build(1, 0, 0, 0, 0)},
53	{`(a)`, "a", build(1, 0, 1, 0, 1)},
54	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
55	{`(.*)`, "", build(1, 0, 0, 0, 0)},
56	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
57	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
58	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
59	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
60	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
61	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
62	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},
63
64	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
65	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
66	{`[.]`, ".", build(1, 0, 1)},
67	{`/$`, "/abc/", build(1, 4, 5)},
68	{`/$`, "/abc", nil},
69
70	// multiple matches
71	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
72	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
73	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
74	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
75	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},
76
77	// fixed bugs
78	{`ab$`, "cab", build(1, 1, 3)},
79	{`axxb$`, "axxcb", nil},
80	{`data`, "daXY data", build(1, 5, 9)},
81	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
82	{`zx+`, "zzx", build(1, 1, 3)},
83	{`ab$`, "abcab", build(1, 3, 5)},
84	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
85	{`(?:.|(?:.a))`, "", nil},
86	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
87	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
88	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
89	{`(?-s)(?:(?:^).)`, "\n", nil},
90	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
91	{`(?:(?:^).)`, "\n", nil},
92	{`\b`, "x", build(2, 0, 0, 1, 1)},
93	{`\b`, "xx", build(2, 0, 0, 2, 2)},
94	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
95	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
96	{`\B`, "x", nil},
97	{`\B`, "xx", build(1, 1, 1)},
98	{`\B`, "x y", nil},
99	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
100
101	// RE2 tests
102	{`[^\S\s]`, "abcd", nil},
103	{`[^\S[:space:]]`, "abcd", nil},
104	{`[^\D\d]`, "abcd", nil},
105	{`[^\D[:digit:]]`, "abcd", nil},
106	{`(?i)\W`, "x", nil},
107	{`(?i)\W`, "k", nil},
108	{`(?i)\W`, "s", nil},
109
110	// can backslash-escape any punctuation
111	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
112		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
113	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
114		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
115	{"\\`", "`", build(1, 0, 1)},
116	{"[\\`]+", "`", build(1, 0, 1)},
117
118	// long set of matches (longer than startSize)
119	{
120		".",
121		"qwertyuiopasdfghjklzxcvbnm1234567890",
122		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
123			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
124			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
125			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
126	},
127}
128
129// build is a helper to construct a [][]int by extracting n sequences from x.
130// This represents n matches with len(x)/n submatches each.
131func build(n int, x ...int) [][]int {
132	ret := make([][]int, n)
133	runLength := len(x) / n
134	j := 0
135	for i := range ret {
136		ret[i] = make([]int, runLength)
137		copy(ret[i], x[j:])
138		j += runLength
139		if j > len(x) {
140			panic("invalid build entry")
141		}
142	}
143	return ret
144}
145
146// First the simple cases.
147
148func TestFind(t *testing.T) {
149	for _, test := range findTests {
150		re := MustCompile(test.pat)
151		if re.String() != test.pat {
152			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
153		}
154		result := re.Find([]byte(test.text))
155		switch {
156		case len(test.matches) == 0 && len(result) == 0:
157			// ok
158		case test.matches == nil && result != nil:
159			t.Errorf("expected no match; got one: %s", test)
160		case test.matches != nil && result == nil:
161			t.Errorf("expected match; got none: %s", test)
162		case test.matches != nil && result != nil:
163			expect := test.text[test.matches[0][0]:test.matches[0][1]]
164			if expect != string(result) {
165				t.Errorf("expected %q got %q: %s", expect, result, test)
166			}
167		}
168	}
169}
170
171func TestFindString(t *testing.T) {
172	for _, test := range findTests {
173		result := MustCompile(test.pat).FindString(test.text)
174		switch {
175		case len(test.matches) == 0 && len(result) == 0:
176			// ok
177		case test.matches == nil && result != "":
178			t.Errorf("expected no match; got one: %s", test)
179		case test.matches != nil && result == "":
180			// Tricky because an empty result has two meanings: no match or empty match.
181			if test.matches[0][0] != test.matches[0][1] {
182				t.Errorf("expected match; got none: %s", test)
183			}
184		case test.matches != nil && result != "":
185			expect := test.text[test.matches[0][0]:test.matches[0][1]]
186			if expect != result {
187				t.Errorf("expected %q got %q: %s", expect, result, test)
188			}
189		}
190	}
191}
192
193func testFindIndex(test *FindTest, result []int, t *testing.T) {
194	switch {
195	case len(test.matches) == 0 && len(result) == 0:
196		// ok
197	case test.matches == nil && result != nil:
198		t.Errorf("expected no match; got one: %s", test)
199	case test.matches != nil && result == nil:
200		t.Errorf("expected match; got none: %s", test)
201	case test.matches != nil && result != nil:
202		expect := test.matches[0]
203		if expect[0] != result[0] || expect[1] != result[1] {
204			t.Errorf("expected %v got %v: %s", expect, result, test)
205		}
206	}
207}
208
209func TestFindIndex(t *testing.T) {
210	for _, test := range findTests {
211		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
212	}
213}
214
215func TestFindStringIndex(t *testing.T) {
216	for _, test := range findTests {
217		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
218	}
219}
220
221func TestFindReaderIndex(t *testing.T) {
222	for _, test := range findTests {
223		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
224	}
225}
226
227// Now come the simple All cases.
228
229func TestFindAll(t *testing.T) {
230	for _, test := range findTests {
231		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
232		switch {
233		case test.matches == nil && result == nil:
234			// ok
235		case test.matches == nil && result != nil:
236			t.Errorf("expected no match; got one: %s", test)
237		case test.matches != nil && result == nil:
238			t.Fatalf("expected match; got none: %s", test)
239		case test.matches != nil && result != nil:
240			if len(test.matches) != len(result) {
241				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
242				continue
243			}
244			for k, e := range test.matches {
245				expect := test.text[e[0]:e[1]]
246				if expect != string(result[k]) {
247					t.Errorf("match %d: expected %q got %q: %s", k, expect, result[k], test)
248				}
249			}
250		}
251	}
252}
253
254func TestFindAllString(t *testing.T) {
255	for _, test := range findTests {
256		result := MustCompile(test.pat).FindAllString(test.text, -1)
257		switch {
258		case test.matches == nil && result == nil:
259			// ok
260		case test.matches == nil && result != nil:
261			t.Errorf("expected no match; got one: %s", test)
262		case test.matches != nil && result == nil:
263			t.Errorf("expected match; got none: %s", test)
264		case test.matches != nil && result != nil:
265			if len(test.matches) != len(result) {
266				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
267				continue
268			}
269			for k, e := range test.matches {
270				expect := test.text[e[0]:e[1]]
271				if expect != result[k] {
272					t.Errorf("expected %q got %q: %s", expect, result, test)
273				}
274			}
275		}
276	}
277}
278
279func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
280	switch {
281	case test.matches == nil && result == nil:
282		// ok
283	case test.matches == nil && result != nil:
284		t.Errorf("expected no match; got one: %s", test)
285	case test.matches != nil && result == nil:
286		t.Errorf("expected match; got none: %s", test)
287	case test.matches != nil && result != nil:
288		if len(test.matches) != len(result) {
289			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
290			return
291		}
292		for k, e := range test.matches {
293			if e[0] != result[k][0] || e[1] != result[k][1] {
294				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
295			}
296		}
297	}
298}
299
300func TestFindAllIndex(t *testing.T) {
301	for _, test := range findTests {
302		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
303	}
304}
305
306func TestFindAllStringIndex(t *testing.T) {
307	for _, test := range findTests {
308		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
309	}
310}
311
312// Now come the Submatch cases.
313
314func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
315	if len(submatches) != len(result)*2 {
316		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
317		return
318	}
319	for k := 0; k < len(submatches); k += 2 {
320		if submatches[k] == -1 {
321			if result[k/2] != nil {
322				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
323			}
324			continue
325		}
326		expect := test.text[submatches[k]:submatches[k+1]]
327		if expect != string(result[k/2]) {
328			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
329			return
330		}
331	}
332}
333
334func TestFindSubmatch(t *testing.T) {
335	for _, test := range findTests {
336		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
337		switch {
338		case test.matches == nil && result == nil:
339			// ok
340		case test.matches == nil && result != nil:
341			t.Errorf("expected no match; got one: %s", test)
342		case test.matches != nil && result == nil:
343			t.Errorf("expected match; got none: %s", test)
344		case test.matches != nil && result != nil:
345			testSubmatchBytes(&test, 0, test.matches[0], result, t)
346		}
347	}
348}
349
350func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
351	if len(submatches) != len(result)*2 {
352		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
353		return
354	}
355	for k := 0; k < len(submatches); k += 2 {
356		if submatches[k] == -1 {
357			if result[k/2] != "" {
358				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
359			}
360			continue
361		}
362		expect := test.text[submatches[k]:submatches[k+1]]
363		if expect != result[k/2] {
364			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
365			return
366		}
367	}
368}
369
370func TestFindStringSubmatch(t *testing.T) {
371	for _, test := range findTests {
372		result := MustCompile(test.pat).FindStringSubmatch(test.text)
373		switch {
374		case test.matches == nil && result == nil:
375			// ok
376		case test.matches == nil && result != nil:
377			t.Errorf("expected no match; got one: %s", test)
378		case test.matches != nil && result == nil:
379			t.Errorf("expected match; got none: %s", test)
380		case test.matches != nil && result != nil:
381			testSubmatchString(&test, 0, test.matches[0], result, t)
382		}
383	}
384}
385
386func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
387	if len(expect) != len(result) {
388		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
389		return
390	}
391	for k, e := range expect {
392		if e != result[k] {
393			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
394		}
395	}
396}
397
398func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
399	switch {
400	case test.matches == nil && result == nil:
401		// ok
402	case test.matches == nil && result != nil:
403		t.Errorf("expected no match; got one: %s", test)
404	case test.matches != nil && result == nil:
405		t.Errorf("expected match; got none: %s", test)
406	case test.matches != nil && result != nil:
407		testSubmatchIndices(test, 0, test.matches[0], result, t)
408	}
409}
410
411func TestFindSubmatchIndex(t *testing.T) {
412	for _, test := range findTests {
413		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
414	}
415}
416
417func TestFindStringSubmatchIndex(t *testing.T) {
418	for _, test := range findTests {
419		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
420	}
421}
422
423func TestFindReaderSubmatchIndex(t *testing.T) {
424	for _, test := range findTests {
425		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
426	}
427}
428
429// Now come the monster AllSubmatch cases.
430
431func TestFindAllSubmatch(t *testing.T) {
432	for _, test := range findTests {
433		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
434		switch {
435		case test.matches == nil && result == nil:
436			// ok
437		case test.matches == nil && result != nil:
438			t.Errorf("expected no match; got one: %s", test)
439		case test.matches != nil && result == nil:
440			t.Errorf("expected match; got none: %s", test)
441		case len(test.matches) != len(result):
442			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
443		case test.matches != nil && result != nil:
444			for k, match := range test.matches {
445				testSubmatchBytes(&test, k, match, result[k], t)
446			}
447		}
448	}
449}
450
451func TestFindAllStringSubmatch(t *testing.T) {
452	for _, test := range findTests {
453		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
454		switch {
455		case test.matches == nil && result == nil:
456			// ok
457		case test.matches == nil && result != nil:
458			t.Errorf("expected no match; got one: %s", test)
459		case test.matches != nil && result == nil:
460			t.Errorf("expected match; got none: %s", test)
461		case len(test.matches) != len(result):
462			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
463		case test.matches != nil && result != nil:
464			for k, match := range test.matches {
465				testSubmatchString(&test, k, match, result[k], t)
466			}
467		}
468	}
469}
470
471func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
472	switch {
473	case test.matches == nil && result == nil:
474		// ok
475	case test.matches == nil && result != nil:
476		t.Errorf("expected no match; got one: %s", test)
477	case test.matches != nil && result == nil:
478		t.Errorf("expected match; got none: %s", test)
479	case len(test.matches) != len(result):
480		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
481	case test.matches != nil && result != nil:
482		for k, match := range test.matches {
483			testSubmatchIndices(test, k, match, result[k], t)
484		}
485	}
486}
487
488func TestFindAllSubmatchIndex(t *testing.T) {
489	for _, test := range findTests {
490		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
491	}
492}
493
494func TestFindAllStringSubmatchIndex(t *testing.T) {
495	for _, test := range findTests {
496		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
497	}
498}
499