1// Copyright 2010 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package regexp
6
7import (
8	"bufio"
9	"compress/bzip2"
10	"fmt"
11	"io"
12	"os"
13	"path/filepath"
14	"regexp/syntax"
15	"strconv"
16	"strings"
17	"testing"
18	"unicode/utf8"
19)
20
21// TestRE2 tests this package's regexp API against test cases
22// considered during RE2's exhaustive tests, which run all possible
23// regexps over a given set of atoms and operators, up to a given
24// complexity, over all possible strings over a given alphabet,
25// up to a given size.  Rather than try to link with RE2, we read a
26// log file containing the test cases and the expected matches.
27// The log file, re2-exhaustive.txt, is generated by running 'make log'
28// in the open source RE2 distribution https://github.com/google/re2/.
29//
30// The test file format is a sequence of stanzas like:
31//
32//	strings
33//	"abc"
34//	"123x"
35//	regexps
36//	"[a-z]+"
37//	0-3;0-3
38//	-;-
39//	"([0-9])([0-9])([0-9])"
40//	-;-
41//	-;0-3 0-1 1-2 2-3
42//
43// The stanza begins by defining a set of strings, quoted
44// using Go double-quote syntax, one per line.  Then the
45// regexps section gives a sequence of regexps to run on
46// the strings.  In the block that follows a regexp, each line
47// gives the semicolon-separated match results of running
48// the regexp on the corresponding string.
49// Each match result is either a single -, meaning no match, or a
50// space-separated sequence of pairs giving the match and
51// submatch indices.  An unmatched subexpression formats
52// its pair as a single - (not illustrated above).  For now
53// each regexp run produces two match results, one for a
54// ``full match'' that restricts the regexp to matching the entire
55// string or nothing, and one for a ``partial match'' that gives
56// the leftmost first match found in the string.
57//
58// Lines beginning with # are comments.  Lines beginning with
59// a capital letter are test names printed during RE2's test suite
60// and are echoed into t but otherwise ignored.
61//
62// At time of writing, re2-exhaustive.txt is 59 MB but compresses to 385 kB,
63// so we store re2-exhaustive.txt.bz2 in the repository and decompress it on the fly.
64//
65func TestRE2Search(t *testing.T) {
66	testRE2(t, "testdata/re2-search.txt")
67}
68
69func testRE2(t *testing.T, file string) {
70	f, err := os.Open(file)
71	if err != nil {
72		t.Fatal(err)
73	}
74	defer f.Close()
75	var txt io.Reader
76	if strings.HasSuffix(file, ".bz2") {
77		z := bzip2.NewReader(f)
78		txt = z
79		file = file[:len(file)-len(".bz2")] // for error messages
80	} else {
81		txt = f
82	}
83	lineno := 0
84	scanner := bufio.NewScanner(txt)
85	var (
86		str       []string
87		input     []string
88		inStrings bool
89		re        *Regexp
90		refull    *Regexp
91		nfail     int
92		ncase     int
93	)
94	for lineno := 1; scanner.Scan(); lineno++ {
95		line := scanner.Text()
96		switch {
97		case line == "":
98			t.Fatalf("%s:%d: unexpected blank line", file, lineno)
99		case line[0] == '#':
100			continue
101		case 'A' <= line[0] && line[0] <= 'Z':
102			// Test name.
103			t.Logf("%s\n", line)
104			continue
105		case line == "strings":
106			str = str[:0]
107			inStrings = true
108		case line == "regexps":
109			inStrings = false
110		case line[0] == '"':
111			q, err := strconv.Unquote(line)
112			if err != nil {
113				// Fatal because we'll get out of sync.
114				t.Fatalf("%s:%d: unquote %s: %v", file, lineno, line, err)
115			}
116			if inStrings {
117				str = append(str, q)
118				continue
119			}
120			// Is a regexp.
121			if len(input) != 0 {
122				t.Fatalf("%s:%d: out of sync: have %d strings left before %#q", file, lineno, len(input), q)
123			}
124			re, err = tryCompile(q)
125			if err != nil {
126				if err.Error() == "error parsing regexp: invalid escape sequence: `\\C`" {
127					// We don't and likely never will support \C; keep going.
128					continue
129				}
130				t.Errorf("%s:%d: compile %#q: %v", file, lineno, q, err)
131				if nfail++; nfail >= 100 {
132					t.Fatalf("stopping after %d errors", nfail)
133				}
134				continue
135			}
136			full := `\A(?:` + q + `)\z`
137			refull, err = tryCompile(full)
138			if err != nil {
139				// Fatal because q worked, so this should always work.
140				t.Fatalf("%s:%d: compile full %#q: %v", file, lineno, full, err)
141			}
142			input = str
143		case line[0] == '-' || '0' <= line[0] && line[0] <= '9':
144			// A sequence of match results.
145			ncase++
146			if re == nil {
147				// Failed to compile: skip results.
148				continue
149			}
150			if len(input) == 0 {
151				t.Fatalf("%s:%d: out of sync: no input remaining", file, lineno)
152			}
153			var text string
154			text, input = input[0], input[1:]
155			if !isSingleBytes(text) && strings.Contains(re.String(), `\B`) {
156				// RE2's \B considers every byte position,
157				// so it sees 'not word boundary' in the
158				// middle of UTF-8 sequences.  This package
159				// only considers the positions between runes,
160				// so it disagrees.  Skip those cases.
161				continue
162			}
163			res := strings.Split(line, ";")
164			if len(res) != len(run) {
165				t.Fatalf("%s:%d: have %d test results, want %d", file, lineno, len(res), len(run))
166			}
167			for i := range res {
168				have, suffix := run[i](re, refull, text)
169				want := parseResult(t, file, lineno, res[i])
170				if !same(have, want) {
171					t.Errorf("%s:%d: %#q%s.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, re, suffix, text, have, want)
172					if nfail++; nfail >= 100 {
173						t.Fatalf("stopping after %d errors", nfail)
174					}
175					continue
176				}
177				b, suffix := match[i](re, refull, text)
178				if b != (want != nil) {
179					t.Errorf("%s:%d: %#q%s.MatchString(%#q) = %v, want %v", file, lineno, re, suffix, text, b, !b)
180					if nfail++; nfail >= 100 {
181						t.Fatalf("stopping after %d errors", nfail)
182					}
183					continue
184				}
185			}
186
187		default:
188			t.Fatalf("%s:%d: out of sync: %s\n", file, lineno, line)
189		}
190	}
191	if err := scanner.Err(); err != nil {
192		t.Fatalf("%s:%d: %v", file, lineno, err)
193	}
194	if len(input) != 0 {
195		t.Fatalf("%s:%d: out of sync: have %d strings left at EOF", file, lineno, len(input))
196	}
197	t.Logf("%d cases tested", ncase)
198}
199
200var run = []func(*Regexp, *Regexp, string) ([]int, string){
201	runFull,
202	runPartial,
203	runFullLongest,
204	runPartialLongest,
205}
206
207func runFull(re, refull *Regexp, text string) ([]int, string) {
208	refull.longest = false
209	return refull.FindStringSubmatchIndex(text), "[full]"
210}
211
212func runPartial(re, refull *Regexp, text string) ([]int, string) {
213	re.longest = false
214	return re.FindStringSubmatchIndex(text), ""
215}
216
217func runFullLongest(re, refull *Regexp, text string) ([]int, string) {
218	refull.longest = true
219	return refull.FindStringSubmatchIndex(text), "[full,longest]"
220}
221
222func runPartialLongest(re, refull *Regexp, text string) ([]int, string) {
223	re.longest = true
224	return re.FindStringSubmatchIndex(text), "[longest]"
225}
226
227var match = []func(*Regexp, *Regexp, string) (bool, string){
228	matchFull,
229	matchPartial,
230	matchFullLongest,
231	matchPartialLongest,
232}
233
234func matchFull(re, refull *Regexp, text string) (bool, string) {
235	refull.longest = false
236	return refull.MatchString(text), "[full]"
237}
238
239func matchPartial(re, refull *Regexp, text string) (bool, string) {
240	re.longest = false
241	return re.MatchString(text), ""
242}
243
244func matchFullLongest(re, refull *Regexp, text string) (bool, string) {
245	refull.longest = true
246	return refull.MatchString(text), "[full,longest]"
247}
248
249func matchPartialLongest(re, refull *Regexp, text string) (bool, string) {
250	re.longest = true
251	return re.MatchString(text), "[longest]"
252}
253
254func isSingleBytes(s string) bool {
255	for _, c := range s {
256		if c >= utf8.RuneSelf {
257			return false
258		}
259	}
260	return true
261}
262
263func tryCompile(s string) (re *Regexp, err error) {
264	// Protect against panic during Compile.
265	defer func() {
266		if r := recover(); r != nil {
267			err = fmt.Errorf("panic: %v", r)
268		}
269	}()
270	return Compile(s)
271}
272
273func parseResult(t *testing.T, file string, lineno int, res string) []int {
274	// A single - indicates no match.
275	if res == "-" {
276		return nil
277	}
278	// Otherwise, a space-separated list of pairs.
279	n := 1
280	for j := 0; j < len(res); j++ {
281		if res[j] == ' ' {
282			n++
283		}
284	}
285	out := make([]int, 2*n)
286	i := 0
287	n = 0
288	for j := 0; j <= len(res); j++ {
289		if j == len(res) || res[j] == ' ' {
290			// Process a single pair.  - means no submatch.
291			pair := res[i:j]
292			if pair == "-" {
293				out[n] = -1
294				out[n+1] = -1
295			} else {
296				k := strings.Index(pair, "-")
297				if k < 0 {
298					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
299				}
300				lo, err1 := strconv.Atoi(pair[:k])
301				hi, err2 := strconv.Atoi(pair[k+1:])
302				if err1 != nil || err2 != nil || lo > hi {
303					t.Fatalf("%s:%d: invalid pair %s", file, lineno, pair)
304				}
305				out[n] = lo
306				out[n+1] = hi
307			}
308			n += 2
309			i = j + 1
310		}
311	}
312	return out
313}
314
315func same(x, y []int) bool {
316	if len(x) != len(y) {
317		return false
318	}
319	for i, xi := range x {
320		if xi != y[i] {
321			return false
322		}
323	}
324	return true
325}
326
327// TestFowler runs this package's regexp API against the
328// POSIX regular expression tests collected by Glenn Fowler
329// at http://www2.research.att.com/~astopen/testregex/testregex.html.
330func TestFowler(t *testing.T) {
331	files, err := filepath.Glob("testdata/*.dat")
332	if err != nil {
333		t.Fatal(err)
334	}
335	for _, file := range files {
336		t.Log(file)
337		testFowler(t, file)
338	}
339}
340
341var notab = MustCompilePOSIX(`[^\t]+`)
342
343func testFowler(t *testing.T, file string) {
344	f, err := os.Open(file)
345	if err != nil {
346		t.Error(err)
347		return
348	}
349	defer f.Close()
350	b := bufio.NewReader(f)
351	lineno := 0
352	lastRegexp := ""
353Reading:
354	for {
355		lineno++
356		line, err := b.ReadString('\n')
357		if err != nil {
358			if err != io.EOF {
359				t.Errorf("%s:%d: %v", file, lineno, err)
360			}
361			break Reading
362		}
363
364		// http://www2.research.att.com/~astopen/man/man1/testregex.html
365		//
366		// INPUT FORMAT
367		//   Input lines may be blank, a comment beginning with #, or a test
368		//   specification. A specification is five fields separated by one
369		//   or more tabs. NULL denotes the empty string and NIL denotes the
370		//   0 pointer.
371		if line[0] == '#' || line[0] == '\n' {
372			continue Reading
373		}
374		line = line[:len(line)-1]
375		field := notab.FindAllString(line, -1)
376		for i, f := range field {
377			if f == "NULL" {
378				field[i] = ""
379			}
380			if f == "NIL" {
381				t.Logf("%s:%d: skip: %s", file, lineno, line)
382				continue Reading
383			}
384		}
385		if len(field) == 0 {
386			continue Reading
387		}
388
389		//   Field 1: the regex(3) flags to apply, one character per REG_feature
390		//   flag. The test is skipped if REG_feature is not supported by the
391		//   implementation. If the first character is not [BEASKLP] then the
392		//   specification is a global control line. One or more of [BEASKLP] may be
393		//   specified; the test will be repeated for each mode.
394		//
395		//     B 	basic			BRE	(grep, ed, sed)
396		//     E 	REG_EXTENDED		ERE	(egrep)
397		//     A	REG_AUGMENTED		ARE	(egrep with negation)
398		//     S	REG_SHELL		SRE	(sh glob)
399		//     K	REG_SHELL|REG_AUGMENTED	KRE	(ksh glob)
400		//     L	REG_LITERAL		LRE	(fgrep)
401		//
402		//     a	REG_LEFT|REG_RIGHT	implicit ^...$
403		//     b	REG_NOTBOL		lhs does not match ^
404		//     c	REG_COMMENT		ignore space and #...\n
405		//     d	REG_SHELL_DOT		explicit leading . match
406		//     e	REG_NOTEOL		rhs does not match $
407		//     f	REG_MULTIPLE		multiple \n separated patterns
408		//     g	FNM_LEADING_DIR		testfnmatch only -- match until /
409		//     h	REG_MULTIREF		multiple digit backref
410		//     i	REG_ICASE		ignore case
411		//     j	REG_SPAN		. matches \n
412		//     k	REG_ESCAPE		\ to ecape [...] delimiter
413		//     l	REG_LEFT		implicit ^...
414		//     m	REG_MINIMAL		minimal match
415		//     n	REG_NEWLINE		explicit \n match
416		//     o	REG_ENCLOSED		(|&) magic inside [@|&](...)
417		//     p	REG_SHELL_PATH		explicit / match
418		//     q	REG_DELIMITED		delimited pattern
419		//     r	REG_RIGHT		implicit ...$
420		//     s	REG_SHELL_ESCAPED	\ not special
421		//     t	REG_MUSTDELIM		all delimiters must be specified
422		//     u	standard unspecified behavior -- errors not counted
423		//     v	REG_CLASS_ESCAPE	\ special inside [...]
424		//     w	REG_NOSUB		no subexpression match array
425		//     x	REG_LENIENT		let some errors slide
426		//     y	REG_LEFT		regexec() implicit ^...
427		//     z	REG_NULL		NULL subexpressions ok
428		//     $	                        expand C \c escapes in fields 2 and 3
429		//     /	                        field 2 is a regsubcomp() expression
430		//     =	                        field 3 is a regdecomp() expression
431		//
432		//   Field 1 control lines:
433		//
434		//     C		set LC_COLLATE and LC_CTYPE to locale in field 2
435		//
436		//     ?test ...	output field 5 if passed and != EXPECTED, silent otherwise
437		//     &test ...	output field 5 if current and previous passed
438		//     |test ...	output field 5 if current passed and previous failed
439		//     ; ...	output field 2 if previous failed
440		//     {test ...	skip if failed until }
441		//     }		end of skip
442		//
443		//     : comment		comment copied as output NOTE
444		//     :comment:test	:comment: ignored
445		//     N[OTE] comment	comment copied as output NOTE
446		//     T[EST] comment	comment
447		//
448		//     number		use number for nmatch (20 by default)
449		flag := field[0]
450		switch flag[0] {
451		case '?', '&', '|', ';', '{', '}':
452			// Ignore all the control operators.
453			// Just run everything.
454			flag = flag[1:]
455			if flag == "" {
456				continue Reading
457			}
458		case ':':
459			i := strings.Index(flag[1:], ":")
460			if i < 0 {
461				t.Logf("skip: %s", line)
462				continue Reading
463			}
464			flag = flag[1+i+1:]
465		case 'C', 'N', 'T', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
466			t.Logf("skip: %s", line)
467			continue Reading
468		}
469
470		// Can check field count now that we've handled the myriad comment formats.
471		if len(field) < 4 {
472			t.Errorf("%s:%d: too few fields: %s", file, lineno, line)
473			continue Reading
474		}
475
476		// Expand C escapes (a.k.a. Go escapes).
477		if strings.Contains(flag, "$") {
478			f := `"` + field[1] + `"`
479			if field[1], err = strconv.Unquote(f); err != nil {
480				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
481			}
482			f = `"` + field[2] + `"`
483			if field[2], err = strconv.Unquote(f); err != nil {
484				t.Errorf("%s:%d: cannot unquote %s", file, lineno, f)
485			}
486		}
487
488		//   Field 2: the regular expression pattern; SAME uses the pattern from
489		//     the previous specification.
490		//
491		if field[1] == "SAME" {
492			field[1] = lastRegexp
493		}
494		lastRegexp = field[1]
495
496		//   Field 3: the string to match.
497		text := field[2]
498
499		//   Field 4: the test outcome...
500		ok, shouldCompile, shouldMatch, pos := parseFowlerResult(field[3])
501		if !ok {
502			t.Errorf("%s:%d: cannot parse result %#q", file, lineno, field[3])
503			continue Reading
504		}
505
506		//   Field 5: optional comment appended to the report.
507
508	Testing:
509		// Run test once for each specified capital letter mode that we support.
510		for _, c := range flag {
511			pattern := field[1]
512			syn := syntax.POSIX | syntax.ClassNL
513			switch c {
514			default:
515				continue Testing
516			case 'E':
517				// extended regexp (what we support)
518			case 'L':
519				// literal
520				pattern = QuoteMeta(pattern)
521			}
522
523			for _, c := range flag {
524				switch c {
525				case 'i':
526					syn |= syntax.FoldCase
527				}
528			}
529
530			re, err := compile(pattern, syn, true)
531			if err != nil {
532				if shouldCompile {
533					t.Errorf("%s:%d: %#q did not compile", file, lineno, pattern)
534				}
535				continue Testing
536			}
537			if !shouldCompile {
538				t.Errorf("%s:%d: %#q should not compile", file, lineno, pattern)
539				continue Testing
540			}
541			match := re.MatchString(text)
542			if match != shouldMatch {
543				t.Errorf("%s:%d: %#q.Match(%#q) = %v, want %v", file, lineno, pattern, text, match, shouldMatch)
544				continue Testing
545			}
546			have := re.FindStringSubmatchIndex(text)
547			if (len(have) > 0) != match {
548				t.Errorf("%s:%d: %#q.Match(%#q) = %v, but %#q.FindSubmatchIndex(%#q) = %v", file, lineno, pattern, text, match, pattern, text, have)
549				continue Testing
550			}
551			if len(have) > len(pos) {
552				have = have[:len(pos)]
553			}
554			if !same(have, pos) {
555				t.Errorf("%s:%d: %#q.FindSubmatchIndex(%#q) = %v, want %v", file, lineno, pattern, text, have, pos)
556			}
557		}
558	}
559}
560
561func parseFowlerResult(s string) (ok, compiled, matched bool, pos []int) {
562	//   Field 4: the test outcome. This is either one of the posix error
563	//     codes (with REG_ omitted) or the match array, a list of (m,n)
564	//     entries with m and n being first and last+1 positions in the
565	//     field 3 string, or NULL if REG_NOSUB is in effect and success
566	//     is expected. BADPAT is acceptable in place of any regcomp(3)
567	//     error code. The match[] array is initialized to (-2,-2) before
568	//     each test. All array elements from 0 to nmatch-1 must be specified
569	//     in the outcome. Unspecified endpoints (offset -1) are denoted by ?.
570	//     Unset endpoints (offset -2) are denoted by X. {x}(o:n) denotes a
571	//     matched (?{...}) expression, where x is the text enclosed by {...},
572	//     o is the expression ordinal counting from 1, and n is the length of
573	//     the unmatched portion of the subject string. If x starts with a
574	//     number then that is the return value of re_execf(), otherwise 0 is
575	//     returned.
576	switch {
577	case s == "":
578		// Match with no position information.
579		ok = true
580		compiled = true
581		matched = true
582		return
583	case s == "NOMATCH":
584		// Match failure.
585		ok = true
586		compiled = true
587		matched = false
588		return
589	case 'A' <= s[0] && s[0] <= 'Z':
590		// All the other error codes are compile errors.
591		ok = true
592		compiled = false
593		return
594	}
595	compiled = true
596
597	var x []int
598	for s != "" {
599		var end byte = ')'
600		if len(x)%2 == 0 {
601			if s[0] != '(' {
602				ok = false
603				return
604			}
605			s = s[1:]
606			end = ','
607		}
608		i := 0
609		for i < len(s) && s[i] != end {
610			i++
611		}
612		if i == 0 || i == len(s) {
613			ok = false
614			return
615		}
616		var v = -1
617		var err error
618		if s[:i] != "?" {
619			v, err = strconv.Atoi(s[:i])
620			if err != nil {
621				ok = false
622				return
623			}
624		}
625		x = append(x, v)
626		s = s[i+1:]
627	}
628	if len(x)%2 != 0 {
629		ok = false
630		return
631	}
632	ok = true
633	matched = true
634	pos = x
635	return
636}
637
638var text []byte
639
640func makeText(n int) []byte {
641	if len(text) >= n {
642		return text[:n]
643	}
644	text = make([]byte, n)
645	x := ^uint32(0)
646	for i := range text {
647		x += x
648		x ^= 1
649		if int32(x) < 0 {
650			x ^= 0x88888eef
651		}
652		if x%31 == 0 {
653			text[i] = '\n'
654		} else {
655			text[i] = byte(x%(0x7E+1-0x20) + 0x20)
656		}
657	}
658	return text
659}
660
661func benchmark(b *testing.B, re string, n int) {
662	r := MustCompile(re)
663	t := makeText(n)
664	b.ResetTimer()
665	b.SetBytes(int64(n))
666	for i := 0; i < b.N; i++ {
667		if r.Match(t) {
668			b.Fatal("match!")
669		}
670	}
671}
672
673const (
674	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
675	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
676	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
677	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
678	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
679		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
680)
681
682func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
683func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
684func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
685func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
686func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
687func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
688func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
689func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
690func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
691func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
692func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
693func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
694func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
695func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
696func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
697func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
698func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
699func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
700func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
701func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
702
703func TestLongest(t *testing.T) {
704	re, err := Compile(`a(|b)`)
705	if err != nil {
706		t.Fatal(err)
707	}
708	if g, w := re.FindString("ab"), "a"; g != w {
709		t.Errorf("first match was %q, want %q", g, w)
710	}
711	re.Longest()
712	if g, w := re.FindString("ab"), "ab"; g != w {
713		t.Errorf("longest match was %q, want %q", g, w)
714	}
715}
716
717// TestProgramTooLongForBacktrack tests that a regex which is too long
718// for the backtracker still executes properly.
719func TestProgramTooLongForBacktrack(t *testing.T) {
720	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`)
721	if !longRegex.MatchString("two") {
722		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
723	}
724	if longRegex.MatchString("xxx") {
725		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
726	}
727}
728