1// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package scanner
6
7import (
8	"go/token"
9	"io/ioutil"
10	"os"
11	"path/filepath"
12	"runtime"
13	"testing"
14)
15
// fset is the file set used by the tests in this file: scanned sources are
// registered with it through AddFile, and checkPos resolves positions
// through it.
var fset = token.NewFileSet()
17
// Token classes, as returned by tokenclass and recorded in the class field
// of elt.
const /* class */ (
	special = iota // neither a literal, an operator, nor a keyword
	literal
	operator
	keyword
)
24
25func tokenclass(tok token.Token) int {
26	switch {
27	case tok.IsLiteral():
28		return literal
29	case tok.IsOperator():
30		return operator
31	case tok.IsKeyword():
32		return keyword
33	}
34	return special
35}
36
// elt describes one entry of the tokens table: a token, the source text
// that is expected to scan to it, and the token class expected for it.
type elt struct {
	tok   token.Token // expected token
	lit   string      // source text of the token
	class int         // expected token class (special, literal, operator, or keyword)
}
42
// tokens lists the tokens exercised by TestScan. The literals are
// concatenated in table order, each followed by whitespace, to form source.
var tokens = [...]elt{
	// Special tokens
	{token.COMMENT, "/* a comment */", special},
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "/*\r*/", special},
	{token.COMMENT, "/**\r/*/", special}, // issue 11151
	{token.COMMENT, "/**\r\r/*/", special},
	{token.COMMENT, "//\r\n", special},

	// Identifiers and basic type literals
	{token.IDENT, "foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar9876", literal},
	{token.IDENT, "ŝ", literal},    // was bug (issue 4000)
	{token.IDENT, "ŝfoo", literal}, // was bug (issue 4000)
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "01234567", literal},
	{token.INT, "0xcafebabe", literal},
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},
	{token.IMAG, "0i", literal},
	{token.IMAG, "1i", literal},
	{token.IMAG, "012345678901234567889i", literal},
	{token.IMAG, "123456789012345678890i", literal},
	{token.IMAG, "0.i", literal},
	{token.IMAG, ".0i", literal},
	{token.IMAG, "3.14159265i", literal},
	{token.IMAG, "1e0i", literal},
	{token.IMAG, "1e+100i", literal},
	{token.IMAG, "1e-100i", literal},
	{token.IMAG, "2.71828e-1000i", literal},
	{token.CHAR, "'a'", literal},
	{token.CHAR, "'\\000'", literal},
	{token.CHAR, "'\\xFF'", literal},
	{token.CHAR, "'\\uff16'", literal},
	{token.CHAR, "'\\U0000ff16'", literal},
	{token.STRING, "`foobar`", literal},
	{token.STRING, "`" + `foo
	                        bar` +
		"`",
		literal,
	},
	{token.STRING, "`\r`", literal},
	{token.STRING, "`foo\r\nbar`", literal},

	// Operators and delimiters
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},
	{token.REM, "%", operator},

	{token.AND, "&", operator},
	{token.OR, "|", operator},
	{token.XOR, "^", operator},
	{token.SHL, "<<", operator},
	{token.SHR, ">>", operator},
	{token.AND_NOT, "&^", operator},

	{token.ADD_ASSIGN, "+=", operator},
	{token.SUB_ASSIGN, "-=", operator},
	{token.MUL_ASSIGN, "*=", operator},
	{token.QUO_ASSIGN, "/=", operator},
	{token.REM_ASSIGN, "%=", operator},

	{token.AND_ASSIGN, "&=", operator},
	{token.OR_ASSIGN, "|=", operator},
	{token.XOR_ASSIGN, "^=", operator},
	{token.SHL_ASSIGN, "<<=", operator},
	{token.SHR_ASSIGN, ">>=", operator},
	{token.AND_NOT_ASSIGN, "&^=", operator},

	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},
	{token.ARROW, "<-", operator},
	{token.INC, "++", operator},
	{token.DEC, "--", operator},

	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.ASSIGN, "=", operator},
	{token.NOT, "!", operator},

	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.DEFINE, ":=", operator},
	{token.ELLIPSIS, "...", operator},

	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},

	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.SEMICOLON, ";", operator},
	{token.COLON, ":", operator},

	// Keywords
	{token.BREAK, "break", keyword},
	{token.CASE, "case", keyword},
	{token.CHAN, "chan", keyword},
	{token.CONST, "const", keyword},
	{token.CONTINUE, "continue", keyword},

	{token.DEFAULT, "default", keyword},
	{token.DEFER, "defer", keyword},
	{token.ELSE, "else", keyword},
	{token.FALLTHROUGH, "fallthrough", keyword},
	{token.FOR, "for", keyword},

	{token.FUNC, "func", keyword},
	{token.GO, "go", keyword},
	{token.GOTO, "goto", keyword},
	{token.IF, "if", keyword},
	{token.IMPORT, "import", keyword},

	{token.INTERFACE, "interface", keyword},
	{token.MAP, "map", keyword},
	{token.PACKAGE, "package", keyword},
	{token.RANGE, "range", keyword},
	{token.RETURN, "return", keyword},

	{token.SELECT, "select", keyword},
	{token.STRUCT, "struct", keyword},
	{token.SWITCH, "switch", keyword},
	{token.TYPE, "type", keyword},
	{token.VAR, "var", keyword},
}
184
// whitespace is appended after every token literal when source is built;
// TestScan uses its length and newline count to track expected positions.
const whitespace = "  \t  \n\n\n" // to separate tokens
186
187var source = func() []byte {
188	var src []byte
189	for _, t := range tokens {
190		src = append(src, t.lit...)
191		src = append(src, whitespace...)
192	}
193	return src
194}()
195
// newlineCount reports how many '\n' bytes s contains.
func newlineCount(s string) int {
	count := 0
	for _, b := range []byte(s) {
		if b == '\n' {
			count++
		}
	}
	return count
}
205
206func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) {
207	pos := fset.Position(p)
208	// Check cleaned filenames so that we don't have to worry about
209	// different os.PathSeparator values.
210	if pos.Filename != expected.Filename && filepath.Clean(pos.Filename) != filepath.Clean(expected.Filename) {
211		t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
212	}
213	if pos.Offset != expected.Offset {
214		t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
215	}
216	if pos.Line != expected.Line {
217		t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
218	}
219	if pos.Column != expected.Column {
220		t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
221	}
222}
223
224// Verify that calling Scan() provides the correct results.
225func TestScan(t *testing.T) {
226	whitespace_linecount := newlineCount(whitespace)
227
228	// error handler
229	eh := func(_ token.Position, msg string) {
230		t.Errorf("error handler called (msg = %s)", msg)
231	}
232
233	// verify scan
234	var s Scanner
235	s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis)
236
237	// set up expected position
238	epos := token.Position{
239		Filename: "",
240		Offset:   0,
241		Line:     1,
242		Column:   1,
243	}
244
245	index := 0
246	for {
247		pos, tok, lit := s.Scan()
248
249		// check position
250		if tok == token.EOF {
251			// correction for EOF
252			epos.Line = newlineCount(string(source))
253			epos.Column = 2
254		}
255		checkPos(t, lit, pos, epos)
256
257		// check token
258		e := elt{token.EOF, "", special}
259		if index < len(tokens) {
260			e = tokens[index]
261			index++
262		}
263		if tok != e.tok {
264			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
265		}
266
267		// check token class
268		if tokenclass(tok) != e.class {
269			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
270		}
271
272		// check literal
273		elit := ""
274		switch e.tok {
275		case token.COMMENT:
276			// no CRs in comments
277			elit = string(stripCR([]byte(e.lit), e.lit[1] == '*'))
278			//-style comment literal doesn't contain newline
279			if elit[1] == '/' {
280				elit = elit[0 : len(elit)-1]
281			}
282		case token.IDENT:
283			elit = e.lit
284		case token.SEMICOLON:
285			elit = ";"
286		default:
287			if e.tok.IsLiteral() {
288				// no CRs in raw string literals
289				elit = e.lit
290				if elit[0] == '`' {
291					elit = string(stripCR([]byte(elit), false))
292				}
293			} else if e.tok.IsKeyword() {
294				elit = e.lit
295			}
296		}
297		if lit != elit {
298			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
299		}
300
301		if tok == token.EOF {
302			break
303		}
304
305		// update position
306		epos.Offset += len(e.lit) + len(whitespace)
307		epos.Line += newlineCount(e.lit) + whitespace_linecount
308
309	}
310
311	if s.ErrorCount != 0 {
312		t.Errorf("found %d errors", s.ErrorCount)
313	}
314}
315
316func TestStripCR(t *testing.T) {
317	for _, test := range []struct{ have, want string }{
318		{"//\n", "//\n"},
319		{"//\r\n", "//\n"},
320		{"//\r\r\r\n", "//\n"},
321		{"//\r*\r/\r\n", "//*/\n"},
322		{"/**/", "/**/"},
323		{"/*\r/*/", "/*/*/"},
324		{"/*\r*/", "/**/"},
325		{"/**\r/*/", "/**\r/*/"},
326		{"/*\r/\r*\r/*/", "/*/*\r/*/"},
327		{"/*\r\r\r\r*/", "/**/"},
328	} {
329		got := string(stripCR([]byte(test.have), len(test.have) >= 2 && test.have[1] == '*'))
330		if got != test.want {
331			t.Errorf("stripCR(%q) = %q; want %q", test.have, got, test.want)
332		}
333	}
334}
335
336func checkSemi(t *testing.T, line string, mode Mode) {
337	var S Scanner
338	file := fset.AddFile("TestSemis", fset.Base(), len(line))
339	S.Init(file, []byte(line), nil, mode)
340	pos, tok, lit := S.Scan()
341	for tok != token.EOF {
342		if tok == token.ILLEGAL {
343			// the illegal token literal indicates what
344			// kind of semicolon literal to expect
345			semiLit := "\n"
346			if lit[0] == '#' {
347				semiLit = ";"
348			}
349			// next token must be a semicolon
350			semiPos := file.Position(pos)
351			semiPos.Offset++
352			semiPos.Column++
353			pos, tok, lit = S.Scan()
354			if tok == token.SEMICOLON {
355				if lit != semiLit {
356					t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
357				}
358				checkPos(t, line, pos, semiPos)
359			} else {
360				t.Errorf("bad token for %q: got %s, expected ;", line, tok)
361			}
362		} else if tok == token.SEMICOLON {
363			t.Errorf("bad token for %q: got ;, expected no ;", line)
364		}
365		pos, tok, lit = S.Scan()
366	}
367}
368
// lines holds the inputs for TestSemis. checkSemi expects every marker
// (scanned as an ILLEGAL token) to be followed directly by a semicolon
// token, and no semicolon token anywhere else.
var lines = []string{
	// # indicates a semicolon present in the source
	// $ indicates an automatically inserted semicolon
	"",
	"\ufeff#;", // first BOM is ignored
	"#;",
	"foo$\n",
	"123$\n",
	"1.2$\n",
	"'x'$\n",
	`"x"` + "$\n",
	"`x`$\n",

	"+\n",
	"-\n",
	"*\n",
	"/\n",
	"%\n",

	"&\n",
	"|\n",
	"^\n",
	"<<\n",
	">>\n",
	"&^\n",

	"+=\n",
	"-=\n",
	"*=\n",
	"/=\n",
	"%=\n",

	"&=\n",
	"|=\n",
	"^=\n",
	"<<=\n",
	">>=\n",
	"&^=\n",

	"&&\n",
	"||\n",
	"<-\n",
	"++$\n",
	"--$\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...\n",

	"(\n",
	"[\n",
	"{\n",
	",\n",
	".\n",

	")$\n",
	"]$\n",
	"}$\n",
	"#;\n",
	":\n",

	"break$\n",
	"case\n",
	"chan\n",
	"const\n",
	"continue$\n",

	"default\n",
	"defer\n",
	"else\n",
	"fallthrough$\n",
	"for\n",

	"func\n",
	"go\n",
	"goto\n",
	"if\n",
	"import\n",

	"interface\n",
	"map\n",
	"package\n",
	"range\n",
	"return$\n",

	"select\n",
	"struct\n",
	"switch\n",
	"type\n",
	"var\n",

	"foo$//comment\n",
	"foo$//comment",
	"foo$/*comment*/\n",
	"foo$/*\n*/",
	"foo$/*comment*/    \n",
	"foo$/*\n*/    ",

	"foo    $// comment\n",
	"foo    $// comment",
	"foo    $/*comment*/\n",
	"foo    $/*\n*/",
	"foo    $/*  */ /* \n */ bar$/**/\n",
	"foo    $/*0*/ /*1*/ /*2*/\n",

	"foo    $/*comment*/    \n",
	"foo    $/*0*/ /*1*/ /*2*/    \n",
	"foo	$/**/ /*-------------*/       /*----\n*/bar       $/*  \n*/baa$\n",
	"foo    $/* an EOF terminates a line */",
	"foo    $/* an EOF terminates a line */ /*",
	"foo    $/* an EOF terminates a line */ //",

	"package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n",
	"package main$",
}
492
493func TestSemis(t *testing.T) {
494	for _, line := range lines {
495		checkSemi(t, line, 0)
496		checkSemi(t, line, ScanComments)
497
498		// if the input ended in newlines, the input must tokenize the
499		// same with or without those newlines
500		for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
501			checkSemi(t, line[0:i], 0)
502			checkSemi(t, line[0:i], ScanComments)
503		}
504	}
505}
506
// segment is one piece of test source together with the position expected
// for the token it contains (or, in the tables for invalid line directives,
// the expected error message and error position).
type segment struct {
	srcline      string // a line of source text
	filename     string // filename for current token; error message for invalid line directives
	line, column int    // line and column for current token; error position for invalid line directives
}
512
// segments are the source pieces and expected token positions that
// TestLineDirectives scans as a file named "TestLineDirectives".
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{"  line1", "TestLineDirectives", 1, 3},
	{"\nline2", "TestLineDirectives", 2, 1},
	{"\nline3  //line File1.go:100", "TestLineDirectives", 3, 1}, // bad line comment, ignored
	{"\nline4", "TestLineDirectives", 4, 1},
	{"\n//line File1.go:100\n  line100", "File1.go", 100, 0},
	{"\n//line  \t :42\n  line1", " \t ", 42, 0},
	{"\n//line File2.go:200\n  line200", "File2.go", 200, 0},
	{"\n//line foo\t:42\n  line42", "foo\t", 42, 0},
	{"\n //line foo:42\n  line43", "foo\t", 44, 0}, // bad line comment, ignored (use existing, prior filename)
	{"\n//line foo 42\n  line44", "foo\t", 46, 0},  // bad line comment, ignored (use existing, prior filename)
	{"\n//line /bar:42\n  line45", "/bar", 42, 0},
	{"\n//line ./foo:42\n  line46", "foo", 42, 0},
	{"\n//line a/b/c/File1.go:100\n  line100", "a/b/c/File1.go", 100, 0},
	{"\n//line c:\\bar:42\n  line200", "c:\\bar", 42, 0},
	{"\n//line c:\\dir\\File1.go:100\n  line201", "c:\\dir\\File1.go", 100, 0},

	// tests for new line directive syntax
	{"\n//line :100\na1", "", 100, 0}, // missing filename means empty filename
	{"\n//line bar:100\nb1", "bar", 100, 0},
	{"\n//line :100:10\nc1", "bar", 100, 10}, // missing filename means current filename
	{"\n//line foo:100:10\nd1", "foo", 100, 10},

	{"\n/*line :100*/a2", "", 100, 0}, // missing filename means empty filename
	{"\n/*line bar:100*/b2", "bar", 100, 0},
	{"\n/*line :100:10*/c2", "bar", 100, 10}, // missing filename means current filename
	{"\n/*line foo:100:10*/d2", "foo", 100, 10},
	{"\n/*line foo:100:10*/    e2", "foo", 100, 14}, // line-directive relative column
	{"\n/*line foo:100:10*/\n\nf2", "foo", 102, 1},  // absolute column since on new line
}
544
// dirsegments are scanned by TestLineDirectives as a file named
// "TestLineDir/TestLineDirectives"; the expected filename for the line
// directive carries the same "TestLineDir/" prefix.
var dirsegments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{"  line1", "TestLineDir/TestLineDirectives", 1, 3},
	{"\n//line File1.go:100\n  line100", "TestLineDir/File1.go", 100, 0},
}

// dirUnixSegments is checked by TestLineDirectives whenever GOOS is not
// "windows".
var dirUnixSegments = []segment{
	{"\n//line /bar:42\n  line42", "/bar", 42, 0},
}

// dirWindowsSegments is checked by TestLineDirectives only when GOOS is
// "windows".
var dirWindowsSegments = []segment{
	{"\n//line c:\\bar:42\n  line42", "c:\\bar", 42, 0},
}
558
559// Verify that line directives are interpreted correctly.
560func TestLineDirectives(t *testing.T) {
561	testSegments(t, segments, "TestLineDirectives")
562	testSegments(t, dirsegments, "TestLineDir/TestLineDirectives")
563	if runtime.GOOS == "windows" {
564		testSegments(t, dirWindowsSegments, "TestLineDir/TestLineDirectives")
565	} else {
566		testSegments(t, dirUnixSegments, "TestLineDir/TestLineDirectives")
567	}
568}
569
570func testSegments(t *testing.T, segments []segment, filename string) {
571	var src string
572	for _, e := range segments {
573		src += e.srcline
574	}
575
576	// verify scan
577	var S Scanner
578	file := fset.AddFile(filename, fset.Base(), len(src))
579	S.Init(file, []byte(src), func(pos token.Position, msg string) { t.Error(Error{pos, msg}) }, dontInsertSemis)
580	for _, s := range segments {
581		p, _, lit := S.Scan()
582		pos := file.Position(p)
583		checkPos(t, lit, p, token.Position{
584			Filename: s.filename,
585			Offset:   pos.Offset,
586			Line:     s.line,
587			Column:   s.column,
588		})
589	}
590
591	if S.ErrorCount != 0 {
592		t.Errorf("got %d errors", S.ErrorCount)
593	}
594}
595
// invalidSegments are the inputs for TestInvalidLineDirectives; each one is
// expected to produce exactly one scanner error.
// The filename is used for the error message in these test cases.
// The first line directive is valid and used to control the expected error line.
var invalidSegments = []segment{
	{"\n//line :1:1\n//line foo:42 extra text\ndummy", "invalid line number: 42 extra text", 1, 12},
	{"\n//line :2:1\n//line foobar:\ndummy", "invalid line number: ", 2, 15},
	{"\n//line :5:1\n//line :0\ndummy", "invalid line number: 0", 5, 9},
	{"\n//line :10:1\n//line :1:0\ndummy", "invalid column number: 0", 10, 11},
	{"\n//line :1:1\n//line :foo:0\ndummy", "invalid line number: 0", 1, 13}, // foo is considered part of the filename
}
605
606// Verify that invalid line directives get the correct error message.
607func TestInvalidLineDirectives(t *testing.T) {
608	// make source
609	var src string
610	for _, e := range invalidSegments {
611		src += e.srcline
612	}
613
614	// verify scan
615	var S Scanner
616	var s segment // current segment
617	file := fset.AddFile(filepath.Join("dir", "TestInvalidLineDirectives"), fset.Base(), len(src))
618	S.Init(file, []byte(src), func(pos token.Position, msg string) {
619		if msg != s.filename {
620			t.Errorf("got error %q; want %q", msg, s.filename)
621		}
622		if pos.Line != s.line || pos.Column != s.column {
623			t.Errorf("got position %d:%d; want %d:%d", pos.Line, pos.Column, s.line, s.column)
624		}
625	}, dontInsertSemis)
626	for _, s = range invalidSegments {
627		S.Scan()
628	}
629
630	if S.ErrorCount != len(invalidSegments) {
631		t.Errorf("go %d errors; want %d", S.ErrorCount, len(invalidSegments))
632	}
633}
634
635// Verify that initializing the same scanner more than once works correctly.
636func TestInit(t *testing.T) {
637	var s Scanner
638
639	// 1st init
640	src1 := "if true { }"
641	f1 := fset.AddFile("src1", fset.Base(), len(src1))
642	s.Init(f1, []byte(src1), nil, dontInsertSemis)
643	if f1.Size() != len(src1) {
644		t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
645	}
646	s.Scan()              // if
647	s.Scan()              // true
648	_, tok, _ := s.Scan() // {
649	if tok != token.LBRACE {
650		t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
651	}
652
653	// 2nd init
654	src2 := "go true { ]"
655	f2 := fset.AddFile("src2", fset.Base(), len(src2))
656	s.Init(f2, []byte(src2), nil, dontInsertSemis)
657	if f2.Size() != len(src2) {
658		t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
659	}
660	_, tok, _ = s.Scan() // go
661	if tok != token.GO {
662		t.Errorf("bad token: got %s, expected %s", tok, token.GO)
663	}
664
665	if s.ErrorCount != 0 {
666		t.Errorf("found %d errors", s.ErrorCount)
667	}
668}
669
670func TestStdErrorHander(t *testing.T) {
671	const src = "@\n" + // illegal character, cause an error
672		"@ @\n" + // two errors on the same line
673		"//line File2:20\n" +
674		"@\n" + // different file, but same line
675		"//line File2:1\n" +
676		"@ @\n" + // same file, decreasing line number
677		"//line File1:1\n" +
678		"@ @ @" // original file, line 1 again
679
680	var list ErrorList
681	eh := func(pos token.Position, msg string) { list.Add(pos, msg) }
682
683	var s Scanner
684	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertSemis)
685	for {
686		if _, tok, _ := s.Scan(); tok == token.EOF {
687			break
688		}
689	}
690
691	if len(list) != s.ErrorCount {
692		t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount)
693	}
694
695	if len(list) != 9 {
696		t.Errorf("found %d raw errors, expected 9", len(list))
697		PrintError(os.Stderr, list)
698	}
699
700	list.Sort()
701	if len(list) != 9 {
702		t.Errorf("found %d sorted errors, expected 9", len(list))
703		PrintError(os.Stderr, list)
704	}
705
706	list.RemoveMultiples()
707	if len(list) != 4 {
708		t.Errorf("found %d one-per-line errors, expected 4", len(list))
709		PrintError(os.Stderr, list)
710	}
711}
712
// errorCollector records what the error handler installed by checkError
// was called with: how often it ran, and the message and position of the
// most recent call.
type errorCollector struct {
	cnt int            // number of errors encountered
	msg string         // last error message encountered
	pos token.Position // last error position encountered
}
718
719func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
720	var s Scanner
721	var h errorCollector
722	eh := func(pos token.Position, msg string) {
723		h.cnt++
724		h.msg = msg
725		h.pos = pos
726	}
727	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
728	_, tok0, lit0 := s.Scan()
729	if tok0 != tok {
730		t.Errorf("%q: got %s, expected %s", src, tok0, tok)
731	}
732	if tok0 != token.ILLEGAL && lit0 != lit {
733		t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
734	}
735	cnt := 0
736	if err != "" {
737		cnt = 1
738	}
739	if h.cnt != cnt {
740		t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
741	}
742	if h.msg != err {
743		t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
744	}
745	if h.pos.Offset != pos {
746		t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
747	}
748}
749
// errors lists the inputs for TestScanErrors. For each src, the first token
// scanned must be tok with literal lit (the literal is not compared for
// ILLEGAL tokens); err is the expected error message ("" if no error is
// expected) and pos the expected offset of the reported error.
var errors = []struct {
	src string
	tok token.Token
	pos int
	lit string
	err string
}{
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},
	{"..", token.PERIOD, 0, "", ""}, // two periods, not invalid token (issue #28112)
	{`' '`, token.CHAR, 0, `' '`, ""},
	{`''`, token.CHAR, 0, `''`, "illegal rune literal"},
	{`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
	{`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
	{`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
	{`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
	{`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
	{`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
	{`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
	{`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
	{`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
	{`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
	{`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
	{`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
	{"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
	{"'\n   ", token.CHAR, 0, "'", "rune literal not terminated"},
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n   ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"``", token.STRING, 0, "``", ""},
	{"`", token.STRING, 0, "`", "raw string literal not terminated"},
	{"/**/", token.COMMENT, 0, "/**/", ""},
	{"/*", token.COMMENT, 0, "/*", "comment not terminated"},
	{"077", token.INT, 0, "077", ""},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"0E", token.FLOAT, 0, "0E", "illegal floating-point exponent"}, // issue 17621
	{"078", token.INT, 0, "078", "illegal octal number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"},                        // only first BOM is ignored
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"},                                // only first BOM is ignored
	{"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"},                         // only first BOM is ignored
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
817
818func TestScanErrors(t *testing.T) {
819	for _, e := range errors {
820		checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
821	}
822}
823
824// Verify that no comments show up as literal values when skipping comments.
825func TestIssue10213(t *testing.T) {
826	const src = `
827		var (
828			A = 1 // foo
829		)
830
831		var (
832			B = 2
833			// foo
834		)
835
836		var C = 3 // foo
837
838		var D = 4
839		// foo
840
841		func anycode() {
842		// foo
843		}
844	`
845	var s Scanner
846	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
847	for {
848		pos, tok, lit := s.Scan()
849		class := tokenclass(tok)
850		if lit != "" && class != keyword && class != literal && tok != token.SEMICOLON {
851			t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit)
852		}
853		if tok <= token.EOF {
854			break
855		}
856	}
857}
858
859func TestIssue28112(t *testing.T) {
860	const src = "... .. 0.. .." // make sure to have stand-alone ".." immediately before EOF to test EOF behavior
861	tokens := []token.Token{token.ELLIPSIS, token.PERIOD, token.PERIOD, token.FLOAT, token.PERIOD, token.PERIOD, token.PERIOD, token.EOF}
862	var s Scanner
863	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
864	for _, want := range tokens {
865		pos, got, lit := s.Scan()
866		if got != want {
867			t.Errorf("%s: got %s, want %s", fset.Position(pos), got, want)
868		}
869		// literals expect to have a (non-empty) literal string and we don't care about other tokens for this test
870		if tokenclass(got) == literal && lit == "" {
871			t.Errorf("%s: for %s got empty literal string", fset.Position(pos), got)
872		}
873	}
874}
875
876func BenchmarkScan(b *testing.B) {
877	b.StopTimer()
878	fset := token.NewFileSet()
879	file := fset.AddFile("", fset.Base(), len(source))
880	var s Scanner
881	b.StartTimer()
882	for i := 0; i < b.N; i++ {
883		s.Init(file, source, nil, ScanComments)
884		for {
885			_, tok, _ := s.Scan()
886			if tok == token.EOF {
887				break
888			}
889		}
890	}
891}
892
893func BenchmarkScanFile(b *testing.B) {
894	b.StopTimer()
895	const filename = "scanner.go"
896	src, err := ioutil.ReadFile(filename)
897	if err != nil {
898		panic(err)
899	}
900	fset := token.NewFileSet()
901	file := fset.AddFile(filename, fset.Base(), len(src))
902	b.SetBytes(int64(len(src)))
903	var s Scanner
904	b.StartTimer()
905	for i := 0; i < b.N; i++ {
906		s.Init(file, src, nil, ScanComments)
907		for {
908			_, tok, _ := s.Scan()
909			if tok == token.EOF {
910				break
911			}
912		}
913	}
914}
915