// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package scanner implements a scanner for Go source text.
// It takes a []byte as source which can then be tokenized
// through repeated calls to the Scan method.
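//
// Typical use initializes a Scanner over a token.File and calls Scan in a
// loop. The following sketch is illustrative only (the source text, file
// name, and mode are placeholders):
//
//	src := []byte("cos(x) + 1i*sin(x) // Euler")
//	var s Scanner
//	fset := token.NewFileSet()
//	file := fset.AddFile("example.go", fset.Base(), len(src))
//	s.Init(file, src, nil /* no error handler */, ScanComments)
//	for {
//		pos, tok, lit := s.Scan()
//		if tok == token.EOF {
//			break
//		}
//		fmt.Printf("%s\t%s\t%q\n", fset.Position(pos), tok, lit)
//	}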
//
package scanner

import (
	"bytes"
	"fmt"
	"go/token"
	"path/filepath"
	"strconv"
	"unicode"
	"unicode/utf8"
)

// An ErrorHandler may be provided to Scanner.Init. If a syntax error is
// encountered and a handler was installed, the handler is called with a
// position and an error message. The position points to the beginning of
// the offending token.
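//
// A minimal handler might simply report each error (a sketch; the output
// format is up to the client):
//
//	eh := func(pos token.Position, msg string) {
//		fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
//	}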
//
type ErrorHandler func(pos token.Position, msg string)

// A Scanner holds the scanner's internal state while processing
// a given text. It can be allocated as part of another data
// structure but must be initialized via Init before use.
//
type Scanner struct {
	// immutable state
	file *token.File  // source file handle
	dir  string       // directory portion of file.Name()
	src  []byte       // source
	err  ErrorHandler // error reporting; or nil
	mode Mode         // scanning mode

	// scanning state
	ch         rune // current character
	offset     int  // character offset
	rdOffset   int  // reading offset (position after current character)
	lineOffset int  // current line offset
	insertSemi bool // insert a semicolon before next newline

	// public state - ok to modify
	ErrorCount int // number of errors encountered
}

const bom = 0xFEFF // byte order mark, only permitted as very first character

// Read the next Unicode char into s.ch.
// s.ch < 0 means end-of-file.
//
func (s *Scanner) next() {
	if s.rdOffset < len(s.src) {
		s.offset = s.rdOffset
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		r, w := rune(s.src[s.rdOffset]), 1
		switch {
		case r == 0:
			s.error(s.offset, "illegal character NUL")
		case r >= utf8.RuneSelf:
			// not ASCII
			r, w = utf8.DecodeRune(s.src[s.rdOffset:])
			if r == utf8.RuneError && w == 1 {
				s.error(s.offset, "illegal UTF-8 encoding")
			} else if r == bom && s.offset > 0 {
				s.error(s.offset, "illegal byte order mark")
			}
		}
		s.rdOffset += w
		s.ch = r
	} else {
		s.offset = len(s.src)
		if s.ch == '\n' {
			s.lineOffset = s.offset
			s.file.AddLine(s.offset)
		}
		s.ch = -1 // eof
	}
}

// peek returns the byte following the most recently read character without
// advancing the scanner. If the scanner is at EOF, peek returns 0.
func (s *Scanner) peek() byte {
	if s.rdOffset < len(s.src) {
		return s.src[s.rdOffset]
	}
	return 0
}

// A mode value is a set of flags (or 0).
// They control scanner behavior.
//
type Mode uint

const (
	ScanComments    Mode = 1 << iota // return comments as COMMENT tokens
	dontInsertSemis                  // do not automatically insert semicolons - for testing only
)

// Init prepares the scanner s to tokenize the text src by setting the
// scanner at the beginning of src. The scanner uses the file set file
// for position information, and it adds line information for each line.
// It is ok to re-use the same file when re-scanning the same source, as
// line information that is already present is ignored. Init causes a
// panic if the file size does not match the src size.
//
// Calls to Scan will invoke the error handler err if they encounter a
// syntax error and err is not nil. Also, for each error encountered,
// the Scanner field ErrorCount is incremented by one. The mode parameter
// determines how comments are handled.
//
// Note that Init may call err if there is an error in the first character
// of the file.
//
func (s *Scanner) Init(file *token.File, src []byte, err ErrorHandler, mode Mode) {
	// Explicitly initialize all fields since a scanner may be reused.
	if file.Size() != len(src) {
		panic(fmt.Sprintf("file size (%d) does not match src len (%d)", file.Size(), len(src)))
	}
	s.file = file
	s.dir, _ = filepath.Split(file.Name())
	s.src = src
	s.err = err
	s.mode = mode

	s.ch = ' '
	s.offset = 0
	s.rdOffset = 0
	s.lineOffset = 0
	s.insertSemi = false
	s.ErrorCount = 0

	s.next()
	if s.ch == bom {
		s.next() // ignore BOM at file beginning
	}
}

func (s *Scanner) error(offs int, msg string) {
	if s.err != nil {
		s.err(s.file.Position(s.file.Pos(offs)), msg)
	}
	s.ErrorCount++
}

func (s *Scanner) errorf(offs int, format string, args ...interface{}) {
	s.error(offs, fmt.Sprintf(format, args...))
}

func (s *Scanner) scanComment() string {
	// initial '/' already consumed; s.ch == '/' || s.ch == '*'
	offs := s.offset - 1 // position of initial '/'
	next := -1           // position immediately following the comment; < 0 means invalid comment
	numCR := 0

	if s.ch == '/' {
		//-style comment
		// (the final '\n' is not considered part of the comment)
		s.next()
		for s.ch != '\n' && s.ch >= 0 {
			if s.ch == '\r' {
				numCR++
			}
			s.next()
		}
		// if we are at '\n', the position following the comment is afterwards
		next = s.offset
		if s.ch == '\n' {
			next++
		}
		goto exit
	}

	/*-style comment */
	s.next()
	for s.ch >= 0 {
		ch := s.ch
		if ch == '\r' {
			numCR++
		}
		s.next()
		if ch == '*' && s.ch == '/' {
			s.next()
			next = s.offset
			goto exit
		}
	}

	s.error(offs, "comment not terminated")

exit:
	lit := s.src[offs:s.offset]

	// On Windows, a (//-comment) line may end in "\r\n".
	// Remove the final '\r' before analyzing the text for
	// line directives (matching the compiler). Remove any
	// other '\r' afterwards (matching the pre-existing
	// behavior of the scanner).
	if numCR > 0 && len(lit) >= 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
		lit = lit[:len(lit)-1]
		numCR--
	}

	// interpret line directives
	// (//line directives must start at the beginning of the current line)
	if next >= 0 /* implies valid comment */ && (lit[1] == '*' || offs == s.lineOffset) && bytes.HasPrefix(lit[2:], prefix) {
		s.updateLineInfo(next, offs, lit)
	}

	if numCR > 0 {
		lit = stripCR(lit, lit[1] == '*')
	}

	return string(lit)
}

var prefix = []byte("line ")

// updateLineInfo parses the incoming comment text at offset offs
// as a line directive. If successful, it updates the line info table
// for the position next per the line directive.
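//
// The directive forms handled below are (a summary of the parsing that
// follows):
//
//	//line filename:line
//	//line filename:line:col
//	/*line filename:line*/
//	/*line filename:line:col*/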
func (s *Scanner) updateLineInfo(next, offs int, text []byte) {
	// extract comment text
	if text[1] == '*' {
		text = text[:len(text)-2] // lop off trailing "*/"
	}
	text = text[7:] // lop off leading "//line " or "/*line "
	offs += 7

	i, n, ok := trailingDigits(text)
	if i == 0 {
		return // ignore (not a line directive)
	}
	// i > 0

	if !ok {
		// text has a suffix :xxx but xxx is not a number
		s.error(offs+i, "invalid line number: "+string(text[i:]))
		return
	}

	var line, col int
	i2, n2, ok2 := trailingDigits(text[:i-1])
	if ok2 {
		//line filename:line:col
		i, i2 = i2, i
		line, col = n2, n
		if col == 0 {
			s.error(offs+i2, "invalid column number: "+string(text[i2:]))
			return
		}
		text = text[:i2-1] // lop off ":col"
	} else {
		//line filename:line
		line = n
	}

	if line == 0 {
		s.error(offs+i, "invalid line number: "+string(text[i:]))
		return
	}

	// If we have a column (//line filename:line:col form),
	// an empty filename means to use the previous filename.
	filename := string(text[:i-1]) // lop off ":line", and trim white space
	if filename == "" && ok2 {
		filename = s.file.Position(s.file.Pos(offs)).Filename
	} else if filename != "" {
		// Put a relative filename in the current directory.
		// This is for compatibility with earlier releases.
		// See issue 26671.
		filename = filepath.Clean(filename)
		if !filepath.IsAbs(filename) {
			filename = filepath.Join(s.dir, filename)
		}
	}

	s.file.AddLineColumnInfo(next, filename, line, col)
}

func trailingDigits(text []byte) (int, int, bool) {
	i := bytes.LastIndexByte(text, ':') // look from right (Windows filenames may contain ':')
	if i < 0 {
		return 0, 0, false // no ":"
	}
	// i >= 0
	n, err := strconv.ParseUint(string(text[i+1:]), 10, 0)
	return i + 1, int(n), err == nil
}

func (s *Scanner) findLineEnd() bool {
	// initial '/' already consumed

	defer func(offs int) {
		// reset scanner state to where it was upon calling findLineEnd
		s.ch = '/'
		s.offset = offs
		s.rdOffset = offs + 1
		s.next() // consume initial '/' again
	}(s.offset - 1)

	// read ahead until a newline, EOF, or non-comment token is found
	for s.ch == '/' || s.ch == '*' {
		if s.ch == '/' {
			//-style comment always contains a newline
			return true
		}
		/*-style comment: look for newline */
		s.next()
		for s.ch >= 0 {
			ch := s.ch
			if ch == '\n' {
				return true
			}
			s.next()
			if ch == '*' && s.ch == '/' {
				s.next()
				break
			}
		}
		s.skipWhitespace() // s.insertSemi is set
		if s.ch < 0 || s.ch == '\n' {
			return true
		}
		if s.ch != '/' {
			// non-comment token
			return false
		}
		s.next() // consume '/'
	}

	return false
}

func isLetter(ch rune) bool {
	return 'a' <= lower(ch) && lower(ch) <= 'z' || ch == '_' || ch >= utf8.RuneSelf && unicode.IsLetter(ch)
}

func isDigit(ch rune) bool {
	return isDecimal(ch) || ch >= utf8.RuneSelf && unicode.IsDigit(ch)
}

func (s *Scanner) scanIdentifier() string {
	offs := s.offset
	for isLetter(s.ch) || isDigit(s.ch) {
		s.next()
	}
	return string(s.src[offs:s.offset])
}

func digitVal(ch rune) int {
	switch {
	case '0' <= ch && ch <= '9':
		return int(ch - '0')
	case 'a' <= lower(ch) && lower(ch) <= 'f':
		return int(lower(ch) - 'a' + 10)
	}
	return 16 // larger than any legal digit val
}

func lower(ch rune) rune     { return ('a' - 'A') | ch } // returns lower-case ch iff ch is ASCII letter
func isDecimal(ch rune) bool { return '0' <= ch && ch <= '9' }
func isHex(ch rune) bool     { return '0' <= ch && ch <= '9' || 'a' <= lower(ch) && lower(ch) <= 'f' }

// digits accepts the sequence { digit | '_' }.
// If base <= 10, digits accepts any decimal digit but records
// the offset (relative to the source start) of a digit >= base
// in *invalid, if *invalid < 0.
// digits returns a bitset describing whether the sequence contained
// digits (bit 0 is set), or separators '_' (bit 1 is set).
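// For example, scanning the digits of "1_23" yields digsep == 3
// (both bits set), while "42" yields digsep == 1.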
func (s *Scanner) digits(base int, invalid *int) (digsep int) {
	if base <= 10 {
		max := rune('0' + base)
		for isDecimal(s.ch) || s.ch == '_' {
			ds := 1
			if s.ch == '_' {
				ds = 2
			} else if s.ch >= max && *invalid < 0 {
				*invalid = s.offset // record invalid rune offset
			}
			digsep |= ds
			s.next()
		}
	} else {
		for isHex(s.ch) || s.ch == '_' {
			ds := 1
			if s.ch == '_' {
				ds = 2
			}
			digsep |= ds
			s.next()
		}
	}
	return
}

func (s *Scanner) scanNumber() (token.Token, string) {
	offs := s.offset
	tok := token.ILLEGAL

	base := 10        // number base
	prefix := rune(0) // one of 0 (decimal), '0' (0-octal), 'x', 'o', or 'b'
	digsep := 0       // bit 0: digit present, bit 1: '_' present
	invalid := -1     // index of invalid digit in literal, or < 0

	// integer part
	if s.ch != '.' {
		tok = token.INT
		if s.ch == '0' {
			s.next()
			switch lower(s.ch) {
			case 'x':
				s.next()
				base, prefix = 16, 'x'
			case 'o':
				s.next()
				base, prefix = 8, 'o'
			case 'b':
				s.next()
				base, prefix = 2, 'b'
			default:
				base, prefix = 8, '0'
				digsep = 1 // leading 0
			}
		}
		digsep |= s.digits(base, &invalid)
	}

	// fractional part
	if s.ch == '.' {
		tok = token.FLOAT
		if prefix == 'o' || prefix == 'b' {
			s.error(s.offset, "invalid radix point in "+litname(prefix))
		}
		s.next()
		digsep |= s.digits(base, &invalid)
	}

	if digsep&1 == 0 {
		s.error(s.offset, litname(prefix)+" has no digits")
	}

	// exponent
	if e := lower(s.ch); e == 'e' || e == 'p' {
		switch {
		case e == 'e' && prefix != 0 && prefix != '0':
			s.errorf(s.offset, "%q exponent requires decimal mantissa", s.ch)
		case e == 'p' && prefix != 'x':
			s.errorf(s.offset, "%q exponent requires hexadecimal mantissa", s.ch)
		}
		s.next()
		tok = token.FLOAT
		if s.ch == '+' || s.ch == '-' {
			s.next()
		}
		ds := s.digits(10, nil)
		digsep |= ds
		if ds&1 == 0 {
			s.error(s.offset, "exponent has no digits")
		}
	} else if prefix == 'x' && tok == token.FLOAT {
		s.error(s.offset, "hexadecimal mantissa requires a 'p' exponent")
	}

	// suffix 'i'
	if s.ch == 'i' {
		tok = token.IMAG
		s.next()
	}

	lit := string(s.src[offs:s.offset])
	if tok == token.INT && invalid >= 0 {
		s.errorf(invalid, "invalid digit %q in %s", lit[invalid-offs], litname(prefix))
	}
	if digsep&2 != 0 {
		if i := invalidSep(lit); i >= 0 {
			s.error(offs+i, "'_' must separate successive digits")
		}
	}

	return tok, lit
}

func litname(prefix rune) string {
	switch prefix {
	case 'x':
		return "hexadecimal literal"
	case 'o', '0':
		return "octal literal"
	case 'b':
		return "binary literal"
	}
	return "decimal literal"
}

// invalidSep returns the index of the first invalid separator in x, or -1.
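// For example (illustrative): invalidSep("1__0") == 2 and
// invalidSep("1_") == 1, while invalidSep("1_000") == -1.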
func invalidSep(x string) int {
	x1 := ' ' // prefix char, we only care if it's 'x'
	d := '.'  // digit, one of '_', '0' (a digit), or '.' (anything else)
	i := 0

	// a prefix counts as a digit
	if len(x) >= 2 && x[0] == '0' {
		x1 = lower(rune(x[1]))
		if x1 == 'x' || x1 == 'o' || x1 == 'b' {
			d = '0'
			i = 2
		}
	}

	// mantissa and exponent
	for ; i < len(x); i++ {
		p := d // previous digit
		d = rune(x[i])
		switch {
		case d == '_':
			if p != '0' {
				return i
			}
		case isDecimal(d) || x1 == 'x' && isHex(d):
			d = '0'
		default:
			if p == '_' {
				return i - 1
			}
			d = '.'
		}
	}
	if d == '_' {
		return len(x) - 1
	}

	return -1
}

// scanEscape parses an escape sequence where quote is the accepted
// escaped quote character. In case of a syntax error, it stops at the
// offending character (without consuming it) and returns false.
// Otherwise it returns true.
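// The recognized escapes are \a \b \f \n \r \t \v \\ and the quote itself,
// plus \ooo (octal), \xhh, \uhhhh, and \Uhhhhhhhh (see the switch below).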
func (s *Scanner) scanEscape(quote rune) bool {
	offs := s.offset

	var n int
	var base, max uint32
	switch s.ch {
	case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
		s.next()
		return true
	case '0', '1', '2', '3', '4', '5', '6', '7':
		n, base, max = 3, 8, 255
	case 'x':
		s.next()
		n, base, max = 2, 16, 255
	case 'u':
		s.next()
		n, base, max = 4, 16, unicode.MaxRune
	case 'U':
		s.next()
		n, base, max = 8, 16, unicode.MaxRune
	default:
		msg := "unknown escape sequence"
		if s.ch < 0 {
			msg = "escape sequence not terminated"
		}
		s.error(offs, msg)
		return false
	}

	var x uint32
	for n > 0 {
		d := uint32(digitVal(s.ch))
		if d >= base {
			msg := fmt.Sprintf("illegal character %#U in escape sequence", s.ch)
			if s.ch < 0 {
				msg = "escape sequence not terminated"
			}
			s.error(s.offset, msg)
			return false
		}
		x = x*base + d
		s.next()
		n--
	}

	if x > max || 0xD800 <= x && x < 0xE000 {
		s.error(offs, "escape sequence is invalid Unicode code point")
		return false
	}

	return true
}

func (s *Scanner) scanRune() string {
	// '\'' opening already consumed
	offs := s.offset - 1

	valid := true
	n := 0
	for {
		ch := s.ch
		if ch == '\n' || ch < 0 {
			// only report error if we don't have one already
			if valid {
				s.error(offs, "rune literal not terminated")
				valid = false
			}
			break
		}
		s.next()
		if ch == '\'' {
			break
		}
		n++
		if ch == '\\' {
			if !s.scanEscape('\'') {
				valid = false
			}
			// continue to read to closing quote
		}
	}

	if valid && n != 1 {
		s.error(offs, "illegal rune literal")
	}

	return string(s.src[offs:s.offset])
}

func (s *Scanner) scanString() string {
	// '"' opening already consumed
	offs := s.offset - 1

	for {
		ch := s.ch
		if ch == '\n' || ch < 0 {
			s.error(offs, "string literal not terminated")
			break
		}
		s.next()
		if ch == '"' {
			break
		}
		if ch == '\\' {
			s.scanEscape('"')
		}
	}

	return string(s.src[offs:s.offset])
}

func stripCR(b []byte, comment bool) []byte {
	c := make([]byte, len(b))
	i := 0
	for j, ch := range b {
		// In a /*-style comment, don't strip \r from *\r/ (incl.
		// sequences of \r from *\r\r...\r/) since the resulting
		// */ would terminate the comment too early unless the \r
		// is immediately following the opening /* in which case
		// it's ok because /*/ is not closed yet (issue #11151).
		if ch != '\r' || comment && i > len("/*") && c[i-1] == '*' && j+1 < len(b) && b[j+1] == '/' {
			c[i] = ch
			i++
		}
	}
	return c[:i]
}

func (s *Scanner) scanRawString() string {
	// '`' opening already consumed
	offs := s.offset - 1

	hasCR := false
	for {
		ch := s.ch
		if ch < 0 {
			s.error(offs, "raw string literal not terminated")
			break
		}
		s.next()
		if ch == '`' {
			break
		}
		if ch == '\r' {
			hasCR = true
		}
	}

	lit := s.src[offs:s.offset]
	if hasCR {
		lit = stripCR(lit, false)
	}

	return string(lit)
}

func (s *Scanner) skipWhitespace() {
	for s.ch == ' ' || s.ch == '\t' || s.ch == '\n' && !s.insertSemi || s.ch == '\r' {
		s.next()
	}
}

// Helper functions for scanning multi-byte tokens such as >> += >>= .
// Different routines recognize different length tok_i based on matches
// of ch_i. If a token ends in '=', the result is tok1 or tok3
// respectively. Otherwise, the result is tok0 if there was no other
// matching character, or tok2 if the matching character was ch2.
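//
// For example, for the character '>' the call
//
//	s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
//
// yields GTR for ">", GEQ for ">=", SHR for ">>", and SHR_ASSIGN for ">>=".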

func (s *Scanner) switch2(tok0, tok1 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	return tok0
}

func (s *Scanner) switch3(tok0, tok1 token.Token, ch2 rune, tok2 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		return tok2
	}
	return tok0
}

func (s *Scanner) switch4(tok0, tok1 token.Token, ch2 rune, tok2, tok3 token.Token) token.Token {
	if s.ch == '=' {
		s.next()
		return tok1
	}
	if s.ch == ch2 {
		s.next()
		if s.ch == '=' {
			s.next()
			return tok3
		}
		return tok2
	}
	return tok0
}

// Scan scans the next token and returns the token position, the token,
// and its literal string if applicable. The source end is indicated by
// token.EOF.
//
// If the returned token is a literal (token.IDENT, token.INT, token.FLOAT,
// token.IMAG, token.CHAR, token.STRING) or token.COMMENT, the literal string
// has the corresponding value.
//
// If the returned token is a keyword, the literal string is the keyword.
//
// If the returned token is token.SEMICOLON, the corresponding
// literal string is ";" if the semicolon was present in the source,
// and "\n" if the semicolon was inserted because of a newline or
// at EOF.
//
// If the returned token is token.ILLEGAL, the literal string is the
// offending character.
//
// In all other cases, Scan returns an empty literal string.
//
// For more tolerant parsing, Scan will return a valid token if
// possible even if a syntax error was encountered. Thus, even
// if the resulting token sequence contains no illegal tokens,
// a client may not assume that no error occurred. Instead it
// must check the scanner's ErrorCount or the number of calls
// of the error handler, if there was one installed.
//
// Scan adds line information to the file added to the file
// set with Init. Token positions are relative to that file
// and thus relative to the file set.
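//
// A typical loop therefore checks ErrorCount after scanning (a sketch,
// given an initialized Scanner s):
//
//	for {
//		_, tok, _ := s.Scan()
//		if tok == token.EOF {
//			break
//		}
//		// use the token
//	}
//	if s.ErrorCount > 0 {
//		// the source contained syntax errors
//	}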
//
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
scanAgain:
	s.skipWhitespace()

	// current token start
	pos = s.file.Pos(s.offset)

	// determine token value
	insertSemi := false
	switch ch := s.ch; {
	case isLetter(ch):
		lit = s.scanIdentifier()
		if len(lit) > 1 {
			// keywords are longer than one letter - avoid lookup otherwise
			tok = token.Lookup(lit)
			switch tok {
			case token.IDENT, token.BREAK, token.CONTINUE, token.FALLTHROUGH, token.RETURN:
				insertSemi = true
			}
		} else {
			insertSemi = true
			tok = token.IDENT
		}
	case isDecimal(ch) || ch == '.' && isDecimal(rune(s.peek())):
		insertSemi = true
		tok, lit = s.scanNumber()
	default:
		s.next() // always make progress
		switch ch {
		case -1:
			if s.insertSemi {
				s.insertSemi = false // EOF consumed
				return pos, token.SEMICOLON, "\n"
			}
			tok = token.EOF
		case '\n':
			// we only reach here if s.insertSemi was
			// set in the first place and exited early
			// from s.skipWhitespace()
			s.insertSemi = false // newline consumed
			return pos, token.SEMICOLON, "\n"
		case '"':
			insertSemi = true
			tok = token.STRING
			lit = s.scanString()
		case '\'':
			insertSemi = true
			tok = token.CHAR
			lit = s.scanRune()
		case '`':
			insertSemi = true
			tok = token.STRING
			lit = s.scanRawString()
		case ':':
			tok = s.switch2(token.COLON, token.DEFINE)
		case '.':
			// fractions starting with a '.' are handled by outer switch
			tok = token.PERIOD
			if s.ch == '.' && s.peek() == '.' {
				s.next()
				s.next() // consume last '.'
				tok = token.ELLIPSIS
			}
		case ',':
			tok = token.COMMA
		case ';':
			tok = token.SEMICOLON
			lit = ";"
		case '(':
			tok = token.LPAREN
		case ')':
			insertSemi = true
			tok = token.RPAREN
		case '[':
			tok = token.LBRACK
		case ']':
			insertSemi = true
			tok = token.RBRACK
		case '{':
			tok = token.LBRACE
		case '}':
			insertSemi = true
			tok = token.RBRACE
		case '+':
			tok = s.switch3(token.ADD, token.ADD_ASSIGN, '+', token.INC)
			if tok == token.INC {
				insertSemi = true
			}
		case '-':
			tok = s.switch3(token.SUB, token.SUB_ASSIGN, '-', token.DEC)
			if tok == token.DEC {
				insertSemi = true
			}
		case '*':
			tok = s.switch2(token.MUL, token.MUL_ASSIGN)
		case '/':
			if s.ch == '/' || s.ch == '*' {
				// comment
				if s.insertSemi && s.findLineEnd() {
					// reset position to the beginning of the comment
					s.ch = '/'
					s.offset = s.file.Offset(pos)
					s.rdOffset = s.offset + 1
					s.insertSemi = false // newline consumed
					return pos, token.SEMICOLON, "\n"
				}
				comment := s.scanComment()
				if s.mode&ScanComments == 0 {
					// skip comment
					s.insertSemi = false // newline consumed
					goto scanAgain
				}
				tok = token.COMMENT
				lit = comment
			} else {
				tok = s.switch2(token.QUO, token.QUO_ASSIGN)
			}
		case '%':
			tok = s.switch2(token.REM, token.REM_ASSIGN)
		case '^':
			tok = s.switch2(token.XOR, token.XOR_ASSIGN)
		case '<':
			if s.ch == '-' {
				s.next()
				tok = token.ARROW
			} else {
				tok = s.switch4(token.LSS, token.LEQ, '<', token.SHL, token.SHL_ASSIGN)
			}
		case '>':
			tok = s.switch4(token.GTR, token.GEQ, '>', token.SHR, token.SHR_ASSIGN)
		case '=':
			tok = s.switch2(token.ASSIGN, token.EQL)
		case '!':
			tok = s.switch2(token.NOT, token.NEQ)
		case '&':
			if s.ch == '^' {
				s.next()
				tok = s.switch2(token.AND_NOT, token.AND_NOT_ASSIGN)
			} else {
				tok = s.switch3(token.AND, token.AND_ASSIGN, '&', token.LAND)
			}
		case '|':
			tok = s.switch3(token.OR, token.OR_ASSIGN, '|', token.LOR)
		default:
			// next reports unexpected BOMs - don't repeat
			if ch != bom {
				s.errorf(s.file.Offset(pos), "illegal character %#U", ch)
			}
			insertSemi = s.insertSemi // preserve insertSemi info
			tok = token.ILLEGAL
			lit = string(ch)
		}
	}
	if s.mode&dontInsertSemis == 0 {
		s.insertSemi = insertSemi
	}

	return
}