1package lexer
2
3import (
4	"bytes"
5	"fmt"
6	"io"
7	"strconv"
8	"strings"
9	"text/scanner"
10	"unicode/utf8"
11)
12
var (
	// TextScannerLexer is a lexer Definition that uses the text/scanner module.
	TextScannerLexer Definition = &defaultDefinition{}

	// DefaultDefinition is the Definition used by default; it aliases TextScannerLexer.
	DefaultDefinition = TextScannerLexer
)
20
// defaultDefinition is the Definition backing TextScannerLexer.
type defaultDefinition struct{}
22
// Lex implements Definition by delegating to the package-level Lex function.
// The returned error is always nil.
func (d *defaultDefinition) Lex(r io.Reader) (Lexer, error) {
	return Lex(r), nil
}
26
27func (d *defaultDefinition) Symbols() map[string]rune {
28	return map[string]rune{
29		"EOF":       scanner.EOF,
30		"Char":      scanner.Char,
31		"Ident":     scanner.Ident,
32		"Int":       scanner.Int,
33		"Float":     scanner.Float,
34		"String":    scanner.String,
35		"RawString": scanner.RawString,
36		"Comment":   scanner.Comment,
37	}
38}
39
// textScannerLexer is a Lexer based on text/scanner.Scanner
type textScannerLexer struct {
	scanner  *scanner.Scanner // underlying token scanner
	filename string           // source name attached to token positions (from NameOfReader)
	err      error            // sticky: set by the scanner's Error callback, returned by Next, never cleared
}
46
47// Lex an io.Reader with text/scanner.Scanner.
48//
49// This provides very fast lexing of source code compatible with Go tokens.
50//
51// Note that this differs from text/scanner.Scanner in that string tokens will be unquoted.
52func Lex(r io.Reader) Lexer {
53	s := &scanner.Scanner{}
54	s.Init(r)
55	lexer := lexWithScanner(r, s)
56	lexer.scanner.Error = func(s *scanner.Scanner, msg string) {
57		// This is to support single quoted strings. Hacky.
58		if !strings.HasSuffix(msg, "char literal") {
59			lexer.err = Errorf(Position(lexer.scanner.Pos()), msg)
60		}
61	}
62	return lexer
63}
64
65// LexWithScanner creates a Lexer from a user-provided scanner.Scanner.
66//
67// Useful if you need to customise the Scanner.
68func LexWithScanner(r io.Reader, scan *scanner.Scanner) Lexer {
69	return lexWithScanner(r, scan)
70}
71
72func lexWithScanner(r io.Reader, scan *scanner.Scanner) *textScannerLexer {
73	lexer := &textScannerLexer{
74		filename: NameOfReader(r),
75		scanner:  scan,
76	}
77	return lexer
78}
79
80// LexBytes returns a new default lexer over bytes.
81func LexBytes(b []byte) Lexer {
82	return Lex(bytes.NewReader(b))
83}
84
85// LexString returns a new default lexer over a string.
86func LexString(s string) Lexer {
87	return Lex(strings.NewReader(s))
88}
89
90func (t *textScannerLexer) Next() (Token, error) {
91	typ := t.scanner.Scan()
92	text := t.scanner.TokenText()
93	pos := Position(t.scanner.Position)
94	pos.Filename = t.filename
95	if t.err != nil {
96		return Token{}, t.err
97	}
98	return textScannerTransform(Token{
99		Type:  typ,
100		Value: text,
101		Pos:   pos,
102	})
103}
104
105func textScannerTransform(token Token) (Token, error) {
106	// Unquote strings.
107	switch token.Type {
108	case scanner.Char:
109		// FIXME(alec): This is pretty hacky...we convert a single quoted char into a double
110		// quoted string in order to support single quoted strings.
111		token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1])
112		fallthrough
113	case scanner.String:
114		s, err := strconv.Unquote(token.Value)
115		if err != nil {
116			return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value)
117		}
118		token.Value = s
119		if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 {
120			token.Type = scanner.String
121		}
122	case scanner.RawString:
123		token.Value = token.Value[1 : len(token.Value)-1]
124	}
125	return token, nil
126}
127