1package lexer 2 3import ( 4 "bytes" 5 "fmt" 6 "io" 7 "strconv" 8 "strings" 9 "text/scanner" 10 "unicode/utf8" 11) 12 13// TextScannerLexer is a lexer that uses the text/scanner module. 14var ( 15 TextScannerLexer Definition = &defaultDefinition{} 16 17 // DefaultDefinition defines properties for the default lexer. 18 DefaultDefinition = TextScannerLexer 19) 20 21type defaultDefinition struct{} 22 23func (d *defaultDefinition) Lex(r io.Reader) (Lexer, error) { 24 return Lex(r), nil 25} 26 27func (d *defaultDefinition) Symbols() map[string]rune { 28 return map[string]rune{ 29 "EOF": scanner.EOF, 30 "Char": scanner.Char, 31 "Ident": scanner.Ident, 32 "Int": scanner.Int, 33 "Float": scanner.Float, 34 "String": scanner.String, 35 "RawString": scanner.RawString, 36 "Comment": scanner.Comment, 37 } 38} 39 40// textScannerLexer is a Lexer based on text/scanner.Scanner 41type textScannerLexer struct { 42 scanner *scanner.Scanner 43 filename string 44 err error 45} 46 47// Lex an io.Reader with text/scanner.Scanner. 48// 49// This provides very fast lexing of source code compatible with Go tokens. 50// 51// Note that this differs from text/scanner.Scanner in that string tokens will be unquoted. 52func Lex(r io.Reader) Lexer { 53 s := &scanner.Scanner{} 54 s.Init(r) 55 lexer := lexWithScanner(r, s) 56 lexer.scanner.Error = func(s *scanner.Scanner, msg string) { 57 // This is to support single quoted strings. Hacky. 58 if !strings.HasSuffix(msg, "char literal") { 59 lexer.err = Errorf(Position(lexer.scanner.Pos()), msg) 60 } 61 } 62 return lexer 63} 64 65// LexWithScanner creates a Lexer from a user-provided scanner.Scanner. 66// 67// Useful if you need to customise the Scanner. 68func LexWithScanner(r io.Reader, scan *scanner.Scanner) Lexer { 69 return lexWithScanner(r, scan) 70} 71 72func lexWithScanner(r io.Reader, scan *scanner.Scanner) *textScannerLexer { 73 lexer := &textScannerLexer{ 74 filename: NameOfReader(r), 75 scanner: scan, 76 } 77 return lexer 78} 79 80// LexBytes returns a new default lexer over bytes. 81func LexBytes(b []byte) Lexer { 82 return Lex(bytes.NewReader(b)) 83} 84 85// LexString returns a new default lexer over a string. 86func LexString(s string) Lexer { 87 return Lex(strings.NewReader(s)) 88} 89 90func (t *textScannerLexer) Next() (Token, error) { 91 typ := t.scanner.Scan() 92 text := t.scanner.TokenText() 93 pos := Position(t.scanner.Position) 94 pos.Filename = t.filename 95 if t.err != nil { 96 return Token{}, t.err 97 } 98 return textScannerTransform(Token{ 99 Type: typ, 100 Value: text, 101 Pos: pos, 102 }) 103} 104 105func textScannerTransform(token Token) (Token, error) { 106 // Unquote strings. 107 switch token.Type { 108 case scanner.Char: 109 // FIXME(alec): This is pretty hacky...we convert a single quoted char into a double 110 // quoted string in order to support single quoted strings. 111 token.Value = fmt.Sprintf("\"%s\"", token.Value[1:len(token.Value)-1]) 112 fallthrough 113 case scanner.String: 114 s, err := strconv.Unquote(token.Value) 115 if err != nil { 116 return Token{}, Errorf(token.Pos, "%s: %q", err.Error(), token.Value) 117 } 118 token.Value = s 119 if token.Type == scanner.Char && utf8.RuneCountInString(s) > 1 { 120 token.Type = scanner.String 121 } 122 case scanner.RawString: 123 token.Value = token.Value[1 : len(token.Value)-1] 124 } 125 return token, nil 126} 127