1// Copyright (c) 2017 Ernest Micklei
2//
3// MIT License
4//
5// Permission is hereby granted, free of charge, to any person obtaining
6// a copy of this software and associated documentation files (the
7// "Software"), to deal in the Software without restriction, including
8// without limitation the rights to use, copy, modify, merge, publish,
9// distribute, sublicense, and/or sell copies of the Software, and to
10// permit persons to whom the Software is furnished to do so, subject to
11// the following conditions:
12//
13// The above copyright notice and this permission notice shall be
14// included in all copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24package proto
25
26import (
27	"strings"
28)
29
// token identifies the lexical category of a scanned literal.
type token int

// The token values are numbered consecutively by iota, so the declaration
// order below is significant: isKeyword treats every value strictly between
// keywordsStart and keywordsEnd as a keyword.
const (
	// Special tokens.
	tILLEGAL = token(iota)
	tEOF
	tWS

	// Literals.
	tIDENT

	// Delimiters, plus the comment marker.
	tSEMICOLON   // ;
	tCOLON       // :
	tEQUALS      // =
	tQUOTE       // "
	tSINGLEQUOTE // '
	tLEFTPAREN   // (
	tRIGHTPAREN  // )
	tLEFTCURLY   // {
	tRIGHTCURLY  // }
	tLEFTSQUARE  // [
	tRIGHTSQUARE // ]
	tCOMMENT     // a literal starting with // or /*
	tLESS        // <
	tGREATER     // >
	tCOMMA       // ,
	tDOT         // .

	// Keywords. keywordsStart and keywordsEnd are exclusive range markers and
	// are not produced for any literal.
	keywordsStart
	tSYNTAX
	tSERVICE
	tRPC
	tRETURNS
	tMESSAGE
	tIMPORT
	tPACKAGE
	tOPTION
	tREPEATED
	tWEAK
	tPUBLIC

	// Special fields.
	tONEOF
	tMAP
	tRESERVED
	tENUM
	tSTREAM

	// Keywords that belong to proto2 (begin).
	tOPTIONAL
	tGROUP
	tEXTENSIONS
	tEXTEND
	tREQUIRED
	// Keywords that belong to proto2 (end).
	keywordsEnd
)
90
// typeTokens lists the names of the protobuf scalar value types, separated by
// single spaces. It exists for future validation.
const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes"
93
94// isKeyword returns if tok is in the keywords range
95func isKeyword(tok token) bool {
96	return keywordsStart < tok && tok < keywordsEnd
97}
98
// isWhitespace reports whether r is a space, a tab or a newline. No other
// rune (carriage return included) counts as whitespace here.
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
103
// isDigit reports whether ch is one of the ASCII decimal digits '0' to '9'.
func isDigit(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
106
// isString reports whether lit is enclosed in a pair of matching quotes,
// either double ("...") or single ('...').
func isString(lit string) bool {
	// A lone quote character is both a prefix and a suffix of itself, so the
	// prefix/suffix test alone would accept it; a quoted literal needs
	// separate opening and closing quotes.
	if len(lit) < 2 {
		return false
	}
	return (strings.HasPrefix(lit, "\"") && strings.HasSuffix(lit, "\"")) ||
		(strings.HasPrefix(lit, "'") && strings.HasSuffix(lit, "'"))
}
114
// isComment reports whether lit begins with a line comment marker (//) or a
// block comment marker (/*).
func isComment(lit string) bool {
	switch {
	case strings.HasPrefix(lit, "//"):
		return true
	case strings.HasPrefix(lit, "/*"):
		return true
	default:
		return false
	}
}
118
// unQuote removes one pair of matching enclosing quotes (double or single)
// from lit and returns what lies between them. A literal that is not enclosed
// in such a pair is returned unchanged.
//
// Only the outermost pair is removed: quote characters that are part of the
// content, such as the single quotes in "say 'hi'", are preserved. Escape
// sequences inside the literal are not interpreted.
func unQuote(lit string) string {
	if len(lit) < 2 {
		return lit
	}
	// Byte comparison is sufficient: both quote characters are ASCII, and a
	// byte belonging to a multi-byte UTF-8 sequence never equals an ASCII byte.
	first, last := lit[0], lit[len(lit)-1]
	if first != last {
		return lit
	}
	if first != '"' && first != '\'' {
		return lit
	}
	return lit[1 : len(lit)-1]
}
122
123func asToken(literal string) token {
124	switch literal {
125	// delimiters
126	case ";":
127		return tSEMICOLON
128	case ":":
129		return tCOLON
130	case "=":
131		return tEQUALS
132	case "\"":
133		return tQUOTE
134	case "'":
135		return tSINGLEQUOTE
136	case "(":
137		return tLEFTPAREN
138	case ")":
139		return tRIGHTPAREN
140	case "{":
141		return tLEFTCURLY
142	case "}":
143		return tRIGHTCURLY
144	case "[":
145		return tLEFTSQUARE
146	case "]":
147		return tRIGHTSQUARE
148	case "<":
149		return tLESS
150	case ">":
151		return tGREATER
152	case ",":
153		return tCOMMA
154	case ".":
155		return tDOT
156	// words
157	case "syntax":
158		return tSYNTAX
159	case "service":
160		return tSERVICE
161	case "rpc":
162		return tRPC
163	case "returns":
164		return tRETURNS
165	case "option":
166		return tOPTION
167	case "message":
168		return tMESSAGE
169	case "import":
170		return tIMPORT
171	case "package":
172		return tPACKAGE
173	case "oneof":
174		return tONEOF
175	// special fields
176	case "map":
177		return tMAP
178	case "reserved":
179		return tRESERVED
180	case "enum":
181		return tENUM
182	case "repeated":
183		return tREPEATED
184	case "weak":
185		return tWEAK
186	case "public":
187		return tPUBLIC
188	case "stream":
189		return tSTREAM
190	// proto2
191	case "optional":
192		return tOPTIONAL
193	case "group":
194		return tGROUP
195	case "extensions":
196		return tEXTENSIONS
197	case "extend":
198		return tEXTEND
199	case "required":
200		return tREQUIRED
201	default:
202		// special cases
203		if isComment(literal) {
204			return tCOMMENT
205		}
206		return tIDENT
207	}
208}
209