1// Copyright (c) 2017 Ernest Micklei
2//
3// MIT License
4//
5// Permission is hereby granted, free of charge, to any person obtaining
6// a copy of this software and associated documentation files (the
7// "Software"), to deal in the Software without restriction, including
8// without limitation the rights to use, copy, modify, merge, publish,
9// distribute, sublicense, and/or sell copies of the Software, and to
10// permit persons to whom the Software is furnished to do so, subject to
11// the following conditions:
12//
13// The above copyright notice and this permission notice shall be
14// included in all copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
24package proto
25
26import (
27	"strings"
28)
29
// token identifies the lexical class of a literal: a special marker, an
// identifier, a punctuation character, a comment or a keyword.
type token int
32
// Token values are assigned consecutively by iota, so the order of the
// declarations below is significant: isKeyword relies on every keyword being
// declared strictly between the keywordsStart and keywordsEnd sentinels.
const (
	// Special tokens
	tILLEGAL token = iota
	tEOF
	tWS

	// Literals
	tIDENT // what asToken returns for a literal that is no delimiter, keyword or comment

	// Misc characters
	tSEMICOLON   // ;
	tCOLON       // :
	tEQUALS      // =
	tQUOTE       // "
	tSINGLEQUOTE // '
	tLEFTPAREN   // (
	tRIGHTPAREN  // )
	tLEFTCURLY   // {
	tRIGHTCURLY  // }
	tLEFTSQUARE  // [
	tRIGHTSQUARE // ]
	tCOMMENT     // a literal that starts with // or /* (see isComment)
	tLESS        // <
	tGREATER     // >
	tCOMMA       // ,
	tDOT         // .

	// Keywords
	keywordsStart // sentinel: exclusive lower bound of the keyword range
	tSYNTAX
	tSERVICE
	tRPC
	tRETURNS
	tMESSAGE
	tIMPORT
	tPACKAGE
	tOPTION
	tREPEATED
	tWEAK
	tPUBLIC

	// special fields
	tONEOF
	tMAP
	tRESERVED
	tENUM
	tSTREAM

	// BEGIN proto2
	tOPTIONAL
	tGROUP
	tEXTENSIONS
	tEXTEND
	tREQUIRED
	// END proto2
	keywordsEnd // sentinel: exclusive upper bound of the keyword range
)
90
// typeTokens lists the names of the protobuf scalar value types, separated by
// single spaces. It exists for future validation.
const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 fixed64 sfixed32 sfixed64 bool string bytes"
93
94// isKeyword returns if tok is in the keywords range
95func isKeyword(tok token) bool {
96	return keywordsStart < tok && tok < keywordsEnd
97}
98
// isWhitespace reports whether r is a space, a tab or a newline. No other
// rune (carriage return included) counts as whitespace here.
func isWhitespace(r rune) bool {
	switch r {
	case ' ', '\t', '\n':
		return true
	default:
		return false
	}
}
103
// isDigit reports whether ch is one of the ASCII decimal digits '0' to '9'.
func isDigit(ch rune) bool {
	if ch < '0' {
		return false
	}
	return ch <= '9'
}
106
// isString reports whether lit is enclosed in a matching pair of double or
// single quotes. The opening and closing quote must be distinct characters,
// so a literal shorter than two bytes is never a string.
func isString(lit string) bool {
	// A lone quote is both prefix and suffix of itself; without this guard a
	// single `"` (or `'`) would be reported as a quoted string.
	if len(lit) < 2 {
		return false
	}
	doubleQuoted := strings.HasPrefix(lit, "\"") && strings.HasSuffix(lit, "\"")
	singleQuoted := strings.HasPrefix(lit, "'") && strings.HasSuffix(lit, "'")
	return doubleQuoted || singleQuoted
}
117
// isComment reports whether lit begins with a line comment marker ("//") or
// a block comment marker ("/*").
func isComment(lit string) bool {
	if strings.HasPrefix(lit, "//") {
		return true
	}
	return strings.HasPrefix(lit, "/*")
}
121
// doubleQuoteRune is the double quote character as a rune; unQuote returns it
// whenever it leaves its input unchanged.
const doubleQuoteRune rune = '"'
123
124// unQuote removes one matching leading and trailing single or double quote.
125//
126// https://github.com/emicklei/proto/issues/103
127// cannot use strconv.Unquote as this unescapes quotes.
128func unQuote(lit string) (string, rune) {
129	if len(lit) < 2 {
130		return lit, doubleQuoteRune
131	}
132	chars := []rune(lit)
133	first, last := chars[0], chars[len(chars)-1]
134	if first != last {
135		return lit, doubleQuoteRune
136	}
137	if s := string(chars[0]); s == "\"" || s == stringWithSingleQuote {
138		return string(chars[1 : len(chars)-1]), chars[0]
139	}
140	return lit, doubleQuoteRune
141}
142
143func asToken(literal string) token {
144	switch literal {
145	// delimiters
146	case ";":
147		return tSEMICOLON
148	case ":":
149		return tCOLON
150	case "=":
151		return tEQUALS
152	case "\"":
153		return tQUOTE
154	case "'":
155		return tSINGLEQUOTE
156	case "(":
157		return tLEFTPAREN
158	case ")":
159		return tRIGHTPAREN
160	case "{":
161		return tLEFTCURLY
162	case "}":
163		return tRIGHTCURLY
164	case "[":
165		return tLEFTSQUARE
166	case "]":
167		return tRIGHTSQUARE
168	case "<":
169		return tLESS
170	case ">":
171		return tGREATER
172	case ",":
173		return tCOMMA
174	case ".":
175		return tDOT
176	// words
177	case "syntax":
178		return tSYNTAX
179	case "service":
180		return tSERVICE
181	case "rpc":
182		return tRPC
183	case "returns":
184		return tRETURNS
185	case "option":
186		return tOPTION
187	case "message":
188		return tMESSAGE
189	case "import":
190		return tIMPORT
191	case "package":
192		return tPACKAGE
193	case "oneof":
194		return tONEOF
195	// special fields
196	case "map":
197		return tMAP
198	case "reserved":
199		return tRESERVED
200	case "enum":
201		return tENUM
202	case "repeated":
203		return tREPEATED
204	case "weak":
205		return tWEAK
206	case "public":
207		return tPUBLIC
208	case "stream":
209		return tSTREAM
210	// proto2
211	case "optional":
212		return tOPTIONAL
213	case "group":
214		return tGROUP
215	case "extensions":
216		return tEXTENSIONS
217	case "extend":
218		return tEXTEND
219	case "required":
220		return tREQUIRED
221	default:
222		// special cases
223		if isComment(literal) {
224			return tCOMMENT
225		}
226		return tIDENT
227	}
228}
229