1// Copyright (c) 2017 Ernest Micklei 2// 3// MIT License 4// 5// Permission is hereby granted, free of charge, to any person obtaining 6// a copy of this software and associated documentation files (the 7// "Software"), to deal in the Software without restriction, including 8// without limitation the rights to use, copy, modify, merge, publish, 9// distribute, sublicense, and/or sell copies of the Software, and to 10// permit persons to whom the Software is furnished to do so, subject to 11// the following conditions: 12// 13// The above copyright notice and this permission notice shall be 14// included in all copies or substantial portions of the Software. 15// 16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 24package proto 25 26import ( 27 "strings" 28) 29 30// token represents a lexical token. 31type token int 32 33const ( 34 // Special tokens 35 tILLEGAL token = iota 36 tEOF 37 tWS 38 39 // Literals 40 tIDENT 41 42 // Misc characters 43 tSEMICOLON // ; 44 tCOLON // : 45 tEQUALS // = 46 tQUOTE // " 47 tSINGLEQUOTE // ' 48 tLEFTPAREN // ( 49 tRIGHTPAREN // ) 50 tLEFTCURLY // { 51 tRIGHTCURLY // } 52 tLEFTSQUARE // [ 53 tRIGHTSQUARE // ] 54 tCOMMENT // / 55 tLESS // < 56 tGREATER // > 57 tCOMMA // , 58 tDOT // . 59 60 // Keywords 61 keywordsStart 62 tSYNTAX 63 tSERVICE 64 tRPC 65 tRETURNS 66 tMESSAGE 67 tIMPORT 68 tPACKAGE 69 tOPTION 70 tREPEATED 71 tWEAK 72 tPUBLIC 73 74 // special fields 75 tONEOF 76 tMAP 77 tRESERVED 78 tENUM 79 tSTREAM 80 81 // BEGIN proto2 82 tOPTIONAL 83 tGROUP 84 tEXTENSIONS 85 tEXTEND 86 tREQUIRED 87 // END proto2 88 keywordsEnd 89) 90 91// typeTokens exists for future validation 92const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 sfixed32 sfixed64 bool string bytes" 93 94// isKeyword returns if tok is in the keywords range 95func isKeyword(tok token) bool { 96 return keywordsStart < tok && tok < keywordsEnd 97} 98 99// isWhitespace checks for space,tab and newline 100func isWhitespace(r rune) bool { 101 return r == ' ' || r == '\t' || r == '\n' 102} 103 104// isDigit returns true if the rune is a digit. 105func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') } 106 107// isString checks if the literal is quoted (single or double). 108func isString(lit string) bool { 109 if lit == "'" { 110 return false 111 } 112 return (strings.HasPrefix(lit, "\"") && 113 strings.HasSuffix(lit, "\"")) || 114 (strings.HasPrefix(lit, "'") && 115 strings.HasSuffix(lit, "'")) 116} 117 118func isComment(lit string) bool { 119 return strings.HasPrefix(lit, "//") || strings.HasPrefix(lit, "/*") 120} 121 122const doubleQuoteRune = rune('"') 123 124// unQuote removes one matching leading and trailing single or double quote. 125// 126// https://github.com/emicklei/proto/issues/103 127// cannot use strconv.Unquote as this unescapes quotes. 128func unQuote(lit string) (string, rune) { 129 if len(lit) < 2 { 130 return lit, doubleQuoteRune 131 } 132 chars := []rune(lit) 133 first, last := chars[0], chars[len(chars)-1] 134 if first != last { 135 return lit, doubleQuoteRune 136 } 137 if s := string(chars[0]); s == "\"" || s == stringWithSingleQuote { 138 return string(chars[1 : len(chars)-1]), chars[0] 139 } 140 return lit, doubleQuoteRune 141} 142 143func asToken(literal string) token { 144 switch literal { 145 // delimiters 146 case ";": 147 return tSEMICOLON 148 case ":": 149 return tCOLON 150 case "=": 151 return tEQUALS 152 case "\"": 153 return tQUOTE 154 case "'": 155 return tSINGLEQUOTE 156 case "(": 157 return tLEFTPAREN 158 case ")": 159 return tRIGHTPAREN 160 case "{": 161 return tLEFTCURLY 162 case "}": 163 return tRIGHTCURLY 164 case "[": 165 return tLEFTSQUARE 166 case "]": 167 return tRIGHTSQUARE 168 case "<": 169 return tLESS 170 case ">": 171 return tGREATER 172 case ",": 173 return tCOMMA 174 case ".": 175 return tDOT 176 // words 177 case "syntax": 178 return tSYNTAX 179 case "service": 180 return tSERVICE 181 case "rpc": 182 return tRPC 183 case "returns": 184 return tRETURNS 185 case "option": 186 return tOPTION 187 case "message": 188 return tMESSAGE 189 case "import": 190 return tIMPORT 191 case "package": 192 return tPACKAGE 193 case "oneof": 194 return tONEOF 195 // special fields 196 case "map": 197 return tMAP 198 case "reserved": 199 return tRESERVED 200 case "enum": 201 return tENUM 202 case "repeated": 203 return tREPEATED 204 case "weak": 205 return tWEAK 206 case "public": 207 return tPUBLIC 208 case "stream": 209 return tSTREAM 210 // proto2 211 case "optional": 212 return tOPTIONAL 213 case "group": 214 return tGROUP 215 case "extensions": 216 return tEXTENSIONS 217 case "extend": 218 return tEXTEND 219 case "required": 220 return tREQUIRED 221 default: 222 // special cases 223 if isComment(literal) { 224 return tCOMMENT 225 } 226 return tIDENT 227 } 228} 229