1// Copyright (c) 2017 Ernest Micklei 2// 3// MIT License 4// 5// Permission is hereby granted, free of charge, to any person obtaining 6// a copy of this software and associated documentation files (the 7// "Software"), to deal in the Software without restriction, including 8// without limitation the rights to use, copy, modify, merge, publish, 9// distribute, sublicense, and/or sell copies of the Software, and to 10// permit persons to whom the Software is furnished to do so, subject to 11// the following conditions: 12// 13// The above copyright notice and this permission notice shall be 14// included in all copies or substantial portions of the Software. 15// 16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20// LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 24package proto 25 26import ( 27 "strings" 28) 29 30// token represents a lexical token. 31type token int 32 33const ( 34 // Special tokens 35 tILLEGAL token = iota 36 tEOF 37 tWS 38 39 // Literals 40 tIDENT 41 42 // Misc characters 43 tSEMICOLON // ; 44 tCOLON // : 45 tEQUALS // = 46 tQUOTE // " 47 tSINGLEQUOTE // ' 48 tLEFTPAREN // ( 49 tRIGHTPAREN // ) 50 tLEFTCURLY // { 51 tRIGHTCURLY // } 52 tLEFTSQUARE // [ 53 tRIGHTSQUARE // ] 54 tCOMMENT // / 55 tLESS // < 56 tGREATER // > 57 tCOMMA // , 58 tDOT // . 59 60 // Keywords 61 keywordsStart 62 tSYNTAX 63 tSERVICE 64 tRPC 65 tRETURNS 66 tMESSAGE 67 tIMPORT 68 tPACKAGE 69 tOPTION 70 tREPEATED 71 tWEAK 72 tPUBLIC 73 74 // special fields 75 tONEOF 76 tMAP 77 tRESERVED 78 tENUM 79 tSTREAM 80 81 // BEGIN proto2 82 tOPTIONAL 83 tGROUP 84 tEXTENSIONS 85 tEXTEND 86 tREQUIRED 87 // END proto2 88 keywordsEnd 89) 90 91// typeTokens exists for future validation 92const typeTokens = "double float int32 int64 uint32 uint64 sint32 sint64 fixed32 sfixed32 sfixed64 bool string bytes" 93 94// isKeyword returns if tok is in the keywords range 95func isKeyword(tok token) bool { 96 return keywordsStart < tok && tok < keywordsEnd 97} 98 99// isWhitespace checks for space,tab and newline 100func isWhitespace(r rune) bool { 101 return r == ' ' || r == '\t' || r == '\n' 102} 103 104// isDigit returns true if the rune is a digit. 105func isDigit(ch rune) bool { return (ch >= '0' && ch <= '9') } 106 107// isString checks if the literal is quoted (single or double). 108func isString(lit string) bool { 109 return (strings.HasPrefix(lit, "\"") && 110 strings.HasSuffix(lit, "\"")) || 111 (strings.HasPrefix(lit, "'") && 112 strings.HasSuffix(lit, "'")) 113} 114 115func isComment(lit string) bool { 116 return strings.HasPrefix(lit, "//") || strings.HasPrefix(lit, "/*") 117} 118 119func unQuote(lit string) string { 120 return strings.Trim(lit, "\"'") 121} 122 123func asToken(literal string) token { 124 switch literal { 125 // delimiters 126 case ";": 127 return tSEMICOLON 128 case ":": 129 return tCOLON 130 case "=": 131 return tEQUALS 132 case "\"": 133 return tQUOTE 134 case "'": 135 return tSINGLEQUOTE 136 case "(": 137 return tLEFTPAREN 138 case ")": 139 return tRIGHTPAREN 140 case "{": 141 return tLEFTCURLY 142 case "}": 143 return tRIGHTCURLY 144 case "[": 145 return tLEFTSQUARE 146 case "]": 147 return tRIGHTSQUARE 148 case "<": 149 return tLESS 150 case ">": 151 return tGREATER 152 case ",": 153 return tCOMMA 154 case ".": 155 return tDOT 156 // words 157 case "syntax": 158 return tSYNTAX 159 case "service": 160 return tSERVICE 161 case "rpc": 162 return tRPC 163 case "returns": 164 return tRETURNS 165 case "option": 166 return tOPTION 167 case "message": 168 return tMESSAGE 169 case "import": 170 return tIMPORT 171 case "package": 172 return tPACKAGE 173 case "oneof": 174 return tONEOF 175 // special fields 176 case "map": 177 return tMAP 178 case "reserved": 179 return tRESERVED 180 case "enum": 181 return tENUM 182 case "repeated": 183 return tREPEATED 184 case "weak": 185 return tWEAK 186 case "public": 187 return tPUBLIC 188 case "stream": 189 return tSTREAM 190 // proto2 191 case "optional": 192 return tOPTIONAL 193 case "group": 194 return tGROUP 195 case "extensions": 196 return tEXTENSIONS 197 case "extend": 198 return tEXTEND 199 case "required": 200 return tREQUIRED 201 default: 202 // special cases 203 if isComment(literal) { 204 return tCOMMENT 205 } 206 return tIDENT 207 } 208} 209