1 // lex.h -- Go frontend lexer. -*- C++ -*- 2 3 // Copyright 2009 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 #ifndef GO_LEX_H 8 #define GO_LEX_H 9 10 #include <mpfr.h> 11 12 #include "operator.h" 13 #include "go-linemap.h" 14 15 struct Unicode_range; 16 17 // The keywords. These must be in sorted order, other than 18 // KEYWORD_INVALID. They must match the Keywords::mapping_ array in 19 // lex.cc. 20 21 enum Keyword 22 { 23 KEYWORD_INVALID, // Not a keyword. 24 KEYWORD_ASM, 25 KEYWORD_BREAK, 26 KEYWORD_CASE, 27 KEYWORD_CHAN, 28 KEYWORD_CONST, 29 KEYWORD_CONTINUE, 30 KEYWORD_DEFAULT, 31 KEYWORD_DEFER, 32 KEYWORD_ELSE, 33 KEYWORD_FALLTHROUGH, 34 KEYWORD_FOR, 35 KEYWORD_FUNC, 36 KEYWORD_GO, 37 KEYWORD_GOTO, 38 KEYWORD_IF, 39 KEYWORD_IMPORT, 40 KEYWORD_INTERFACE, 41 KEYWORD_MAP, 42 KEYWORD_PACKAGE, 43 KEYWORD_RANGE, 44 KEYWORD_RETURN, 45 KEYWORD_SELECT, 46 KEYWORD_STRUCT, 47 KEYWORD_SWITCH, 48 KEYWORD_TYPE, 49 KEYWORD_VAR 50 }; 51 52 // Pragmas built from magic comments and recorded for functions. 53 // These are used as bits in a bitmask. 54 // The set of values is intended to be the same as the gc compiler. 55 56 enum GoPragma 57 { 58 GOPRAGMA_NOINTERFACE = 1 << 0, // Method not in type descriptor. 59 GOPRAGMA_NOESCAPE = 1 << 1, // Args do not escape. 60 GOPRAGMA_NORACE = 1 << 2, // No race detector. 61 GOPRAGMA_NOSPLIT = 1 << 3, // Do not split stack. 62 GOPRAGMA_NOINLINE = 1 << 4, // Do not inline. 63 GOPRAGMA_SYSTEMSTACK = 1 << 5, // Must run on system stack. 64 GOPRAGMA_NOWRITEBARRIER = 1 << 6, // No write barriers. 65 GOPRAGMA_NOWRITEBARRIERREC = 1 << 7, // No write barriers here or callees. 66 GOPRAGMA_CGOUNSAFEARGS = 1 << 8, // Pointer to arg is pointer to all. 67 GOPRAGMA_UINTPTRESCAPES = 1 << 9, // uintptr(p) escapes. 68 GOPRAGMA_NOTINHEAP = 1 << 10 // type is not in heap. 69 }; 70 71 // A token returned from the lexer. 72 73 class Token 74 { 75 public: 76 // Token classification. 77 enum Classification 78 { 79 // Token is invalid. 80 TOKEN_INVALID, 81 // Token indicates end of input. 82 TOKEN_EOF, 83 // Token is a keyword. 84 TOKEN_KEYWORD, 85 // Token is an identifier. 86 TOKEN_IDENTIFIER, 87 // Token is a string of characters. 88 TOKEN_STRING, 89 // Token is an operator. 90 TOKEN_OPERATOR, 91 // Token is a character constant. 92 TOKEN_CHARACTER, 93 // Token is an integer. 94 TOKEN_INTEGER, 95 // Token is a floating point number. 96 TOKEN_FLOAT, 97 // Token is an imaginary number. 98 TOKEN_IMAGINARY 99 }; 100 101 ~Token(); 102 Token(const Token&); 103 Token& operator=(const Token&); 104 105 // Get token classification. 106 Classification classification()107 classification() const 108 { return this->classification_; } 109 110 // Make a token for an invalid value. 111 static Token make_invalid_token(Location location)112 make_invalid_token(Location location) 113 { return Token(TOKEN_INVALID, location); } 114 115 // Make a token representing end of file. 116 static Token make_eof_token(Location location)117 make_eof_token(Location location) 118 { return Token(TOKEN_EOF, location); } 119 120 // Make a keyword token. 121 static Token make_keyword_token(Keyword keyword,Location location)122 make_keyword_token(Keyword keyword, Location location) 123 { 124 Token tok(TOKEN_KEYWORD, location); 125 tok.u_.keyword = keyword; 126 return tok; 127 } 128 129 // Make an identifier token. 130 static Token make_identifier_token(const std::string & value,bool is_exported,Location location)131 make_identifier_token(const std::string& value, bool is_exported, 132 Location location) 133 { 134 Token tok(TOKEN_IDENTIFIER, location); 135 tok.u_.identifier_value.name = new std::string(value); 136 tok.u_.identifier_value.is_exported = is_exported; 137 return tok; 138 } 139 140 // Make a quoted string token. 141 static Token make_string_token(const std::string & value,Location location)142 make_string_token(const std::string& value, Location location) 143 { 144 Token tok(TOKEN_STRING, location); 145 tok.u_.string_value = new std::string(value); 146 return tok; 147 } 148 149 // Make an operator token. 150 static Token make_operator_token(Operator op,Location location)151 make_operator_token(Operator op, Location location) 152 { 153 Token tok(TOKEN_OPERATOR, location); 154 tok.u_.op = op; 155 return tok; 156 } 157 158 // Make a character constant token. 159 static Token make_character_token(mpz_t val,Location location)160 make_character_token(mpz_t val, Location location) 161 { 162 Token tok(TOKEN_CHARACTER, location); 163 mpz_init(tok.u_.integer_value); 164 mpz_swap(tok.u_.integer_value, val); 165 return tok; 166 } 167 168 // Make an integer token. 169 static Token make_integer_token(mpz_t val,Location location)170 make_integer_token(mpz_t val, Location location) 171 { 172 Token tok(TOKEN_INTEGER, location); 173 mpz_init(tok.u_.integer_value); 174 mpz_swap(tok.u_.integer_value, val); 175 return tok; 176 } 177 178 // Make a float token. 179 static Token make_float_token(mpfr_t val,Location location)180 make_float_token(mpfr_t val, Location location) 181 { 182 Token tok(TOKEN_FLOAT, location); 183 mpfr_init(tok.u_.float_value); 184 mpfr_swap(tok.u_.float_value, val); 185 return tok; 186 } 187 188 // Make a token for an imaginary number. 189 static Token make_imaginary_token(mpfr_t val,Location location)190 make_imaginary_token(mpfr_t val, Location location) 191 { 192 Token tok(TOKEN_IMAGINARY, location); 193 mpfr_init(tok.u_.float_value); 194 mpfr_swap(tok.u_.float_value, val); 195 return tok; 196 } 197 198 // Get the location of the token. 199 Location location()200 location() const 201 { return this->location_; } 202 203 // Return whether this is an invalid token. 204 bool is_invalid()205 is_invalid() const 206 { return this->classification_ == TOKEN_INVALID; } 207 208 // Return whether this is the EOF token. 209 bool is_eof()210 is_eof() const 211 { return this->classification_ == TOKEN_EOF; } 212 213 // Return the keyword value for a keyword token. 214 Keyword keyword()215 keyword() const 216 { 217 go_assert(this->classification_ == TOKEN_KEYWORD); 218 return this->u_.keyword; 219 } 220 221 // Return whether this is an identifier. 222 bool is_identifier()223 is_identifier() const 224 { return this->classification_ == TOKEN_IDENTIFIER; } 225 226 // Return the identifier. 227 const std::string& identifier()228 identifier() const 229 { 230 go_assert(this->classification_ == TOKEN_IDENTIFIER); 231 return *this->u_.identifier_value.name; 232 } 233 234 // Return whether the identifier is exported. 235 bool is_identifier_exported()236 is_identifier_exported() const 237 { 238 go_assert(this->classification_ == TOKEN_IDENTIFIER); 239 return this->u_.identifier_value.is_exported; 240 } 241 242 // Return whether this is a string. 243 bool is_string()244 is_string() const 245 { 246 return this->classification_ == TOKEN_STRING; 247 } 248 249 // Return the value of a string. The returned value is a string of 250 // UTF-8 characters. 251 std::string string_value()252 string_value() const 253 { 254 go_assert(this->classification_ == TOKEN_STRING); 255 return *this->u_.string_value; 256 } 257 258 // Return the value of a character constant. 259 const mpz_t* character_value()260 character_value() const 261 { 262 go_assert(this->classification_ == TOKEN_CHARACTER); 263 return &this->u_.integer_value; 264 } 265 266 // Return the value of an integer. 267 const mpz_t* integer_value()268 integer_value() const 269 { 270 go_assert(this->classification_ == TOKEN_INTEGER); 271 return &this->u_.integer_value; 272 } 273 274 // Return the value of a float. 275 const mpfr_t* float_value()276 float_value() const 277 { 278 go_assert(this->classification_ == TOKEN_FLOAT); 279 return &this->u_.float_value; 280 } 281 282 // Return the value of an imaginary number. 283 const mpfr_t* imaginary_value()284 imaginary_value() const 285 { 286 go_assert(this->classification_ == TOKEN_IMAGINARY); 287 return &this->u_.float_value; 288 } 289 290 // Return the operator value for an operator token. 291 Operator op()292 op() const 293 { 294 go_assert(this->classification_ == TOKEN_OPERATOR); 295 return this->u_.op; 296 } 297 298 // Return whether this token is KEYWORD. 299 bool is_keyword(Keyword keyword)300 is_keyword(Keyword keyword) const 301 { 302 return (this->classification_ == TOKEN_KEYWORD 303 && this->u_.keyword == keyword); 304 } 305 306 // Return whether this token is OP. 307 bool is_op(Operator op)308 is_op(Operator op) const 309 { return this->classification_ == TOKEN_OPERATOR && this->u_.op == op; } 310 311 // Print the token for debugging. 312 void 313 print(FILE*) const; 314 315 private: 316 // Private constructor used by make_..._token functions above. 317 Token(Classification, Location); 318 319 // Clear the token. 320 void 321 clear(); 322 323 // The token classification. 324 Classification classification_; 325 union 326 { 327 // The keyword value for TOKEN_KEYWORD. 328 Keyword keyword; 329 // The token value for TOKEN_IDENTIFIER. 330 struct 331 { 332 // The name of the identifier. This has been mangled to only 333 // include ASCII characters. 334 std::string* name; 335 // Whether this name should be exported. This is true if the 336 // first letter in the name is upper case. 337 bool is_exported; 338 } identifier_value; 339 // The string value for TOKEN_STRING. 340 std::string* string_value; 341 // The token value for TOKEN_CHARACTER or TOKEN_INTEGER. 342 mpz_t integer_value; 343 // The token value for TOKEN_FLOAT or TOKEN_IMAGINARY. 344 mpfr_t float_value; 345 // The token value for TOKEN_OPERATOR or the keyword value 346 Operator op; 347 } u_; 348 // The source location. 349 Location location_; 350 }; 351 352 // The lexer itself. 353 354 class Lex 355 { 356 public: 357 Lex(const char* input_file_name, FILE* input_file, Linemap *linemap); 358 359 ~Lex(); 360 361 // Return the next token. 362 Token 363 next_token(); 364 365 // Return the contents of any current //extern comment. 366 const std::string& extern_name()367 extern_name() const 368 { return this->extern_; } 369 370 // Return the current set of pragmas, and clear them. 371 unsigned int get_and_clear_pragmas()372 get_and_clear_pragmas() 373 { 374 unsigned int ret = this->pragmas_; 375 this->pragmas_ = 0; 376 return ret; 377 } 378 379 struct Linkname 380 { 381 std::string ext_name; // External name. 382 bool is_exported; // Whether the internal name is exported. 383 Location loc; // Location of go:linkname directive. 384 LinknameLinkname385 Linkname() 386 : ext_name(), is_exported(false), loc() 387 { } 388 LinknameLinkname389 Linkname(const std::string& ext_name_a, bool is_exported_a, Location loc_a) 390 : ext_name(ext_name_a), is_exported(is_exported_a), loc(loc_a) 391 { } 392 }; 393 394 typedef std::map<std::string, Linkname> Linknames; 395 396 // Return the linknames seen so far, or NULL if none, and clear the 397 // set. These are from go:linkname compiler directives. 398 Linknames* get_and_clear_linknames()399 get_and_clear_linknames() 400 { 401 Linknames* ret = this->linknames_; 402 this->linknames_ = NULL; 403 return ret; 404 } 405 406 // Return whether the identifier NAME should be exported. NAME is a 407 // mangled name which includes only ASCII characters. 408 static bool 409 is_exported_name(const std::string& name); 410 411 // Return whether the identifier NAME is invalid. When we see an 412 // invalid character we still build an identifier, but we use a 413 // magic string to indicate that the identifier is invalid. We then 414 // use this to avoid knockon errors. 415 static bool 416 is_invalid_identifier(const std::string& name); 417 418 // A helper function. Append V to STR. IS_CHARACTER is true if V 419 // is a Unicode character which should be converted into UTF-8, 420 // false if it is a byte value to be appended directly. The 421 // location is used to warn about an out of range character. 422 static void 423 append_char(unsigned int v, bool is_charater, std::string* str, 424 Location); 425 426 // A helper function. Fetch a UTF-8 character from STR and store it 427 // in *VALUE. Return the number of bytes read from STR. Return 0 428 // if STR does not point to a valid UTF-8 character. 429 static int 430 fetch_char(const char* str, unsigned int *value); 431 432 // Return whether C is a Unicode or "C" locale space character. 433 static bool 434 is_unicode_space(unsigned int c); 435 436 private: 437 ssize_t 438 get_line(); 439 440 bool 441 require_line(); 442 443 // The current location. 444 Location 445 location() const; 446 447 // A position CHARS column positions before the current location. 448 Location 449 earlier_location(int chars) const; 450 451 static bool 452 is_hex_digit(char); 453 454 static unsigned char octal_value(char c)455 octal_value(char c) 456 { return c - '0'; } 457 458 static unsigned 459 hex_val(char c); 460 461 Token make_invalid_token()462 make_invalid_token() 463 { return Token::make_invalid_token(this->location()); } 464 465 Token make_eof_token()466 make_eof_token() 467 { return Token::make_eof_token(this->location()); } 468 469 Token make_operator(Operator op,int chars)470 make_operator(Operator op, int chars) 471 { return Token::make_operator_token(op, this->earlier_location(chars)); } 472 473 Token 474 gather_identifier(); 475 476 static bool 477 could_be_exponent(const char*, const char*); 478 479 Token 480 gather_number(); 481 482 Token 483 gather_character(); 484 485 Token 486 gather_string(); 487 488 Token 489 gather_raw_string(); 490 491 const char* 492 advance_one_utf8_char(const char*, unsigned int*, bool*); 493 494 const char* 495 advance_one_char(const char*, bool, unsigned int*, bool*); 496 497 static bool 498 is_unicode_digit(unsigned int c); 499 500 static bool 501 is_unicode_letter(unsigned int c); 502 503 static bool 504 is_unicode_uppercase(unsigned int c); 505 506 static bool 507 is_in_unicode_range(unsigned int C, const Unicode_range* ranges, 508 size_t range_size); 509 510 Operator 511 three_character_operator(char, char, char); 512 513 Operator 514 two_character_operator(char, char); 515 516 Operator 517 one_character_operator(char); 518 519 bool 520 skip_c_comment(bool* found_newline); 521 522 void 523 skip_cpp_comment(); 524 525 // The input file name. 526 const char* input_file_name_; 527 // The input file. 528 FILE* input_file_; 529 // The object used to keep track of file names and line numbers. 530 Linemap* linemap_; 531 // The line buffer. This holds the current line. 532 char* linebuf_; 533 // The size of the line buffer. 534 size_t linebufsize_; 535 // The nmber of characters in the current line. 536 size_t linesize_; 537 // The current offset in linebuf_. 538 size_t lineoff_; 539 // The current line number. 540 size_t lineno_; 541 // Whether to add a semicolon if we see a newline now. 542 bool add_semi_at_eol_; 543 // Pragmas for the next function, from magic comments. 544 unsigned int pragmas_; 545 // The external name to use for a function declaration, from a magic 546 // //extern comment. 547 std::string extern_; 548 // The list of //go:linkname comments, if any. 549 Linknames* linknames_; 550 }; 551 552 #endif // !defined(GO_LEX_H) 553