1 /* 2 * Lexer defines. 3 */ 4 5 #if !defined(DUK_LEXER_H_INCLUDED) 6 #define DUK_LEXER_H_INCLUDED 7 8 typedef void (*duk_re_range_callback)(void *user, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct); 9 10 /* 11 * A token is interpreted as any possible production of InputElementDiv 12 * and InputElementRegExp, see E5 Section 7 in its entirety. Note that 13 * the E5 "Token" production does not cover all actual tokens of the 14 * language (which is explicitly stated in the specification, Section 7.5). 15 * Null and boolean literals are defined as part of both ReservedWord 16 * (E5 Section 7.6.1) and Literal (E5 Section 7.8) productions. Here, 17 * null and boolean values have literal tokens, and are not reserved 18 * words. 19 * 20 * Decimal literal negative/positive sign is -not- part of DUK_TOK_NUMBER. 21 * The number tokens always have a non-negative value. The unary minus 22 * operator in "-1.0" is optimized during compilation to yield a single 23 * negative constant. 24 * 25 * Token numbering is free except that reserved words are required to be 26 * in a continuous range and in a particular order. See genstrings.py. 27 */ 28 29 #define DUK_LEXER_INITCTX(ctx) duk_lexer_initctx((ctx)) 30 31 #define DUK_LEXER_SETPOINT(ctx,pt) duk_lexer_setpoint((ctx), (pt)) 32 33 #define DUK_LEXER_GETPOINT(ctx,pt) duk_lexer_getpoint((ctx), (pt)) 34 35 /* Currently 6 characters of lookup are actually needed (duk_lexer.c). */ 36 #define DUK_LEXER_WINDOW_SIZE 6 37 #if defined(DUK_USE_LEXER_SLIDING_WINDOW) 38 #define DUK_LEXER_BUFFER_SIZE 64 39 #endif 40 41 #define DUK_TOK_MINVAL 0 42 43 /* returned after EOF (infinite amount) */ 44 #define DUK_TOK_EOF 0 45 46 /* identifier names (E5 Section 7.6) */ 47 #define DUK_TOK_IDENTIFIER 1 48 49 /* reserved words: keywords */ 50 #define DUK_TOK_START_RESERVED 2 51 #define DUK_TOK_BREAK 2 52 #define DUK_TOK_CASE 3 53 #define DUK_TOK_CATCH 4 54 #define DUK_TOK_CONTINUE 5 55 #define DUK_TOK_DEBUGGER 6 56 #define DUK_TOK_DEFAULT 7 57 #define DUK_TOK_DELETE 8 58 #define DUK_TOK_DO 9 59 #define DUK_TOK_ELSE 10 60 #define DUK_TOK_FINALLY 11 61 #define DUK_TOK_FOR 12 62 #define DUK_TOK_FUNCTION 13 63 #define DUK_TOK_IF 14 64 #define DUK_TOK_IN 15 65 #define DUK_TOK_INSTANCEOF 16 66 #define DUK_TOK_NEW 17 67 #define DUK_TOK_RETURN 18 68 #define DUK_TOK_SWITCH 19 69 #define DUK_TOK_THIS 20 70 #define DUK_TOK_THROW 21 71 #define DUK_TOK_TRY 22 72 #define DUK_TOK_TYPEOF 23 73 #define DUK_TOK_VAR 24 74 #define DUK_TOK_CONST 25 75 #define DUK_TOK_VOID 26 76 #define DUK_TOK_WHILE 27 77 #define DUK_TOK_WITH 28 78 79 /* reserved words: future reserved words */ 80 #define DUK_TOK_CLASS 29 81 #define DUK_TOK_ENUM 30 82 #define DUK_TOK_EXPORT 31 83 #define DUK_TOK_EXTENDS 32 84 #define DUK_TOK_IMPORT 33 85 #define DUK_TOK_SUPER 34 86 87 /* "null", "true", and "false" are always reserved words. 88 * Note that "get" and "set" are not! 89 */ 90 #define DUK_TOK_NULL 35 91 #define DUK_TOK_TRUE 36 92 #define DUK_TOK_FALSE 37 93 94 /* reserved words: additional future reserved words in strict mode */ 95 #define DUK_TOK_START_STRICT_RESERVED 38 /* inclusive */ 96 #define DUK_TOK_IMPLEMENTS 38 97 #define DUK_TOK_INTERFACE 39 98 #define DUK_TOK_LET 40 99 #define DUK_TOK_PACKAGE 41 100 #define DUK_TOK_PRIVATE 42 101 #define DUK_TOK_PROTECTED 43 102 #define DUK_TOK_PUBLIC 44 103 #define DUK_TOK_STATIC 45 104 #define DUK_TOK_YIELD 46 105 106 #define DUK_TOK_END_RESERVED 47 /* exclusive */ 107 108 /* "get" and "set" are tokens but NOT ReservedWords. They are currently 109 * parsed and identifiers and these defines are actually now unused. 110 */ 111 #define DUK_TOK_GET 47 112 #define DUK_TOK_SET 48 113 114 /* punctuators (unlike the spec, also includes "/" and "/=") */ 115 #define DUK_TOK_LCURLY 49 116 #define DUK_TOK_RCURLY 50 117 #define DUK_TOK_LBRACKET 51 118 #define DUK_TOK_RBRACKET 52 119 #define DUK_TOK_LPAREN 53 120 #define DUK_TOK_RPAREN 54 121 #define DUK_TOK_PERIOD 55 122 #define DUK_TOK_SEMICOLON 56 123 #define DUK_TOK_COMMA 57 124 #define DUK_TOK_LT 58 125 #define DUK_TOK_GT 59 126 #define DUK_TOK_LE 60 127 #define DUK_TOK_GE 61 128 #define DUK_TOK_EQ 62 129 #define DUK_TOK_NEQ 63 130 #define DUK_TOK_SEQ 64 131 #define DUK_TOK_SNEQ 65 132 #define DUK_TOK_ADD 66 133 #define DUK_TOK_SUB 67 134 #define DUK_TOK_MUL 68 135 #define DUK_TOK_DIV 69 136 #define DUK_TOK_MOD 70 137 #define DUK_TOK_EXP 71 138 #define DUK_TOK_INCREMENT 72 139 #define DUK_TOK_DECREMENT 73 140 #define DUK_TOK_ALSHIFT 74 /* named "arithmetic" because result is signed */ 141 #define DUK_TOK_ARSHIFT 75 142 #define DUK_TOK_RSHIFT 76 143 #define DUK_TOK_BAND 77 144 #define DUK_TOK_BOR 78 145 #define DUK_TOK_BXOR 79 146 #define DUK_TOK_LNOT 80 147 #define DUK_TOK_BNOT 81 148 #define DUK_TOK_LAND 82 149 #define DUK_TOK_LOR 83 150 #define DUK_TOK_QUESTION 84 151 #define DUK_TOK_COLON 85 152 #define DUK_TOK_EQUALSIGN 86 153 #define DUK_TOK_ADD_EQ 87 154 #define DUK_TOK_SUB_EQ 88 155 #define DUK_TOK_MUL_EQ 89 156 #define DUK_TOK_DIV_EQ 90 157 #define DUK_TOK_MOD_EQ 91 158 #define DUK_TOK_EXP_EQ 92 159 #define DUK_TOK_ALSHIFT_EQ 93 160 #define DUK_TOK_ARSHIFT_EQ 94 161 #define DUK_TOK_RSHIFT_EQ 95 162 #define DUK_TOK_BAND_EQ 96 163 #define DUK_TOK_BOR_EQ 97 164 #define DUK_TOK_BXOR_EQ 98 165 166 /* literals (E5 Section 7.8), except null, true, false, which are treated 167 * like reserved words (above). 168 */ 169 #define DUK_TOK_NUMBER 99 170 #define DUK_TOK_STRING 100 171 #define DUK_TOK_REGEXP 101 172 173 #define DUK_TOK_MAXVAL 101 /* inclusive */ 174 175 #define DUK_TOK_INVALID DUK_SMALL_UINT_MAX 176 177 /* Convert heap string index to a token (reserved words) */ 178 #define DUK_STRIDX_TO_TOK(x) ((x) - DUK_STRIDX_START_RESERVED + DUK_TOK_START_RESERVED) 179 180 /* Sanity check */ 181 #if (DUK_TOK_MAXVAL > 255) 182 #error DUK_TOK_MAXVAL too large, code assumes it fits into 8 bits 183 #endif 184 185 /* Sanity checks for string and token defines */ 186 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_BREAK) != DUK_TOK_BREAK) 187 #error mismatch in token defines 188 #endif 189 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CASE) != DUK_TOK_CASE) 190 #error mismatch in token defines 191 #endif 192 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CATCH) != DUK_TOK_CATCH) 193 #error mismatch in token defines 194 #endif 195 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONTINUE) != DUK_TOK_CONTINUE) 196 #error mismatch in token defines 197 #endif 198 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEBUGGER) != DUK_TOK_DEBUGGER) 199 #error mismatch in token defines 200 #endif 201 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEFAULT) != DUK_TOK_DEFAULT) 202 #error mismatch in token defines 203 #endif 204 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DELETE) != DUK_TOK_DELETE) 205 #error mismatch in token defines 206 #endif 207 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DO) != DUK_TOK_DO) 208 #error mismatch in token defines 209 #endif 210 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ELSE) != DUK_TOK_ELSE) 211 #error mismatch in token defines 212 #endif 213 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FINALLY) != DUK_TOK_FINALLY) 214 #error mismatch in token defines 215 #endif 216 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FOR) != DUK_TOK_FOR) 217 #error mismatch in token defines 218 #endif 219 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_FUNCTION) != DUK_TOK_FUNCTION) 220 #error mismatch in token defines 221 #endif 222 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IF) != DUK_TOK_IF) 223 #error mismatch in token defines 224 #endif 225 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IN) != DUK_TOK_IN) 226 #error mismatch in token defines 227 #endif 228 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INSTANCEOF) != DUK_TOK_INSTANCEOF) 229 #error mismatch in token defines 230 #endif 231 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_NEW) != DUK_TOK_NEW) 232 #error mismatch in token defines 233 #endif 234 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_RETURN) != DUK_TOK_RETURN) 235 #error mismatch in token defines 236 #endif 237 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SWITCH) != DUK_TOK_SWITCH) 238 #error mismatch in token defines 239 #endif 240 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THIS) != DUK_TOK_THIS) 241 #error mismatch in token defines 242 #endif 243 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THROW) != DUK_TOK_THROW) 244 #error mismatch in token defines 245 #endif 246 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRY) != DUK_TOK_TRY) 247 #error mismatch in token defines 248 #endif 249 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TYPEOF) != DUK_TOK_TYPEOF) 250 #error mismatch in token defines 251 #endif 252 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VAR) != DUK_TOK_VAR) 253 #error mismatch in token defines 254 #endif 255 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VOID) != DUK_TOK_VOID) 256 #error mismatch in token defines 257 #endif 258 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WHILE) != DUK_TOK_WHILE) 259 #error mismatch in token defines 260 #endif 261 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WITH) != DUK_TOK_WITH) 262 #error mismatch in token defines 263 #endif 264 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CLASS) != DUK_TOK_CLASS) 265 #error mismatch in token defines 266 #endif 267 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONST) != DUK_TOK_CONST) 268 #error mismatch in token defines 269 #endif 270 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ENUM) != DUK_TOK_ENUM) 271 #error mismatch in token defines 272 #endif 273 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXPORT) != DUK_TOK_EXPORT) 274 #error mismatch in token defines 275 #endif 276 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXTENDS) != DUK_TOK_EXTENDS) 277 #error mismatch in token defines 278 #endif 279 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPORT) != DUK_TOK_IMPORT) 280 #error mismatch in token defines 281 #endif 282 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SUPER) != DUK_TOK_SUPER) 283 #error mismatch in token defines 284 #endif 285 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_NULL) != DUK_TOK_NULL) 286 #error mismatch in token defines 287 #endif 288 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRUE) != DUK_TOK_TRUE) 289 #error mismatch in token defines 290 #endif 291 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FALSE) != DUK_TOK_FALSE) 292 #error mismatch in token defines 293 #endif 294 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPLEMENTS) != DUK_TOK_IMPLEMENTS) 295 #error mismatch in token defines 296 #endif 297 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INTERFACE) != DUK_TOK_INTERFACE) 298 #error mismatch in token defines 299 #endif 300 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LET) != DUK_TOK_LET) 301 #error mismatch in token defines 302 #endif 303 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PACKAGE) != DUK_TOK_PACKAGE) 304 #error mismatch in token defines 305 #endif 306 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PRIVATE) != DUK_TOK_PRIVATE) 307 #error mismatch in token defines 308 #endif 309 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PROTECTED) != DUK_TOK_PROTECTED) 310 #error mismatch in token defines 311 #endif 312 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PUBLIC) != DUK_TOK_PUBLIC) 313 #error mismatch in token defines 314 #endif 315 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_STATIC) != DUK_TOK_STATIC) 316 #error mismatch in token defines 317 #endif 318 #if (DUK_STRIDX_TO_TOK(DUK_STRIDX_YIELD) != DUK_TOK_YIELD) 319 #error mismatch in token defines 320 #endif 321 322 /* Regexp tokens */ 323 #define DUK_RETOK_EOF 0 324 #define DUK_RETOK_DISJUNCTION 1 325 #define DUK_RETOK_QUANTIFIER 2 326 #define DUK_RETOK_ASSERT_START 3 327 #define DUK_RETOK_ASSERT_END 4 328 #define DUK_RETOK_ASSERT_WORD_BOUNDARY 5 329 #define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY 6 330 #define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD 7 331 #define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD 8 332 #define DUK_RETOK_ATOM_PERIOD 9 333 #define DUK_RETOK_ATOM_CHAR 10 334 #define DUK_RETOK_ATOM_DIGIT 11 /* assumptions in regexp compiler */ 335 #define DUK_RETOK_ATOM_NOT_DIGIT 12 /* -""- */ 336 #define DUK_RETOK_ATOM_WHITE 13 /* -""- */ 337 #define DUK_RETOK_ATOM_NOT_WHITE 14 /* -""- */ 338 #define DUK_RETOK_ATOM_WORD_CHAR 15 /* -""- */ 339 #define DUK_RETOK_ATOM_NOT_WORD_CHAR 16 /* -""- */ 340 #define DUK_RETOK_ATOM_BACKREFERENCE 17 341 #define DUK_RETOK_ATOM_START_CAPTURE_GROUP 18 342 #define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP 19 343 #define DUK_RETOK_ATOM_START_CHARCLASS 20 344 #define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED 21 345 #define DUK_RETOK_ATOM_END_GROUP 22 346 347 /* Constants for duk_lexer_ctx.buf. */ 348 #define DUK_LEXER_TEMP_BUF_LIMIT 256 349 350 /* A token value. Can be memcpy()'d, but note that slot1/slot2 values are on the valstack. 351 * Some fields (like num, str1, str2) are only valid for specific token types and may have 352 * stale values otherwise. 353 */ 354 struct duk_token { 355 duk_small_uint_t t; /* token type (with reserved word identification) */ 356 duk_small_uint_t t_nores; /* token type (with reserved words as DUK_TOK_IDENTIFER) */ 357 duk_double_t num; /* numeric value of token */ 358 duk_hstring *str1; /* string 1 of token (borrowed, stored to ctx->slot1_idx) */ 359 duk_hstring *str2; /* string 2 of token (borrowed, stored to ctx->slot2_idx) */ 360 duk_size_t start_offset; /* start byte offset of token in lexer input */ 361 duk_int_t start_line; /* start line of token (first char) */ 362 duk_int_t num_escapes; /* number of escapes and line continuations (for directive prologue) */ 363 duk_bool_t lineterm; /* token was preceded by a lineterm */ 364 duk_bool_t allow_auto_semi; /* token allows automatic semicolon insertion (eof or preceded by newline) */ 365 }; 366 367 #define DUK_RE_QUANTIFIER_INFINITE ((duk_uint32_t) 0xffffffffUL) 368 369 /* A regexp token value. */ 370 struct duk_re_token { 371 duk_small_uint_t t; /* token type */ 372 duk_small_uint_t greedy; 373 duk_uint32_t num; /* numeric value (character, count) */ 374 duk_uint32_t qmin; 375 duk_uint32_t qmax; 376 }; 377 378 /* A structure for 'snapshotting' a point for rewinding */ 379 struct duk_lexer_point { 380 duk_size_t offset; 381 duk_int_t line; 382 }; 383 384 /* Lexer codepoint with additional info like offset/line number */ 385 struct duk_lexer_codepoint { 386 duk_codepoint_t codepoint; 387 duk_size_t offset; 388 duk_int_t line; 389 }; 390 391 /* Lexer context. Same context is used for ECMAScript and Regexp parsing. */ 392 struct duk_lexer_ctx { 393 #if defined(DUK_USE_LEXER_SLIDING_WINDOW) 394 duk_lexer_codepoint *window; /* unicode code points, window[0] is always next, points to 'buffer' */ 395 duk_lexer_codepoint buffer[DUK_LEXER_BUFFER_SIZE]; 396 #else 397 duk_lexer_codepoint window[DUK_LEXER_WINDOW_SIZE]; /* unicode code points, window[0] is always next */ 398 #endif 399 400 duk_hthread *thr; /* thread; minimizes argument passing */ 401 402 const duk_uint8_t *input; /* input string (may be a user pointer) */ 403 duk_size_t input_length; /* input byte length */ 404 duk_size_t input_offset; /* input offset for window leading edge (not window[0]) */ 405 duk_int_t input_line; /* input linenumber at input_offset (not window[0]), init to 1 */ 406 407 duk_idx_t slot1_idx; /* valstack slot for 1st token value */ 408 duk_idx_t slot2_idx; /* valstack slot for 2nd token value */ 409 duk_idx_t buf_idx; /* valstack slot for temp buffer */ 410 duk_hbuffer_dynamic *buf; /* temp accumulation buffer */ 411 duk_bufwriter_ctx bw; /* bufwriter for temp accumulation */ 412 413 duk_int_t token_count; /* number of tokens parsed */ 414 duk_int_t token_limit; /* maximum token count before error (sanity backstop) */ 415 416 duk_small_uint_t flags; /* lexer flags, use compiler flag defines for now */ 417 }; 418 419 /* 420 * Prototypes 421 */ 422 423 DUK_INTERNAL_DECL void duk_lexer_initctx(duk_lexer_ctx *lex_ctx); 424 425 DUK_INTERNAL_DECL void duk_lexer_getpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt); 426 DUK_INTERNAL_DECL void duk_lexer_setpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt); 427 428 DUK_INTERNAL_DECL 429 void duk_lexer_parse_js_input_element(duk_lexer_ctx *lex_ctx, 430 duk_token *out_token, 431 duk_bool_t strict_mode, 432 duk_bool_t regexp_mode); 433 #if defined(DUK_USE_REGEXP_SUPPORT) 434 DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token); 435 DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata); 436 #endif /* DUK_USE_REGEXP_SUPPORT */ 437 438 #endif /* DUK_LEXER_H_INCLUDED */ 439