1 /* $Header: d:/cvsroot/tads/tads3/tcprs.h,v 1.5 1999/07/11 00:46:58 MJRoberts Exp $ */
2
3 /*
4 * Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved.
5 *
6 * Please see the accompanying license file, LICENSE.TXT, for information
7 * on using and copying this software.
8 */
9 /*
10 Name
11 tcprs.h - TADS 3 Compiler - parser
12 Function
13
14 Notes
15
16 Modified
17 04/29/99 MJRoberts - Creation
18 */
19
20 #ifndef TCPRS_H
21 #define TCPRS_H
22
23 #include <assert.h>
24
25 #include "tcglob.h"
26 #include "tctok.h"
27 #include "tctargty.h"
28 #include "tcprstyp.h"
29
30
31 /* ------------------------------------------------------------------------ */
32 /*
33 * Object ID type
34 */
35 typedef ulong tc_obj_id;
36
37 /*
38 * Property ID type
39 */
40 typedef uint tc_prop_id;
41
42
43 /* ------------------------------------------------------------------------ */
/*
 *   Scope data structure.  Groups the three pieces of per-scope state: the
 *   scope's own local symbol table, the enclosing scope's local symbol
 *   table, and the count of locals allocated in the scope.
 */
struct tcprs_scope_t
{
    /* local symbol table */
    class CTcPrsSymtab *local_symtab;

    /* enclosing scope's local symbol table */
    class CTcPrsSymtab *enclosing_symtab;

    /* number of locals allocated in scope */
    int local_cnt;
};
58
59 /* ------------------------------------------------------------------------ */
/*
 *   Code body parsing types.  Each type of code body is essentially the
 *   same with minor variations, so we use a common code body parser that
 *   checks the parsing type to apply the variations.
 *
 *   The enumerators take the default values 0, 1, 2 in declaration order.
 */
enum tcprs_codebodytype
{
    /* a standard function or method code body */
    TCPRS_CB_NORMAL,

    /* anonymous function */
    TCPRS_CB_ANON_FN,

    /* short-form anonymous function */
    TCPRS_CB_SHORT_ANON_FN
};
76
77
78 /* ------------------------------------------------------------------------ */
/*
 *   Index of the saved method context within a local variable context
 *   array.  The saved method context is always at index 1 in local
 *   variable context arrays, when we're using local variable context
 *   arrays.
 */
#define TCPRS_LOCAL_CTX_METHODCTX  1
84
85
86 /* ------------------------------------------------------------------------ */
87 /*
88 * Parser
89 */
90 class CTcParser
91 {
92 public:
93 CTcParser();
94 ~CTcParser();
95
96 /* initialize - call this after the code generator is set up */
97 void init();
98
99 /*
100 * Write an exported symbol file. An exported symbol file
101 * facilitates separate compilation by providing a listing of the
102 * symbols defined in another module. If module A depends on the
103 * symbols from module B, the user can first create an exported
104 * symbol file for module B, then can compile module A in the
105 * presence of B's symbol file, without actually loading B, and
106 * without manually entering a set of external definitions in module
107 * A's source code.
108 */
109 void write_symbol_file(class CVmFile *fp, class CTcMake *make_obj);
110
111 /*
112 * Seek to the start of the build configuration information in a symbol
113 * file. The return value is the number of bytes stored in the build
114 * configuration block; on return, the file object will have its seek
115 * offset set to the first byte of the build configuration data.
116 * Returns zero if the symbol file is invalid or does not contain any
117 * configuration data.
118 */
119 static ulong seek_sym_file_build_config_info(class CVmFile *fp);
120
121 /*
122 * Write the global table to an object file.
123 */
124 void write_to_object_file(class CVmFile *fp);
125
126 /*
127 * Read an object file and load it into the global symbol table. We
128 * will fill in the object and property ID translation tables
129 * provided with the translated values for the object and property
130 * symbols that we find in the object file.
131 *
132 * Returns zero on success; logs error messages and returns non-zero
133 * on error. Note that a non-zero value should be returned only
134 * when the file appears to be corrupted or an I/O error occurs;
135 * errors involving conflicting symbols, or other problems that do
136 * not prevent us from continuing to read the file in an orderly
137 * fashion, should not return failure but should simply log the
138 * error and continue; this way, we can detect any additional symbol
139 * conflicts or other errors. This routine should return failure
140 * only when it is not possible to continue reading the file.
141 */
142 int load_object_file(class CVmFile *fp,
143 const textchar_t *fname,
144 tctarg_obj_id_t *obj_xlat,
145 tctarg_prop_id_t *prop_xlat,
146 ulong *enum_xlat);
147
148 /*
149 * Apply internal object/property ID fixups. This traverses the
150 * symbol table and calls each symbol's apply_internal_fixups()
151 * method. This can be called once after loading all object files.
152 */
153 void apply_internal_fixups();
154
155 /*
156 * Read an exported symbol file. Reads the file and loads the
157 * global symbol table with the symbols in the file, with each
158 * symbol marked as external.
159 *
160 * This can be used for separate compilation. If module A depends
161 * on symbols in module B, first create a symbol file for module B,
162 * then module A can be compiled simply be pre-loading B's symbol
163 * file. Any symbol files that a module depends upon must be loaded
164 * before the module is compiled - symbol file loading must precede
165 * parsing.
166 *
167 * If any errors occur, we'll log the errors and return non-zero.
168 * We'll return zero on success.
169 */
170 int read_symbol_file(class CVmFile *fp);
171
172 /* get the global symbol table */
get_global_symtab()173 class CTcPrsSymtab *get_global_symtab() const { return global_symtab_; }
174
175 /* get the current local symbol table */
get_local_symtab()176 class CTcPrsSymtab *get_local_symtab() const { return local_symtab_; }
177
178 /* get the 'goto' symbol table */
get_goto_symtab()179 class CTcPrsSymtab *get_goto_symtab() const { return goto_symtab_; }
180
181 /* set the current pragma C mode */
182 void set_pragma_c(int mode);
183
184 /* turn preprocess expression mode on or off */
set_pp_expr_mode(int f)185 void set_pp_expr_mode(int f) { pp_expr_mode_ = f; }
186
187 /* get the current preprocess expression mode flag */
get_pp_expr_mode()188 int get_pp_expr_mode() const { return pp_expr_mode_; }
189
190 /* get/set the syntax-only mode flag */
get_syntax_only()191 int get_syntax_only() const { return syntax_only_; }
set_syntax_only(int f)192 void set_syntax_only(int f) { syntax_only_ = f; }
193
194 /*
195 * Get the constructor and finalize property ID's - all constructors
196 * and finalizers have these property ID's respectively
197 */
get_constructor_prop()198 tc_prop_id get_constructor_prop() const { return constructor_prop_; }
get_finalize_prop()199 tc_prop_id get_finalize_prop() const { return finalize_prop_; }
200
201 /* get the constructor property symbol */
get_constructor_sym()202 class CTcSymProp *get_constructor_sym() const { return constructor_sym_; }
203
204 /* get the object-call property */
get_objcall_prop()205 tc_prop_id get_objcall_prop() const { return objcall_prop_; }
206
207 /*
208 * Check for unresolved external symbols. Scans the global symbol
209 * table and logs an error for each unresolved external. Returns
210 * true if any unresolved externals exist, false if not.
211 */
212 int check_unresolved_externs();
213
214 /*
215 * build the dictionaries - scans the global symbol table, and
216 * inserts each object symbol's dictionary words into its
217 * corresponding dictionary
218 */
219 void build_dictionaries();
220
221 /* build the grammar productions */
222 void build_grammar_productions();
223
224 /*
225 * Top-level parser. Parse functions, objects, and other top-level
226 * definitions and declarations.
227 */
228 class CTPNStmProg *parse_top();
229
230 /*
231 * Parse a required semicolon. If the semicolon is present, we'll
232 * simply skip it. If it's missing, we'll log an error and try to
233 * resynchronize. If we find something that looks like it should go
234 * at the end of an expression, we'll try to skip up to the next
235 * semicolon; otherwise, we'll simply stay put.
236 *
237 * Returns zero if the caller should proceed, non-zero if we're at
238 * end of file, in which case there's nothing more for the caller to
239 * parse.
240 */
241 static int parse_req_sem();
242
243 /*
244 * Skip to the next semicolon, ignoring any tokens up to that point.
245 * This can be used when the caller encounters an error that makes
246 * it impossible to process the current statement further, and wants
247 * to find the next semicolon in the hope that it will be a good
248 * place to start again with the next statement.
249 *
250 * Returns zero if the caller should proceed, non-zero if we reach
251 * the end of the file.
252 */
253 static int skip_to_sem();
254
255 /*
256 * Parse an expression. This parses a top-level "comma" expression.
257 */
258 class CTcPrsNode *parse_expr();
259
/*
 *   Parse a condition expression.  This parses a top-level "comma"
 *   expression, but displays a warning if the outermost operator in
 *   the expression is an assignment, because such expressions are
 *   very frequently meant as comparisons, but the '=' operator was
 *   inadvertently used instead of '=='.
 */
267 class CTcPrsNode *parse_cond_expr();
268
269 /*
270 * Parse a value expression or a double-quoted string expression
271 * (including a double-quoted string with embedded expressions). If
272 * allow_comma_expr is true, we'll parse a comma expression;
273 * otherwise, we'll parse an assignment expression. (A comma
274 * expression is broader than an assignment expression, since the
275 * comma separates assignment expressions.)
276 */
277 class CTcPrsNode *parse_expr_or_dstr(int allow_comma_expr);
278
279 /*
280 * Parse an assignment expression - this is the next precedence
281 * level down from comma expressions. In certain contexts, a
282 * top-level comma expression is not allowed because a comma has a
283 * separate meaning (in the initializer clause of a 'for' statement,
284 * for example, or in a list element).
285 */
286 class CTcPrsNode *parse_asi_expr();
287
288 /* parse an 'enum' top-level statement */
289 void parse_enum(int *err);
290
291 /* parse a 'dictionary' top-level statement */
292 class CTPNStmTop *parse_dict(int *err);
293
294 /* parse a 'grammar' top-level statement */
295 class CTPNStmTop *parse_grammar(int *err, int replace, int modify);
296
297 /* parse and flatten a set of grammar rules */
298 class CTcPrsGramNode *flatten_gram_rule(int *err);
299
300 /* parse a 'grammar' OR node */
301 class CTcPrsGramNode *parse_gram_or(int *err, int level);
302
303 /* parse a 'grammar' CAT node */
304 class CTcPrsGramNode *parse_gram_cat(int *err, int level);
305
306 /* parse a 'grammar' qualifier int value */
307 int parse_gram_qual_int(int *err, const char *qual_name, int *stm_end);
308
/* skip to the end of a malformed grammar qualifier */
310 void parse_gram_qual_skip(int *err, int *stm_end);
311
312 /*
313 * Parse a 'function' top-level statement. If 'is_extern' is true,
314 * the function is being defined externally, so it should have no
315 * code body defined here (just the prototype). If 'replace' is
316 * true, we're replacing an existing function.
317 *
318 * If 'func_kw_present' is true, the 'function' keyword is present
319 * and must be skipped; otherwise, the function definition elides
320 * the 'function' keyword and starts directly with the function name
321 * symbol.
322 */
323 class CTPNStmTop *parse_function(int *err, int is_extern,
324 int replace, int modify,
325 int func_kw_present);
326
327 /* parse an 'intrinsic' top-level statement */
328 class CTPNStmTop *parse_intrinsic(int *err);
329
330 /* parse an 'intrinsic class' top-level statement */
331 class CTPNStmTop *parse_intrinsic_class(int *err);
332
333 /* parse an 'extern' top-level statement */
334 void parse_extern(int *err);
335
/*
 *   parse an object or function definition (this is called when the
 *   first thing in a statement is a symbol; we must check what
 *   follows to determine what type of definition it is)
 */
341 class CTPNStmTop *parse_object_or_func(int *err, int replace,
342 int suppress_error,
343 int *suppress_next_error);
344
345 /* parse a template definition statement */
346 class CTPNStmTop *parse_template_def(int *err,
347 const class CTcToken *class_tok);
348
349 /* add a template definition */
350 void add_template_def(class CTcSymObj *class_sym,
351 class CTcObjTemplateItem *item_head,
352 size_t item_cnt);
353
354 /* add inherited template definitions */
355 void add_inherited_templates(class CTcSymObj *sc_sym,
356 class CTcObjTemplateItem *item_head,
357 size_t item_cnt);
358
359 /*
360 * expand the 'inherited' keyword in a template for the given
361 * superclass template and add the result to the template list for the
362 * class
363 */
364 void expand_and_add_inherited_template(class CTcSymObj *sc_sym,
365 class CTcObjTemplateItem *items,
366 class CTcObjTemplate *sc_tpl);
367
368 /*
369 * build a list of superclass templates, for expanding an 'inherited'
370 * token in a template definition
371 */
372 void build_super_template_list(struct inh_tpl_entry **list_head,
373 struct inh_tpl_entry **list_tail,
374 class CTcSymObj *sc_sym);
375
376 /* parse an 'object' statement */
377 class CTPNStmTop *parse_object_stm(int *err, int is_transient);
378
379 /*
380 * parse an object definition that starts with a '+' string; this
381 * also parses '+ property' statements
382 */
383 class CTPNStmTop *parse_plus_object(int *err);
384
385 /*
386 * Parse an object definition. If 'replace' is true, this
387 * definition is to replace a previous definition of the same
388 * object; if 'modify' is true, this definition is to modify a
389 * previous definition. If 'is_class' is true, the definition is
390 * for a class, otherwise it's for a static instance.
391 *
392 * If the definition uses the '+' notation to set the location,
393 * plus_cnt gives the number of '+' signs preceding the object
394 * definition.
395 */
396 class CTPNStmTop *parse_object(int *err, int replace, int modify,
397 int is_class, int plus_cnt,
398 int is_transient);
399
400 /* find or define an object symbol */
401 CTcSymObj *find_or_def_obj(const char *tok_txt, size_t tok_len,
402 int replace, int modify, int *is_class,
403 class CTcSymObj **mod_orig_sym,
404 class CTcSymMetaclass **meta_sym,
405 int *is_transient);
406
407 /* parse an anonymous object */
408 class CTPNStmObject *parse_anon_object(int *err, int plus_cnt,
409 int is_nested,
410 struct tcprs_term_info *term_info,
411 int is_transient);
412
/*
 *   Parse an object body.  We start parsing from the colon that
 *   introduces the class list, and parse the class list and the
 *   property list for the object.
 *
 *   If 'is_anon' is true, this is an anonymous object.  'obj_sym'
 *   should be null in this case.
 *
 *   If 'is_nested' is true, this is a nested object defined in-line in
 *   an object's property list.  Note that is_nested implies is_anon,
 *   since nested objects are always anonymous.
 *
 *   If this is a 'modify' definition, 'mod_orig_sym' should be set up
 *   with the synthesized symbol for the modified base object;
 *   otherwise, 'mod_orig_sym' should be null.
 *
 *   If 'meta_sym' is non-null, we're modifying an intrinsic class.
 *   This imposes certain restrictions; in particular, we cannot modify
 *   a method defined in the native interface to the class.
 */
433 class CTPNStmObject *parse_object_body(int *err, class CTcSymObj *obj_sym,
434 int is_class, int is_anon,
435 int is_grammar,
436 int is_nested, int modify,
437 class CTcSymObj *mod_orig_sym,
438 int plus_cnt,
439 class CTcSymMetaclass *meta_sym,
440 struct tcprs_term_info *term_info,
441 int is_transient);
442
443 /* parse an object template instance in an object body */
444 void parse_obj_template(int *err, class CTPNStmObject *obj_stm);
445
446 /* search a superclass list for a template match */
447 const class CTcObjTemplate
448 *find_class_template(const class CTPNSuperclass *first_sc,
449 class CTcObjTemplateInst *src,
450 size_t src_cnt, const CTPNSuperclass **def_sc,
451 int *undescribed_class);
452
453 /* find a match for a given template in the given list */
454 const class CTcObjTemplate
455 *find_template_match(const class CTcObjTemplate *first_tpl,
456 class CTcObjTemplateInst *src,
457 size_t src_cnt);
458
459 /*
460 * Match a template to a given actual template parameter list. Returns
461 * true if we match, false if not. We'll fill in the actual list with
462 * the property symbols that we matched; these values are only
463 * meaningful if we return true to indicate a match.
464 */
465 int match_template(const class CTcObjTemplateItem *tpl_head,
466 class CTcObjTemplateInst *src, size_t src_cnt);
467
468 /* parse property definition within an object */
469 void parse_obj_prop(int *err, class CTPNStmObject *obj_stm, int replace,
470 class CTcSymMetaclass *meta_sym,
471 struct tcprs_term_info *term_info,
472 struct propset_def *propset_stack, int propset_depth,
473 int enclosing_obj_is_nested);
474
475 /* parse a class definition */
476 class CTPNStmTop *parse_class(int *err);
477
478 /* parse a 'modify' definition */
479 class CTPNStmTop *parse_modify(int *err);
480
481 /* parse a 'replace' definition */
482 class CTPNStmTop *parse_replace(int *err);
483
484 /* parse a 'property' statement */
485 void parse_property(int *err);
486
487 /* parse an 'export' statement */
488 void parse_export(int *err);
489
490 /* add an export for the given symbol; returns the new export record */
491 class CTcPrsExport *add_export(const char *sym, size_t sym_len);
492
493 /* add an export record to our list */
494 void add_export_to_list(class CTcPrsExport *exp);
495
496 /* get the head of the export list */
get_exp_head()497 class CTcPrsExport *get_exp_head() const { return exp_head_; }
498
499 /*
500 * Parse a function or method body, starting with the formal parameter
501 * list. If 'eq_before_brace' is set, we expect an '=' before the
502 * opening brace of the code body, and we allow the expression syntax,
503 * where an expression enclosed in parentheses can be used.
504 * 'self_valid' indicates whether or not 'self' is valid in the context
505 * of the code being compiled; for an object method, 'self' is usually
506 * valid, while for a stand-alone function it isn't.
507 */
508 class CTPNCodeBody *parse_code_body(int eq_before_brace, int is_obj_prop,
509 int self_valid,
510 int *p_argc, int *p_varargs,
511 int *p_varargs_list,
512 class CTcSymLocal **
513 p_varargs_list_local,
514 int *has_retval, int *err,
515 class CTcPrsSymtab *local_symtab,
516 tcprs_codebodytype cb_type,
517 struct propset_def *propset_stack,
518 int propset_depth,
519 struct CTcCodeBodyRef *enclosing);
520
521 /* parse a nested code body (such as an anonymous function) */
522 class CTPNCodeBody *parse_nested_code_body(
523 int eq_before_brace,
524 int self_valid,
525 int *p_argc, int *p_varargs,
526 int *p_varargs_list,
527 class CTcSymLocal **p_varargs_list_local,
528 int *has_retval, int *err,
529 class CTcPrsSymtab *local_symtab,
530 tcprs_codebodytype cb_type);
531
532 /* parse a formal parameter list */
533 void parse_formal_list(int count_only, int opt_allowed,
534 int *argc, int *opt_argc, int *varargs,
535 int *varargs_list,
536 class CTcSymLocal **varargs_list_local,
537 int *err, int base_formal_num,
538 int for_short_anon_func);
539
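/*
 *   Parse a compound statement.  The caller must skip the opening
 *   '{'; on return, we'll have skipped the closing '}'.
 *   enclosing_symtab is the enclosing scope's symbol table, and
 *   local_symtab is the symbol table for the new scope within the
 *   compound statement; if the caller has not already allocated a new
 *   symbol table for the inner scope, it should simply pass the same
 *   value for both symbol tables.
 *
 *   'enclosing_switch' is the immediately enclosing switch statement,
 *   if any.  This is only set when we're parsing the immediate body
 *   of a switch statement.
 *
 *   NOTE(review): the declaration below takes 'skip_lbrace' and
 *   'use_enclosing_scope' and has no 'enclosing_symtab' or
 *   'local_symtab' parameters, so the first paragraph appears to
 *   describe an older signature.  Presumably 'skip_lbrace' controls
 *   whether this routine skips the opening '{' itself, and
 *   'use_enclosing_scope' replaces the "pass the same value for both
 *   symbol tables" convention - confirm against the implementation.
 */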
553 class CTPNStmComp *parse_compound(int *err, int skip_lbrace,
554 class CTPNStmSwitch *enclosing_switch,
555 int use_enclosing_scope);
556
557 /* parse a local variable definition */
558 class CTPNStm *parse_local(int *err);
559
560 /* parse a local initializer */
561 class CTcPrsNode *parse_local_initializer(class CTcSymLocal *lcl,
562 int *err);
563
564 /*
565 * Parse an individual statement.
566 *
567 * If 'compound_use_enclosing_scope' is true, then if the statement
568 * is a compound statement (i.e., the current token is a left
569 * brace), the compound statement will use the current scope rather
570 * than creating its own scope. Normally, a compound statement
571 * establishes its own scope, so that local variables can hide
572 * locals and parameters defined outside the braces. In certain
573 * cases, however, locals defined within the braces should share the
574 * enclosing scope: at the top level of a function or method, for
575 * example, the formal parameters and the locals within the function
576 * body go in the same scope, so the function body's compound
577 * statement doesn't create its own scope.
578 */
579 class CTPNStm *parse_stm(int *err, class CTPNStmSwitch *enclosing_switch,
580 int compound_use_enclosing_scope);
581
582 /* parse a 'case' label */
583 class CTPNStm *parse_case(int *err,
584 class CTPNStmSwitch *enclosing_switch);
585
586 /* parse a 'default' label */
587 class CTPNStm *parse_default(int *err,
588 class CTPNStmSwitch *enclosing_switch);
589
590 /* parse an 'if' statement */
591 class CTPNStm *parse_if(int *err);
592
593 /* parse a 'return' statement */
594 class CTPNStm *parse_return(int *err);
595
596 /* parse a 'for' statement */
597 class CTPNStm *parse_for(int *err);
598
599 /* parse a 'foreach' statement */
600 class CTPNStm *parse_foreach(int *err);
601
602 /* parse a 'break' statement */
603 class CTPNStm *parse_break(int *err);
604
605 /* parse a 'continue' statement */
606 class CTPNStm *parse_continue(int *err);
607
608 /* parse a 'while' */
609 class CTPNStm *parse_while(int *err);
610
611 /* parse a 'do-while' */
612 class CTPNStm *parse_do_while(int *err);
613
614 /* parse a 'switch' */
615 class CTPNStm *parse_switch(int *err);
616
617 /* parse a 'goto' */
618 class CTPNStm *parse_goto(int *err);
619
620 /* parse a 'try' */
621 class CTPNStm *parse_try(int *err);
622
623 /* parse a 'throw' */
624 class CTPNStm *parse_throw(int *err);
625
626 /*
627 * Create a symbol node. We'll look up the symbol in local scope.
628 * If we find the symbol in local scope, we'll return a resolved
629 * symbol node for the local scope item. If the symbol isn't
630 * defined in local scope, we'll return an unresolved symbol node,
631 * so that the symbol's resolution can be deferred until code
632 * generation.
633 */
634 class CTcPrsNode *create_sym_node(const textchar_t *sym, size_t sym_len);
635
636 /*
637 * Get the source file descriptor and line number for the current
638 * source line. We note this at the start of each statement, so
639 * that a statement node constructed when we finish parsing the
640 * statement can record the location of the start of the statement.
641 */
get_cur_desc()642 class CTcTokFileDesc *get_cur_desc() const { return cur_desc_; }
get_cur_linenum()643 long get_cur_linenum() const { return cur_linenum_; }
644
645 /*
646 * Get/set the current enclosing statement. An enclosing statement
647 * is a 'try' or 'label:' container. At certain times, we need to
648 * know the current enclosing statement, or one of its enclosing
649 * statements; for example, a 'break' with a label must find the
650 * label in the enclosing statement list to know where to jump to
651 * after the 'break', and must also know about all of the enclosing
652 * 'try' blocks our to that point so that it can invoke their
653 * 'finally' blocks.
654 */
get_enclosing_stm()655 class CTPNStmEnclosing *get_enclosing_stm() const
656 { return enclosing_stm_; }
set_enclosing_stm(class CTPNStmEnclosing * stm)657 class CTPNStmEnclosing *set_enclosing_stm(class CTPNStmEnclosing *stm)
658 {
659 class CTPNStmEnclosing *old_enclosing;
660
661 /* remember the current enclosing statement for a moment */
662 old_enclosing = enclosing_stm_;
663
664 /* set the new enclosing statement */
665 enclosing_stm_ = stm;
666
667 /*
668 * return the previous enclosing statement - this allows the
669 * caller to restore the previous enclosing statement upon
670 * leaving a nested block, if that's why the caller is setting a
671 * new enclosing statement
672 */
673 return old_enclosing;
674 }
675
676 /* get the current code body reference object */
get_cur_code_body()677 struct CTcCodeBodyRef *get_cur_code_body() const
678 { return cur_code_body_; }
679
680 /* determine if 'self' is valid in the current context */
is_self_valid()681 int is_self_valid() const { return self_valid_; }
682
683 /*
684 * get/set the 'self' reference status - this indicates whether or not
685 * 'self' has been referenced, explicitly via the 'self'
686 * pseudo-variable or implicitly (such as via a property reference or
687 * method call), in the code body currently being parsed
688 */
self_referenced()689 int self_referenced() const { return self_referenced_; }
set_self_referenced(int f)690 void set_self_referenced(int f) { self_referenced_ = f; }
691
692 /*
693 * get/set the full method context reference status - this indicates
694 * whether or not any of the method context variables (self,
695 * targetprop, targetobj, definingobj) have been referenced, explicitly
696 * or implicitly, in the code body currently being parsed
697 */
full_method_ctx_referenced()698 int full_method_ctx_referenced() const
699 { return full_method_ctx_referenced_; }
set_full_method_ctx_referenced(int f)700 void set_full_method_ctx_referenced(int f)
701 { full_method_ctx_referenced_ = f; }
702
703 /*
704 * Get/set the flag indicating whether or not the local context of the
705 * outermost code body needs 'self'. The outer code body needs 'self'
706 * in the local context if any lexically nested code body requires
707 * access to 'self'.
708 */
local_ctx_needs_self()709 int local_ctx_needs_self() const { return local_ctx_needs_self_; }
set_local_ctx_needs_self(int f)710 void set_local_ctx_needs_self(int f) { local_ctx_needs_self_ = f; }
711
712 /*
713 * Get/set the flag indicating whether or not the local context of the
714 * outermost code body needs the full method context stored in its
715 * local context. The outer code body needs the full context stored if
716 * any lexically nested code body requires access to any of the method
717 * context variables besides 'self' (targetprop, targetobj,
718 * definingobj).
719 */
local_ctx_needs_full_method_ctx()720 int local_ctx_needs_full_method_ctx() const
721 { return local_ctx_needs_full_method_ctx_; }
set_local_ctx_needs_full_method_ctx(int f)722 void set_local_ctx_needs_full_method_ctx(int f)
723 { local_ctx_needs_full_method_ctx_ = f; }
724
725 /*
726 * Add a code label. This creates a 'goto' symbol table for the
727 * current code body if one doesn't already exist
728 */
729 class CTcSymLabel *add_code_label(const class CTcToken *tok);
730
731 /*
732 * Set the debugger local symbol table. Returns the previous symbol
733 * table so that it can be restored if desired.
734 */
set_debug_symtab(class CTcPrsDbgSymtab * tab)735 class CTcPrsDbgSymtab *set_debug_symtab(class CTcPrsDbgSymtab *tab)
736 {
737 class CTcPrsDbgSymtab *old_tab;
738
739 /* remember the original for later use */
740 old_tab = debug_symtab_;
741
742 /* set the new table */
743 debug_symtab_ = tab;
744
745 /* return the original */
746 return old_tab;
747 }
748
749 /*
750 * given a (1-based) object file symbol index, get the symbol
751 */
get_objfile_sym(uint idx)752 class CTcSymbol *get_objfile_sym(uint idx)
753 { return (idx == 0 ? 0 : obj_sym_list_[idx - 1]); }
754
755 /*
756 * given a 1-based object file symbol index, get an object symbol;
757 * if the symbol does not refer to an object, we'll return null
758 */
759 class CTcSymObj *get_objfile_objsym(uint idx);
760
761 /*
762 * given an object file (1-based) object file dictionary index, get
763 * the dictionary entry
764 */
get_obj_dict(uint idx)765 class CTcDictEntry *get_obj_dict(uint idx)
766 { return (idx == 0 ? 0 : obj_dict_list_[idx - 1]); }
767
768 /* add a dictionary object loaded from the object file */
769 void add_dict_from_obj_file(class CTcSymObj *sym);
770
771 /* add a symbol object loaded from the object file */
772 void add_sym_from_obj_file(uint idx, class CTcSymbol *sym);
773
774 /*
775 * Get the next object file symbol index. Object file symbol
776 * indices are used to relate symbols stored in the object file to
777 * the corresponding symbol object in memory when the object file is
778 * reloaded.
779 */
get_next_obj_file_sym_idx()780 uint get_next_obj_file_sym_idx()
781 {
782 /* return the next index, consuming the index value */
783 return obj_file_sym_idx_++;
784 }
785
786 /*
787 * Get the next object file dictionary index.
788 */
get_next_obj_file_dict_idx()789 uint get_next_obj_file_dict_idx()
790 {
791 /* return the next index, consuming the index value */
792 return obj_file_dict_idx_++;
793 }
794
795 /*
796 * add an anonymous function or other anonymous top-level statement
797 * to our list of nested top-level statements
798 */
799 void add_nested_stm(class CTPNStmTop *stm);
800
801 /* add an anonymous object to our list */
802 void add_anon_obj(class CTcSymObj *obj);
803
804 /* add a non-symbolic object ID */
805 void add_nonsym_obj(tctarg_obj_id_t id);
806
807 /* determine if the current code body has a local context */
has_local_ctx()808 int has_local_ctx() const { return has_local_ctx_ != 0; }
809
810 /* get the local context variable number */
get_local_ctx_var()811 int get_local_ctx_var() const { return local_ctx_var_num_; }
812
813 /* set up a local context */
814 void init_local_ctx();
815
816 /* allocate a context variable property ID */
817 tctarg_prop_id_t alloc_ctx_var_prop();
818
819 /*
820 * allocate a context variable index - this assigns an array index
821 * for a context variable within the context object that contains
822 * the shared locals for its scope
823 */
824 int alloc_ctx_arr_idx();
825
826 /* allocate a local for use as a local context holder */
alloc_ctx_holder_var()827 int alloc_ctx_holder_var() { return alloc_local(); }
828
829 /* get the maximum number of locals required in the function */
get_max_local_cnt()830 int get_max_local_cnt() const { return max_local_cnt_; }
831
832 /*
833 * find a grammar production symbol, adding a new one if needed,
834 * returning the grammar production list entry for the object
835 */
836 class CTcGramProdEntry *declare_gramprod(const char *sym, size_t len);
837
838 /* find a grammar production list entry for a given object */
839 class CTcGramProdEntry *get_gramprod_entry(class CTcSymObj *sym);
840
841 /* find a grammar production symbol, adding a new one if needed */
842 class CTcSymObj *find_or_def_gramprod(const char *txt, size_t len,
843 class CTcGramProdEntry **entryp);
844
845 /* allocate a new enumerator ID */
new_enum_id()846 ulong new_enum_id() { return next_enum_id_++; }
847
848 /* get the number of enumerator ID's allocated */
get_enum_count()849 ulong get_enum_count() const { return next_enum_id_; }
850
851 /*
852 * Look up a property symbol, adding it if not yet defined. If the
853 * symbol is defined as another type, we'll show an error if
854 * show_err is true, and return null.
855 */
856 CTcSymProp *look_up_prop(const class CTcToken *tok, int show_err);
857
858 /* get the '+' property for tracking the location graph */
get_plus_prop()859 CTcSymProp *get_plus_prop() const { return plus_prop_; }
860
/*
 *   Read a length-prefixed string from a file.  Copies the string into
 *   tokenizer space (which is guaranteed valid throughout compilation),
 *   and returns a pointer to the tokenizer copy.  If ret_len is null,
 *   we'll return a null-terminated string; otherwise, we'll return a
 *   non-null-terminated string and set *ret_len to the length of the
 *   string.
 *
 *   The string must fit in the temporary buffer (tmp_buf, of size
 *   tmp_buf_len) to be read, but the permanent tokenizer copy is
 *   returned rather than the temp buffer.  If the string doesn't fit in
 *   the temp buffer (with null termination, if null termination is
 *   requested), we'll log the error given by err_if_too_long.
 */
static const char *read_len_prefix_str
    (CVmFile *fp, char *tmp_buf, size_t tmp_buf_len, size_t *ret_len,
     int err_if_too_long);

/*
 *   Read a length-prefixed string into the given buffer (buf, of size
 *   buf_len), null terminating the result.  If the string is too long
 *   for the buffer, we'll flag the error code given by err_if_too_long
 *   and return non-zero.  If successful, we'll return zero.
 */
static int read_len_prefix_str(CVmFile *fp, char *buf, size_t buf_len,
                               int err_if_too_long);
887
888
889 /* get the miscVocab property symbol */
get_miscvocab_prop()890 tctarg_prop_id_t get_miscvocab_prop() const { return miscvocab_prop_; }
891
private:
/* clear the anonymous function local context information */
void clear_local_ctx();

/*
 *   Begin a property expression, saving parser state in 'save_info' for
 *   later restoration with finish_prop_expr().
 */
void begin_prop_expr(class CTcPrsPropExprSave *save_info);

/*
 *   Finish a property expression, wrapping it in a code body if
 *   necessary to allow for an embedded anonymous function.  'save_info'
 *   is the state record filled in by the matching begin_prop_expr()
 *   call.
 *
 *   Returns null if no wrapping is required, in which case the original
 *   expression should continue to be used, or the non-null code body
 *   wrapper if needed, in which case the original expression should be
 *   discarded in favor of the fully wrapped code body.
 */
class CTPNCodeBody *finish_prop_expr(class CTcPrsPropExprSave *save_info,
                                     class CTcPrsNode *expr,
                                     int is_static,
                                     class CTcSymProp *prop_sym);

/*
 *   The following are symbol table enumeration callbacks.  Each takes an
 *   opaque context pointer 'ctx' and the current symbol 'sym'.
 */

/*
 *   callback for symbol table enumeration for writing a symbol export
 *   file
 */
static void write_sym_cb(void *ctx, class CTcSymbol *sym);

/* callback for symbol table enumeration for writing an object file */
static void write_obj_cb(void *ctx, class CTcSymbol *sym);

/* callback for symbol table enumeration for writing cross references */
static void write_obj_ref_cb(void *ctx, class CTcSymbol *sym);

/* callback for symbol table enumeration for named grammar rules */
static void write_obj_gram_cb(void *ctx, class CTcSymbol *sym);

/* callback for symbol table enumeration for merging grammar rules */
static void build_grammar_cb(void *ctx, class CTcSymbol *sym);
932
933
934 /*
935 * Enter a scope. Upon entering, we'll remember the current local
936 * variable data; on leaving, we'll restore the enclosing scope.
937 */
enter_scope(struct tcprs_scope_t * info)938 void enter_scope(struct tcprs_scope_t *info)
939 {
940 /* remember the current scope information */
941 info->local_symtab = local_symtab_;
942 info->enclosing_symtab = enclosing_local_symtab_;
943 info->local_cnt = local_cnt_;
944
945 /*
946 * We haven't yet allocated a symbol table local to the new
947 * scope -- we defer this until we actually need to insert a
948 * symbol into the new scope. In order to detect when we need
949 * to create our own local symbol table, we keep track of the
950 * enclosing symbol table; when the local table is the same as
951 * the enclosing table, and we need to insert a symbol, it means
952 * that we must create a new table for the current scope.
953 */
954 enclosing_local_symtab_ = local_symtab_;
955 }
956
957 /* leave a scope */
leave_scope(struct tcprs_scope_t * info)958 void leave_scope(struct tcprs_scope_t *info)
959 {
960 /* restore enclosing scope information */
961 local_symtab_ = info->local_symtab;
962 enclosing_local_symtab_ = info->enclosing_symtab;
963
964 /* return to the local count in the enclosing scope */
965 // $$$ we can't actually do this because variables could
966 // be allocated after this scope ends, but need lifetimes
967 // that overlap with the enclosed scope; what we actually
968 // need to do, if we wanted to optimize things, would be
969 // to allow this block of variables to be used in *disjoint*
970 // scopes, but not again in enclosing scopes. We can easily,
971 // though suboptimally, handle this by simply not allowing
972 // the variables in the enclosed scope to be re-used at all
973 // in the current code block.
974 // local_cnt_ = info->local_cnt;
975 }
976
/*
 *   Create a local symbol table in the current scope, if necessary.  If
 *   we've already created a local symbol table for the current scope,
 *   this has no effect.  (enter_scope() defers table creation until a
 *   symbol must actually be inserted into the new scope.)
 */
void create_scope_local_symtab();
983
984 /* allocate a new local variable ID */
alloc_local()985 int alloc_local()
986 {
987 /*
988 * if this exceeds the maximum depth in the block so far, note
989 * the new maximum depth
990 */
991 if (local_cnt_ + 1 > max_local_cnt_)
992 max_local_cnt_ = local_cnt_ + 1;
993
994 /* return the local number, and increment the counter */
995 return local_cnt_++;
996 }
997
/*
 *   Find a dictionary symbol, adding a new one if needed.  'sym' and
 *   'len' give the symbol name text and its length.
 */
class CTcDictEntry *declare_dict(const char *sym, size_t len);

/* create a new dictionary list entry for the given object symbol */
class CTcDictEntry *create_dict_entry(class CTcSymObj *sym);

/* find a dictionary list entry for a given object */
class CTcDictEntry *get_dict_entry(class CTcSymObj *sym);

/*
 *   create a new grammar production list entry for the given object
 *   symbol
 */
class CTcGramProdEntry *create_gramprod_entry(class CTcSymObj *sym);

/*
 *   The following are symbol enumeration callbacks.  Each takes an
 *   opaque context pointer 'ctx' and the current symbol 'sym'.
 */

/* symbol enumerator - look for unresolved external references */
static void enum_sym_extref(void *ctx, class CTcSymbol *sym);

/* symbol enumerator - apply internal fixups */
static void enum_sym_internal_fixup(void *ctx, class CTcSymbol *sym);

/* symbol enumerator - build dictionary */
static void enum_sym_dict(void *ctx, class CTcSymbol *sym);

/* enumeration callback - context local conversion */
static void enum_for_ctx_locals(void *ctx, class CTcSymbol *sym);
1021
/* global symbol table */
class CTcPrsSymtab *global_symtab_;

/* the constructor property ID and symbol */
tc_prop_id constructor_prop_;
class CTcSymProp *constructor_sym_;

/* the finalizer property ID */
tc_prop_id finalize_prop_;

/* object-call property ID */
tc_prop_id objcall_prop_;

/* grammarInfo property symbol */
class CTcSymProp *graminfo_prop_;

/* miscVocab property ID (returned by get_miscvocab_prop()) */
tc_prop_id miscvocab_prop_;

/* lexicalParent property symbol */
class CTcSymProp *lexical_parent_sym_;

/* sourceTextOrder property symbol */
class CTcSymProp *src_order_sym_;

/*
 *   Source text order index.  Each time we encounter an object
 *   definition in the source code, we assign the current index value to
 *   the object's 'sourceTextOrder' property, then we increment the
 *   index.  This provides the game program with information on the order
 *   in which static objects appear in the source code, so that the
 *   program can sort a collection of objects into their source file
 *   order if desired.
 */
long src_order_idx_;

/*
 *   Flag: in preprocessor constant expression mode.  In this mode,
 *   double-quoted strings should be treated the same as single-quoted
 *   strings for concatenation and comparisons.
 */
uint pp_expr_mode_ : 1;

/*
 *   Flag: syntax-only mode.  We use this mode to analyze the syntax of
 *   the file without building the image; this is used, for example, to
 *   build the exported symbol file for a source file.  In this mode,
 *   we'll suppress certain warnings and avoid doing work that's not
 *   necessary for syntactic analysis; for example, we won't show
 *   "unreachable code" errors.
 */
uint syntax_only_ : 1;
1074
/*
 *   Code block parsing state
 */

/*
 *   'goto' symbol table for the current code block - there's only one
 *   'goto' scope for an entire code block, so this never changes over
 *   the course of a code block
 */
class CTcPrsSymtab *goto_symtab_;

/*
 *   Current local symbol table.  Each inner scope that defines its own
 *   local variables has its own local symbol table, nested within the
 *   enclosing scope's.  When leaving an inner scope, this should always
 *   be restored to the local symbol table of the enclosing scope
 *   (leave_scope() does this).
 */
class CTcPrsSymtab *local_symtab_;

/*
 *   Enclosing local symbol table.  If this is the same as local_symtab_,
 *   it means that the current scope has not yet created its own local
 *   symbol table.  We defer this creation until we find we actually need
 *   a local symbol table in a scope, since most scopes don't define any
 *   of their own local variables.
 */
class CTcPrsSymtab *enclosing_local_symtab_;

/*
 *   Current debugger local symbol table.  When we're compiling a
 *   debugger expression, this will provide access to the current local
 *   scope in the debug records.
 */
class CTcPrsDbgSymtab *debug_symtab_;

/*
 *   Number of local variables allocated so far in current code block --
 *   this reflects nesting to the current innermost scope, because
 *   variables in inner scope are allocated in the same stack frame as
 *   the enclosing scopes.
 *
 *   Note that although enter_scope() saves this value, leave_scope()
 *   deliberately does NOT roll it back to the enclosing scope's count
 *   (see the comment in leave_scope()), so the variables of an inner
 *   scope are not re-used elsewhere in the same code block.
 */
int local_cnt_;

/*
 *   maximum local variable depth for the current code block -- this
 *   reflects the maximum depth, including all inner scopes so far
 *   (alloc_local() keeps this up to date)
 */
int max_local_cnt_;

/*
 *   Enclosing statement - this is the innermost 'try' or 'label:'
 *   enclosing the current code.
 */
class CTPNStmEnclosing *enclosing_stm_;

/* file descriptor and line number at start of current statement */
class CTcTokFileDesc *cur_desc_;
long cur_linenum_;
1135
/* currently active dictionary */
class CTcDictEntry *dict_cur_;

/* head and tail of dictionary list */
class CTcDictEntry *dict_head_;
class CTcDictEntry *dict_tail_;

/* head and tail of grammar production entry list */
class CTcGramProdEntry *gramprod_head_;
class CTcGramProdEntry *gramprod_tail_;

/*
 *   array of symbols loaded from the object file - these are indexed by
 *   the object file symbol index stored in symbol references in the
 *   object file, allowing us to fix up references from one symbol to
 *   another during loading
 */
class CTcSymbol **obj_sym_list_;

/*
 *   array of dictionary objects for the object file being loaded - these
 *   are indexed by the dictionary index stored in symbol references in
 *   the object file, allowing us to fix up references from an object to
 *   its dictionary
 */
class CTcDictEntry **obj_dict_list_;

/* next available object file dictionary index */
uint obj_file_dict_idx_;

/* next available object file symbol index */
uint obj_file_sym_idx_;

/* dictionary property list head */
class CTcDictPropEntry *dict_prop_head_;

/*
 *   Head and tail of list of nested top-level statements parsed for the
 *   current top-level statement.  This list includes anonymous functions
 *   and nested objects, since these statements must ultimately be linked
 *   into the top-level statement queue, but can't be linked in while
 *   they're being parsed because of their nested location in the
 *   recursive descent.  We'll throw each new nested top-level statement
 *   into this list as we parse them, then add this list to the top-level
 *   statement list when we're done with the entire program.
 */
class CTPNStmTop *nested_stm_head_;
class CTPNStmTop *nested_stm_tail_;

/*
 *   Anonymous object list (head and tail).  This is a list of objects
 *   which are defined without symbol names.
 */
class CTcSymObj *anon_obj_head_;
class CTcSymObj *anon_obj_tail_;

/*
 *   Non-symbolic object list (head and tail).  This is a list of objects
 *   that are defined without symbols at all; each entry records only the
 *   object's ID.
 */
struct tcprs_nonsym_obj *nonsym_obj_head_;
struct tcprs_nonsym_obj *nonsym_obj_tail_;

/*
 *   Object template list (head and tail) - this is the master list of
 *   templates for the root object class.
 */
class CTcObjTemplate *template_head_;
class CTcObjTemplate *template_tail_;

/*
 *   Object template instance parsing expression array.  Each time we
 *   define a new template, we'll make sure this array is long enough for
 *   the longest defined template.  We use this list when we're parsing a
 *   template instance to keep track of the expressions in the template
 *   instance - we can't know until we have the entire list which
 *   template we're using, so we must keep track of the entire list until
 *   we reach the end of the list.
 *
 *   NOTE(review): template_expr_max_ is presumably the allocated length
 *   of the array - confirm against the implementation.
 */
class CTcObjTemplateInst *template_expr_;
size_t template_expr_max_;

/* head and tail of exported symbol list */
class CTcPrsExport *exp_head_;
class CTcPrsExport *exp_tail_;
1222
/*
 *   Flag: current code body has a local variable context object.  If
 *   this is set, we must generate code that sets up the context object
 *   on entry to the code body.
 */
unsigned int has_local_ctx_ : 1;

/* local variable number of the code body's local variable context */
int local_ctx_var_num_;

/* array of context variable property values */
tctarg_prop_id_t *ctx_var_props_;

/*
 *   size of the ctx_var_props_ array (NOTE(review): presumably the
 *   allocated capacity, as distinct from the in-use count below -
 *   confirm)
 */
size_t ctx_var_props_size_;

/* number of context variable property values in the list */
size_t ctx_var_props_cnt_;

/*
 *   number of context variable property values assigned to the current
 *   code body
 */
size_t ctx_var_props_used_;

/* next available local variable context index */
int next_ctx_arr_idx_;

/* reference to the current code body being parsed */
CTcCodeBodyRef *cur_code_body_;

/* flag: 'self' is valid in current code body */
int self_valid_;

/*
 *   flag: 'self' is used (explicitly or implicitly, such as via a
 *   property reference or method call) in the current code body
 */
int self_referenced_;

/*
 *   Flag: method context beyond 'self' (targetprop, targetobj,
 *   definingobj) is referenced (explicitly or implicitly, such as via
 *   'inherited' or 'delegated') in the current code body.
 */
int full_method_ctx_referenced_;

/*
 *   Flags: the local context of the outermost code body requires
 *   'self' (local_ctx_needs_self_) or the full method context
 *   (local_ctx_needs_full_method_ctx_) to be stored.
 */
int local_ctx_needs_self_;
int local_ctx_needs_full_method_ctx_;

/*
 *   next available enumerator ID (new_enum_id() allocates from this
 *   counter)
 */
ulong next_enum_id_;

/*
 *   The '+' property - this is the property that defines the containment
 *   graph for the purposes of the '+' syntax.
 */
class CTcSymProp *plus_prop_;

/*
 *   '+' property location stack.  Each time the program defines an
 *   object using the '+' notation to set the location, we'll update our
 *   record here of the last object at that depth.  Any time an object is
 *   defined at depth N (i.e., using N '+' signs), its location is set to
 *   the last object at depth N-1.  An object with no '+' signs is at
 *   depth zero.
 *
 *   NOTE(review): plus_stack_alloc_ is presumably the number of entries
 *   allocated in plus_stack_ - confirm against the implementation.
 */
class CTPNStmObject **plus_stack_;
size_t plus_stack_alloc_;
1296 };
1297
1298 /* ------------------------------------------------------------------------ */
1299 /*
1300 * Statement termination information. This is used for certain nested
1301 * definition parsers, where a lack of termination in the nested
1302 * definition is to be interpreted as being actually caused by a lack of
1303 * termination of the enclosing definition.
1304 */
1305 struct tcprs_term_info
1306 {
1307 /* initialize */
inittcprs_term_info1308 void init(class CTcTokFileDesc *desc, long linenum)
1309 {
1310 /* remember the current location */
1311 desc_ = desc;
1312 linenum_ = linenum;
1313
1314 /* no termination error yet */
1315 unterm_ = FALSE;
1316 }
1317
1318 /*
1319 * source location where original terminator might have been - this is
1320 * where we decided to go into a nested definition, so if it turns out
1321 * that the definintion shouldn't have been nested after all, there
1322 * was missing termination here
1323 */
1324 class CTcTokFileDesc *desc_;
1325 long linenum_;
1326
1327 /*
1328 * flag: termination was in fact missing in the nested definition; the
1329 * nested parser sets this to relay the problem to the caller
1330 */
1331 int unterm_;
1332 };
1333
1334 /* ------------------------------------------------------------------------ */
/*
 *   Object template list entry.  Each entry owns the head of a linked
 *   list of template items, and is itself a link in the master list of
 *   templates.
 */
class CTcObjTemplate
{
public:
    /*
     *   Create a template from the given item list ('item_head' is the
     *   first item, 'item_cnt' the number of items).  The new template
     *   is not linked into any list.
     */
    CTcObjTemplate(class CTcObjTemplateItem *item_head, size_t item_cnt)
        : items_(item_head), item_cnt_(item_cnt), nxt_(0)
    {
    }

    /* first item in this template's item list */
    class CTcObjTemplateItem *items_;

    /* how many items the list contains */
    size_t item_cnt_;

    /* link to the next template in the master template list */
    CTcObjTemplate *nxt_;
};
1360
1361 /*
1362 * Object template list item
1363 */
1364 class CTcObjTemplateItem
1365 {
1366 public:
CTcObjTemplateItem(class CTcSymProp * prop,tc_toktyp_t tok_type,int is_alt,int is_opt)1367 CTcObjTemplateItem(class CTcSymProp *prop, tc_toktyp_t tok_type,
1368 int is_alt, int is_opt)
1369 {
1370 /* remember my defining information */
1371 prop_ = prop;
1372 tok_type_ = tok_type;
1373 is_alt_ = is_alt;
1374 is_opt_ = is_opt;
1375
1376 /* not in a list yet */
1377 nxt_ = 0;
1378 }
1379
1380 /* property that the item in this position defines */
1381 class CTcSymProp *prop_;
1382
1383 /* token type of item in this position */
1384 tc_toktyp_t tok_type_;
1385
1386 /* next item in this template's item list */
1387 CTcObjTemplateItem *nxt_;
1388
1389 /* flag: this item is an alternative to the previous item */
1390 unsigned int is_alt_ : 1;
1391
1392 /* flag: this item is optional */
1393 unsigned int is_opt_ : 1;
1394 };
1395
/*
 *   Template item instance - we keep track of the actual parameters to a
 *   template with these items.  This is a plain data record: it defines
 *   no constructor, so its fields are not initialized on creation.
 */
class CTcObjTemplateInst
{
public:
    /*
     *   expression value for the actual parameter, as either a naked
     *   expression (expr_) or as a code body (code_body_) - only one of
     *   expr_ or code_body_ will be valid
     */
    class CTcPrsNode *expr_;
    class CTPNCodeBody *code_body_;

    /*
     *   the introductory token of the parameter - if the parameter is
     *   introduced by an operator token, this will not be part of the
     *   expression
     */
    tc_toktyp_t def_tok_;

    /* the first token of the value */
    CTcToken expr_tok_;

    /*
     *   The property to which to assign this actual parameter value.
     *   This isn't filled in until we match the full list to an actual
     *   template, since we don't know the meanings of the parameters
     *   until we match the actuals to an existing template in memory.
     */
    class CTcSymProp *prop_;
};
1429
1430
1431 /* ------------------------------------------------------------------------ */
1432 /*
1433 * Non-symbolic object list entry
1434 */
1435 struct tcprs_nonsym_obj
1436 {
tcprs_nonsym_objtcprs_nonsym_obj1437 tcprs_nonsym_obj(tctarg_obj_id_t id)
1438 {
1439 /* remember the ID */
1440 id_ = id;
1441
1442 /* not in a list yet */
1443 nxt_ = 0;
1444 }
1445
1446 /* ID of this object */
1447 tctarg_obj_id_t id_;
1448
1449 /* next entry in the list */
1450 tcprs_nonsym_obj *nxt_;
1451 };
1452
1453 /* ------------------------------------------------------------------------ */
1454 /*
1455 * Dictionary property list entry. Each time the source code defines a
1456 * dictionary property, we'll make an entry in this list.
1457 */
1458 class CTcDictPropEntry
1459 {
1460 public:
CTcDictPropEntry(class CTcSymProp * prop)1461 CTcDictPropEntry(class CTcSymProp *prop)
1462 {
1463 /* remember the property */
1464 prop_ = prop;
1465
1466 /* not in a list yet */
1467 nxt_ = 0;
1468
1469 /* not defined for current object yet */
1470 defined_ = FALSE;
1471 }
1472
1473 /* my property */
1474 class CTcSymProp *prop_;
1475
1476 /* next entry in list */
1477 CTcDictPropEntry *nxt_;
1478
1479 /* flag: the current object definition includes this property */
1480 unsigned int defined_ : 1;
1481 };
1482
1483 /* ------------------------------------------------------------------------ */
1484 /*
1485 * Dictionary list entry. Each dictionary object gets an entry in this
1486 * list.
1487 */
1488 class CTcDictEntry
1489 {
1490 public:
1491 CTcDictEntry(class CTcSymObj *sym);
1492
1493 /* get/set my object file index */
get_obj_idx()1494 uint get_obj_idx() const { return obj_idx_; }
set_obj_idx(uint idx)1495 void set_obj_idx(uint idx) { obj_idx_ = idx; }
1496
1497 /* get my object symbol */
get_sym()1498 class CTcSymObj *get_sym() const { return sym_; }
1499
1500 /* get/set the next item in the list */
get_next()1501 CTcDictEntry *get_next() const { return nxt_; }
set_next(CTcDictEntry * nxt)1502 void set_next(CTcDictEntry *nxt) { nxt_ = nxt; }
1503
1504 /* add a word to the table */
1505 void add_word(const char *txt, size_t len, int copy,
1506 tc_obj_id obj, tc_prop_id prop);
1507
1508 /* write my symbol to the object file if I haven't already done so */
1509 void write_sym_to_obj_file(CVmFile *fp);
1510
1511 /* get the hash table */
get_hash_table()1512 class CVmHashTable *get_hash_table() const { return hashtab_; }
1513
1514 protected:
1515 /* enumeration callback - write to object file */
1516 static void enum_cb_writeobj(void *ctx, class CVmHashEntry *entry);
1517
1518 /* associated object symbol */
1519 class CTcSymObj *sym_;
1520
1521 /*
1522 * object file index (we use this to match up the dictionary objects
1523 * when we re-load the object file)
1524 */
1525 uint obj_idx_;
1526
1527 /* next item in the dictionary list */
1528 CTcDictEntry *nxt_;
1529
1530 /* hash table containing the word entries */
1531 class CVmHashTable *hashtab_;
1532 };
1533
1534
1535 /*
1536 * entry in a dictionary list
1537 */
1538 struct CTcPrsDictItem
1539 {
CTcPrsDictItemCTcPrsDictItem1540 CTcPrsDictItem(tc_obj_id obj, tc_prop_id prop)
1541 {
1542 obj_ = obj;
1543 prop_ = prop;
1544 nxt_ = 0;
1545 }
1546
1547 /* object */
1548 tc_obj_id obj_;
1549
1550 /* property */
1551 tc_prop_id prop_;
1552
1553 /* next entry in list */
1554 CTcPrsDictItem *nxt_;
1555 };
1556
1557 /*
1558 * Parser dictionary hash table entry
1559 */
1560 class CVmHashEntryPrsDict: public CVmHashEntryCS
1561 {
1562 public:
CVmHashEntryPrsDict(const char * txt,size_t len,int copy)1563 CVmHashEntryPrsDict(const char *txt, size_t len, int copy)
1564 : CVmHashEntryCS(txt, len, copy)
1565 {
1566 /* nothing in my list yet */
1567 list_ = 0;
1568 }
1569
1570 /* add an item to my list */
1571 void add_item(tc_obj_id obj, tc_prop_id prop);
1572
1573 /* get the list head */
get_list()1574 struct CTcPrsDictItem *get_list() const { return list_; }
1575
1576 protected:
1577 /* list of object/property associations with this word */
1578 struct CTcPrsDictItem *list_;
1579 };
1580
1581 /* ------------------------------------------------------------------------ */
/*
 *   State save structure for parsing property expressions.  The parser's
 *   begin_prop_expr() saves parser state into one of these for later
 *   restoration by finish_prop_expr().  Each field here has the same
 *   name and type as a parser member; the descriptions below paraphrase
 *   the parser's own comments for those members.
 *   NOTE(review): the save/restore code is not visible here - presumably
 *   each field holds the saved value of the like-named parser member.
 */
class CTcPrsPropExprSave
{
public:
    /* flag: code body has a local variable context object */
    unsigned int has_local_ctx_ : 1;

    /* local variable number of the code body's local variable context */
    int local_ctx_var_num_;

    /* number of context variable properties assigned to the code body */
    size_t ctx_var_props_used_;

    /* next available local variable context index */
    int next_ctx_arr_idx_;

    /* flag: 'self' is referenced in the code body */
    int self_referenced_;

    /* flag: method context beyond 'self' is referenced in the code body */
    int full_method_ctx_referenced_;

    /* flags: local context needs 'self'/the full method context stored */
    int local_ctx_needs_self_;
    int local_ctx_needs_full_method_ctx_;

    /* reference to the code body being parsed */
    struct CTcCodeBodyRef *cur_code_body_;
};
1598
1599 /* ------------------------------------------------------------------------ */
1600 /*
1601 * Grammar production list entry
1602 */
1603 class CTcGramProdEntry
1604 {
1605 public:
1606 CTcGramProdEntry(class CTcSymObj *prod_obj);
1607
1608 /* get my production object symbol */
get_prod_sym()1609 class CTcSymObj *get_prod_sym() const { return prod_sym_; }
1610
1611 /* get/set the next item in the list */
get_next()1612 CTcGramProdEntry *get_next() const { return nxt_; }
set_next(CTcGramProdEntry * nxt)1613 void set_next(CTcGramProdEntry *nxt) { nxt_ = nxt; }
1614
1615 /* add an alternative */
1616 void add_alt(class CTcGramProdAlt *alt);
1617
1618 /* get the alternative list head */
get_alt_head()1619 class CTcGramProdAlt *get_alt_head() const { return alt_head_; }
1620
1621 /* write to an object file */
1622 void write_to_obj_file(class CVmFile *fp);
1623
1624 /* load from an object file */
1625 static void load_from_obj_file(class CVmFile *fp,
1626 const tctarg_prop_id_t *prop_xlat,
1627 const ulong *enum_xlat,
1628 class CTcSymObj *private_owner);
1629
1630 /* move alternatives from my list to the given target list */
1631 void move_alts_to(CTcGramProdEntry *new_entry);
1632
1633 /* get/set explicitly-declared flag */
is_declared()1634 int is_declared() const { return is_declared_; }
set_declared(int f)1635 void set_declared(int f) { is_declared_ = f; }
1636
1637 protected:
1638 /* associated production object symbol */
1639 class CTcSymObj *prod_sym_;
1640
1641 /* next item in the list */
1642 CTcGramProdEntry *nxt_;
1643
1644 /* head and tail of alternative list */
1645 class CTcGramProdAlt *alt_head_;
1646 class CTcGramProdAlt *alt_tail_;
1647
1648 /*
1649 * flag: this production was explicitly declared (this means that we
1650 * will consider it valid at link time even if it has no alternatives
1651 * defined)
1652 */
1653 unsigned int is_declared_ : 1;
1654 };
1655
1656 /*
1657 * Grammar production alternative. Each grammar production has one or
1658 * more alternatives that, when matched, generate the production.
1659 */
1660 class CTcGramProdAlt
1661 {
1662 public:
1663 CTcGramProdAlt(class CTcSymObj *obj_sym, class CTcDictEntry *dict);
1664
1665 /* get/set my score */
get_score()1666 int get_score() const { return score_; }
set_score(int score)1667 void set_score(int score) { score_ = score; }
1668
1669 /* get/set my badness */
get_badness()1670 int get_badness() const { return badness_; }
set_badness(int badness)1671 void set_badness(int badness) { badness_ = badness; }
1672
1673 /* get my processor object symbol */
get_processor_obj()1674 class CTcSymObj *get_processor_obj() const { return obj_sym_; }
1675
1676 /* get/set the next list element */
get_next()1677 CTcGramProdAlt *get_next() const { return nxt_; }
set_next(CTcGramProdAlt * nxt)1678 void set_next(CTcGramProdAlt *nxt) { nxt_ = nxt; }
1679
1680 /* add a token to my list */
1681 void add_tok(class CTcGramProdTok *tok);
1682
1683 /* get the head of my token list */
get_tok_head()1684 class CTcGramProdTok *get_tok_head() const { return tok_head_; }
1685
1686 /* write to an object file */
1687 void write_to_obj_file(class CVmFile *fp);
1688
1689 /* load from an object file */
1690 static CTcGramProdAlt *
1691 load_from_obj_file(class CVmFile *fp,
1692 const tctarg_prop_id_t *prop_xlat,
1693 const ulong *enum_xlat);
1694
1695 /* get the dictionary in effect when the alternative was defined */
get_dict()1696 class CTcDictEntry *get_dict() const { return dict_; }
1697
1698 protected:
1699 /* head and tail of our token list */
1700 class CTcGramProdTok *tok_head_;
1701 class CTcGramProdTok *tok_tail_;
1702
1703 /* dictionary in effect when alternative was defined */
1704 class CTcDictEntry *dict_;
1705
1706 /* the processor object associated with this alternative */
1707 class CTcSymObj *obj_sym_;
1708
1709 /* next alternative in our production */
1710 CTcGramProdAlt *nxt_;
1711
1712 /* score */
1713 int score_;
1714
1715 /* badness */
1716 int badness_;
1717 };
1718
/*
 *   Grammar production token types.  A CTcGramProdTok carries one of
 *   these to say how the token matches; the token's set_match_xxx()
 *   methods select the type.
 */
enum tcgram_tok_type
{
    /* unknown - the type of a newly constructed CTcGramProdTok */
    TCGRAM_UNKNOWN,

    /* match a production (given by the production object) */
    TCGRAM_PROD,

    /* match a part of speech (given by the dictionary property) */
    TCGRAM_PART_OF_SPEECH,

    /* match a literal string */
    TCGRAM_LITERAL,

    /* token-type match (given by the token enum ID) */
    TCGRAM_TOKEN_TYPE,

    /* free-floating end-of-string */
    TCGRAM_STAR,

    /* match one of several parts of speech */
    TCGRAM_PART_OF_SPEECH_LIST
};
1743
1744 /*
1745 * Grammar production alternative token
1746 */
1747 class CTcGramProdTok
1748 {
1749 public:
CTcGramProdTok()1750 CTcGramProdTok()
1751 {
1752 /* not in a list yet */
1753 nxt_ = 0;
1754
1755 /* no type yet */
1756 typ_ = TCGRAM_UNKNOWN;
1757
1758 /* no property association yte */
1759 prop_assoc_ = TCTARG_INVALID_PROP;
1760 }
1761
1762 /* get/set my next element */
get_next()1763 CTcGramProdTok *get_next() const { return nxt_; }
set_next(CTcGramProdTok * nxt)1764 void set_next(CTcGramProdTok *nxt) { nxt_ = nxt; }
1765
1766 /* set me to match a production object */
set_match_prod(class CTcSymObj * obj)1767 void set_match_prod(class CTcSymObj *obj)
1768 {
1769 /* remember the production object */
1770 typ_ = TCGRAM_PROD;
1771 val_.obj_ = obj;
1772 }
1773
1774 /* set me to match a token type */
set_match_token_type(ulong enum_id)1775 void set_match_token_type(ulong enum_id)
1776 {
1777 /* remember the token enum ID */
1778 typ_ = TCGRAM_TOKEN_TYPE;
1779 val_.enum_id_ = enum_id;
1780 }
1781
1782 /* set me to match a dictionary property */
set_match_part_of_speech(tctarg_prop_id_t prop)1783 void set_match_part_of_speech(tctarg_prop_id_t prop)
1784 {
1785 /* remember the part of speech */
1786 typ_ = TCGRAM_PART_OF_SPEECH;
1787 val_.prop_ = prop;
1788 }
1789
1790 /*
1791 * set me to match a list of parts of speech; each part of speech must
1792 * be separately added via add_match_part_ele()
1793 */
1794 void set_match_part_list();
1795
1796 /* add an element to the part-of-speech match list */
1797 void add_match_part_ele(tctarg_prop_id_t prop);
1798
1799 /* set me to match a literal string */
set_match_literal(const char * txt,size_t len)1800 void set_match_literal(const char *txt, size_t len)
1801 {
1802 /* remember the string */
1803 typ_ = TCGRAM_LITERAL;
1804 val_.str_.txt_ = txt;
1805 val_.str_.len_ = len;
1806 }
1807
1808 /* set me to match a free-floating end-of-string */
set_match_star()1809 void set_match_star()
1810 {
1811 /* set the type */
1812 typ_ = TCGRAM_STAR;
1813 }
1814
1815 /* get my type */
get_type()1816 tcgram_tok_type get_type() const { return typ_; }
1817
1818 /* get my value */
getval_prod()1819 class CTcSymObj *getval_prod() const { return val_.obj_; }
getval_part_of_speech()1820 tctarg_prop_id_t getval_part_of_speech() const { return val_.prop_; }
getval_literal_txt()1821 const char *getval_literal_txt() const { return val_.str_.txt_; }
getval_literal_len()1822 const size_t getval_literal_len() const { return val_.str_.len_; }
getval_token_type()1823 ulong getval_token_type() const { return val_.enum_id_; }
getval_part_list_len()1824 size_t getval_part_list_len() const { return val_.prop_list_.len_; }
getval_part_list_ele(size_t idx)1825 tctarg_prop_id_t getval_part_list_ele(size_t idx) const
1826 { return val_.prop_list_.arr_[idx]; }
1827
1828 /*
1829 * get/set my property association - this is the property to which
1830 * the actual match to the rule is assigned when we match the rule
1831 */
get_prop_assoc()1832 tctarg_prop_id_t get_prop_assoc() const { return prop_assoc_; }
set_prop_assoc(tctarg_prop_id_t prop)1833 void set_prop_assoc(tctarg_prop_id_t prop) { prop_assoc_ = prop; }
1834
1835 /* write to an object file */
    void write_to_obj_file(class CVmFile *fp);

    /*
     *   Load from an object file.  This is a static routine, so no
     *   existing token object is needed; it returns a pointer to a
     *   CTcGramProdTok.  The prop_xlat and enum_xlat arrays are presumably
     *   translation tables mapping the object file's property and enum
     *   IDs to the IDs in the current compilation - confirm against the
     *   implementation.
     */
    static CTcGramProdTok *
        load_from_obj_file(class CVmFile *fp,
                           const tctarg_prop_id_t *prop_xlat,
                           const ulong *enum_xlat);
1843
1844 protected:
1845 /* next token in my list */
    CTcGramProdTok *nxt_;

    /* my type - this specifies how this token matches */
    tcgram_tok_type typ_;

    /*
     *   Match specification - varies according to my type.  The
     *   set_match_xxx() routines store the type in typ_ together with the
     *   corresponding member here, so typ_ indicates which member is
     *   meaningful.
     */
    union
    {
        /* object - for matching a production */
        class CTcSymObj *obj_;

        /* property - for matching a part of speech */
        tctarg_prop_id_t prop_;

        /* token enum id - for matching a token type */
        ulong enum_id_;

        /*
         *   literal string - a text pointer plus an explicit length (the
         *   setter stores the caller's pointer without copying the text)
         */
        struct
        {
            const char *txt_;
            size_t len_;
        } str_;

        /* list of vocabulary elements */
        struct
        {
            /* number of array entries allocated */
            size_t alo_;

            /* number of array entries actually used */
            size_t len_;

            /* array of entries */
            tctarg_prop_id_t *arr_;
        } prop_list_;
    } val_;

    /*
     *   property association - read and written through
     *   get_prop_assoc() and set_prop_assoc()
     */
    tctarg_prop_id_t prop_assoc_;
1886 };
1887
1888 /* ------------------------------------------------------------------------ */
1889 /*
1890 * Exported symbol record
1891 */
1892 class CTcPrsExport
1893 {
1894 public:
1895 /* create with the given compiler symbol */
CTcPrsExport(const char * sym,size_t sym_len)1896 CTcPrsExport(const char *sym, size_t sym_len)
1897 {
1898 /* remember my name */
1899 sym_ = sym;
1900 sym_len_ = sym_len;
1901
1902 /*
1903 * we don't yet have an explicit external name, so export using
1904 * the internal name
1905 */
1906 ext_name_ = sym;
1907 ext_len_ = sym_len;
1908
1909 /* we're not in a list yet */
1910 nxt_ = 0;
1911 }
1912
1913 /* set the external name */
set_extern_name(const char * txt,size_t len)1914 void set_extern_name(const char *txt, size_t len)
1915 {
1916 ext_name_ = txt;
1917 ext_len_ = len;
1918 }
1919
1920 /* get the symbol name and length */
get_sym()1921 const char *get_sym() const { return sym_; }
get_sym_len()1922 size_t get_sym_len() const { return sym_len_; }
1923
1924 /* get the external name and length */
get_ext_name()1925 const char *get_ext_name() const { return ext_name_; }
get_ext_len()1926 size_t get_ext_len() const { return ext_len_; }
1927
1928 /* get/set the next entry in the list */
get_next()1929 CTcPrsExport *get_next() const { return nxt_; }
set_next(CTcPrsExport * nxt)1930 void set_next(CTcPrsExport *nxt) { nxt_ = nxt; }
1931
1932 /* write to an object file */
1933 void write_to_obj_file(class CVmFile *fp);
1934
1935 /* read from an object file */
1936 static CTcPrsExport *read_from_obj_file(class CVmFile *fp);
1937
1938 /* determine if my external name matches the given export's */
ext_name_matches(const CTcPrsExport * exp)1939 int ext_name_matches(const CTcPrsExport *exp) const
1940 {
1941 return (exp->get_ext_len() == get_ext_len()
1942 && memcmp(exp->get_ext_name(), get_ext_name(),
1943 get_ext_len()) == 0);
1944 }
1945
1946 /* determine if my name matches the given string */
ext_name_matches(const char * txt)1947 int ext_name_matches(const char *txt) const
1948 {
1949 return (get_ext_len() == get_strlen(txt)
1950 && memcmp(get_ext_name(), txt, get_ext_len()) == 0);
1951 }
1952
1953 /* determine if my symbol name matches the given export's */
sym_matches(const CTcPrsExport * exp)1954 int sym_matches(const CTcPrsExport *exp) const
1955 {
1956 return (exp->get_sym_len() == get_sym_len()
1957 && memcmp(exp->get_sym(), get_sym(), get_sym_len()) == 0);
1958 }
1959
1960 protected:
1961 /* symbol name - this is the internal compiler symbol being exported */
1962 const char *sym_;
1963 size_t sym_len_;
1964
1965 /* external name - this is the name visible to the VM loader */
1966 const char *ext_name_;
1967 size_t ext_len_;
1968
1969 /* next in list */
1970 CTcPrsExport *nxt_;
1971 };
1972
1973
1974 /* ------------------------------------------------------------------------ */
1975 /*
1976 * Parser Symbol Table. The parser maintains a hierarchy of symbol
1977 * tables; a local symbol table can be nested inside an enclosing
1978 * scope's symbol table, and so on up to the top-level block scope,
1979 * which is enclosed by the global scope. In addition, at function
1980 * scope there's a separate table for "goto" labels.
1981 */
1982
1983 /* find_or_def actions for undefined symbols */
enum tcprs_undef_action
{
    /*
     *   if undefined, add an "undefined" entry unconditionally (this is
     *   the action find_or_def_undef() passes to find_or_def())
     */
    TCPRS_UNDEF_ADD_UNDEF,

    /*
     *   add a "property" entry unconditionally, but warn about it (the
     *   action passed by find_or_def_prop())
     */
    TCPRS_UNDEF_ADD_PROP,

    /*
     *   add a "property" entry unconditionally, with no warning (the
     *   action passed by find_or_def_prop_explicit())
     */
    TCPRS_UNDEF_ADD_PROP_NO_WARNING
};
1995
1996 /* parser symbol table */
1997 class CTcPrsSymtab
1998 {
1999 public:
2000 CTcPrsSymtab(CTcPrsSymtab *parent_scope);
2001 ~CTcPrsSymtab();
2002
2003 /* allocate parser symbol tables out of the parser memory pool */
2004 void *operator new(size_t siz);
2005
2006 /*
2007 * perform static initialization/termination - call once at program
2008 * startup and shutdown (respectively)
2009 */
2010 static void s_init();
2011 static void s_terminate();
2012
2013 /* get the enclosing scope's symbol table */
get_parent()2014 CTcPrsSymtab *get_parent() const { return parent_; }
2015
2016 /* find a symbol; returns null if the symbol isn't defined */
find(const textchar_t * sym,size_t len)2017 class CTcSymbol *find(const textchar_t *sym, size_t len)
2018 { return find(sym, len, 0); }
2019
find(const textchar_t * sym)2020 class CTcSymbol *find(const textchar_t *sym)
2021 { return find(sym, strlen(sym), 0); }
2022
2023 /*
2024 * Find a symbol; returns null if the symbol isn't defined. If
2025 * symtab is not null, we'll fill it in with the actual symbol table
2026 * in which we found the symbol; this might be an enclosing symbol
2027 * table, since we search up the enclosing scope list.
2028 */
2029 class CTcSymbol *find(const textchar_t *sym, size_t len,
2030 CTcPrsSymtab **symtab);
2031
2032 /* find a symbol without changing its referenced status */
2033 class CTcSymbol *find_noref(const textchar_t *sym, size_t len,
2034 CTcPrsSymtab **symtab);
2035
2036 /*
2037 * Find a symbol; if the symbol isn't defined, log an error and add
2038 * the symbol as type "undefined". Because we add a symbol entry if
2039 * the symbol isn't defined, this *always* returns a valid symbol
2040 * object.
2041 */
find_or_def_undef(const char * sym,size_t len,int copy_str)2042 class CTcSymbol *find_or_def_undef(const char *sym, size_t len,
2043 int copy_str)
2044 {
2045 return find_or_def(sym, len, copy_str, TCPRS_UNDEF_ADD_UNDEF);
2046 }
2047
2048 /*
2049 * Find a symbol; if the symbol isn't defined, log a warning and
2050 * define the symbol as type property. Because we add an entry if
2051 * the symbol isn't defined, this *always* returns a valid symbol
2052 * object.
2053 */
find_or_def_prop(const char * sym,size_t len,int copy_str)2054 class CTcSymbol *find_or_def_prop(const char *sym, size_t len,
2055 int copy_str)
2056 {
2057 return find_or_def(sym, len, copy_str, TCPRS_UNDEF_ADD_PROP);
2058 }
2059
2060 /*
2061 * Find a symbol; if the symbol isn't defined, define the symbol as
2062 * type property with no warning. This should be used when it is
2063 * unambiguous that a symbol is meant as a property name. Because we
2064 * add an entry if the symbol isn't defined, this *always* returns a
2065 * valid symbol object.
2066 */
find_or_def_prop_explicit(const char * sym,size_t len,int copy_str)2067 class CTcSymbol *find_or_def_prop_explicit(const char *sym, size_t len,
2068 int copy_str)
2069 {
2070 return find_or_def(sym, len, copy_str,
2071 TCPRS_UNDEF_ADD_PROP_NO_WARNING);
2072 }
2073
2074 /*
2075 * Find a symbol. If the symbol isn't defined, and a "self" object
2076 * is available, define the symbol as a property. If the symbol
2077 * isn't defined an no "self" object is available, add an
2078 * "undefined" entry for the symbol.
2079 */
find_or_def_prop_implied(const char * sym,size_t len,int copy_str,int is_self_avail)2080 class CTcSymbol *find_or_def_prop_implied(const char *sym, size_t len,
2081 int copy_str, int is_self_avail)
2082 {
2083 return find_or_def(sym, len, copy_str,
2084 is_self_avail
2085 ? TCPRS_UNDEF_ADD_PROP : TCPRS_UNDEF_ADD_UNDEF);
2086 }
2087
2088 /* add a formal parameter symbol */
2089 void add_formal(const textchar_t *sym, size_t len, int formal_num,
2090 int copy_str);
2091
2092 /* add a local variable symbol */
2093 class CTcSymLocal *add_local(const textchar_t *sym, size_t len,
2094 int local_num, int copy_str,
2095 int init_assigned, int init_referenced);
2096
2097 /* add a 'goto' symbol */
2098 class CTcSymLabel *add_code_label(const textchar_t *sym, size_t len,
2099 int copy_str);
2100
2101 /* add an entry to the table */
2102 void add_entry(class CTcSymbol *sym);
2103
2104 /* remove an entry */
2105 void remove_entry(class CTcSymbol *sym);
2106
2107 /* enumerate entries in the table through a callback */
2108 void enum_entries(void (*func)(void *, class CTcSymbol *), void *ctx);
2109
2110 /*
2111 * Scan the symbol table and check for unreferenced locals. Logs an
2112 * error for each unreferenced or unassigned local.
2113 */
2114 void check_unreferenced_locals();
2115
2116 /*
2117 * Get/set my debugging list index - this is the index of this table
2118 * in the list for this function or method. The index values start
2119 * at 1 - a value of zero indicates that the symbol table isn't part
2120 * of any list.
2121 */
get_list_index()2122 int get_list_index() const { return list_index_; }
set_list_index(int n)2123 void set_list_index(int n) { list_index_ = n; }
2124
2125 /* get/set the next entry in the linked list */
get_list_next()2126 CTcPrsSymtab *get_list_next() const { return list_next_; }
set_list_next(CTcPrsSymtab * nxt)2127 void set_list_next(CTcPrsSymtab *nxt) { list_next_ = nxt; }
2128
2129 protected:
2130 /* add an entry to a global symbol table */
2131 static void add_to_global_symtab(CTcPrsSymtab *tab, CTcSymbol *entry);
2132
2133 /* get the underlying hash table */
get_hashtab()2134 class CVmHashTable *get_hashtab() const { return hashtab_; }
2135
2136 /* enumeration callback - check for unreferenced locals */
2137 static void unref_local_cb(void *ctx, class CTcSymbol *sym);
2138
2139 /*
2140 * find a symbol, or define a new symbol, according to the given
2141 * action mode, if the symbol is undefined
2142 */
2143 class CTcSymbol *find_or_def(const textchar_t *sym, size_t len,
2144 int copy_str, tcprs_undef_action action);
2145
2146 /* enclosing scope (parent) symbol table */
2147 CTcPrsSymtab *parent_;
2148
2149 /* hash table */
2150 class CVmHashTable *hashtab_;
2151
2152 /* hash function */
2153 static class CVmHashFunc *hash_func_;
2154
2155 /*
2156 * Next symbol table in local scope chain. For each function or
2157 * method, we keep a simple linear list of the local scopes so that
2158 * they can be written to the debugging records. We also keep an
2159 * index value giving its position in the list, so that we can store
2160 * references to the table using the list index.
2161 */
2162 CTcPrsSymtab *list_next_;
2163 int list_index_;
2164 };
2165
2166
2167 /* ------------------------------------------------------------------------ */
2168 /*
2169 * Debugger symbol table interface. This is an abstract interface that
2170 * debuggers can implement to allow us to search for symbols that are
2171 * obtained from a compiled program's debugger records. To keep the
2172 * compiler independent of the target architecture and the debugger's
2173 * own internal structures, we define this abstract interface that the
2174 * debugger must implement.
2175 *
2176 * Since this type of symbol table is provided by a debugger as a view
2177 * on the symbol information in a previously compiled program, the
2178 * parser naturally has no need to add symbols to the table; hence the
2179 * only required operations are symbol lookups.
2180 */
2181 class CTcPrsDbgSymtab
2182 {
2183 public:
2184 /*
2185 * Get information on a symbol. Returns true if the symbol is
2186 * found, false if not. If we find the symbol, fills in the
2187 * information structure with the appropriate data.
2188 */
2189 virtual int find_symbol(const textchar_t *sym, size_t len,
2190 struct tcprsdbg_sym_info *info) = 0;
2191 };
2192
2193 /*
2194 * Debugger local symbol information structure
2195 */
struct tcprsdbg_sym_info
{
    /* symbol type */
    enum tc_symtype_t sym_type;

    /* local/parameter number */
    uint var_id;

    /*
     *   context variable index - 0 if it's not a context local (so a
     *   non-zero value marks the symbol as a context local)
     */
    int ctx_arr_idx;

    /* stack frame index */
    uint frame_idx;
};
2210
2211
2212
2213 /* ------------------------------------------------------------------------ */
2214 /*
2215 * Parse Tree storage manager.
2216 *
2217 * The parse tree has some special characteristics that make it
2218 * desirable to use a special memory manager for it. First, the parse
2219 * tree consists of many small objects, so we would like to have as
2220 * little overhead per object for memory tracking as possible. Second,
2221 * parse tree objects all have a similar lifetime: we create the entire
2222 * parse tree as we scan the source, then use it to generate target
2223 * code, then discard the whole thing.
2224 *
2225 * To manage memory efficiently for the parse tree, we define our own
2226 * memory manager for parse tree objects. The memory manager is very
2227 * simple, fast, and has minimal per-object overhead. We simply
2228 * maintain a list of large blocks, then suballocate requests out of the
2229 * large blocks. Each time we run out of space in a block, we allocate
2230 * a new block. We do not keep track of any extra tracking information
2231 * per node, so a node cannot be individually freed; however, the entire
2232 * block list can be freed at once, which is exactly the behavior we
2233 * want.
2234 */
class CTcPrsMem
{
public:
    CTcPrsMem();
    ~CTcPrsMem();

    /*
     *   Allocate storage: returns a pointer to 'siz' bytes from the pool.
     *   There is no corresponding per-object free; storage is released
     *   only through the reset() routines.
     */
    void *alloc(size_t siz);

    /* save the current pool state, for later resetting */
    void save_state(struct tcprsmem_state_t *state);

    /*
     *   reset the pool to the given state - delete all objects allocated
     *   in the pool since the state was saved
     */
    void reset(const struct tcprsmem_state_t *state);

    /* reset to initial state */
    void reset();

private:
    /* delete all parser memory */
    void delete_all();

    /* allocate a new block */
    void alloc_block();

    /* head of list of memory blocks */
    struct tcprsmem_blk_t *head_;

    /* tail of list and current memory block */
    struct tcprsmem_blk_t *tail_;

    /* current allocation offset in last block */
    char *free_ptr_;

    /* remaining space available in last block */
    size_t rem_;
};
2275
2276 /*
2277 * state-saving structure
2278 */
/*
 *   Snapshot of a CTcPrsMem pool, as taken by CTcPrsMem::save_state() and
 *   consumed by CTcPrsMem::reset(state).  The three fields parallel the
 *   pool's own tail_, free_ptr_, and rem_ members.
 */
struct tcprsmem_state_t
{
    /* current tail of memory block list */
    struct tcprsmem_blk_t *tail;

    /* current allocation offset in last block */
    char *free_ptr;

    /* current remaining space in last block */
    size_t rem;
};
2290
2291
2292 /*
2293 * Provide an overridden operator new for allocating objects explicitly
2294 * from the pool
2295 */
new(size_t siz,CTcPrsMem * pool)2296 inline void *operator new(size_t siz, CTcPrsMem *pool)
2297 {
2298 return pool->alloc(siz);
2299 }
2300
2301 /*
2302 * provide an array operator new as well
2303 */
2304 inline void *operator new[](size_t siz, CTcPrsMem *pool)
2305 {
2306 return pool->alloc(siz);
2307 }
2308
2309
2310 /*
2311 * parse tree memory block
2312 */
struct tcprsmem_blk_t
{
    /* next block in the list */
    tcprsmem_blk_t *next_;

    /*
     *   This block's byte array (the array extends off the end of the
     *   structure).  It is declared with a single element only as a
     *   placeholder; presumably each block is allocated larger than
     *   sizeof(tcprsmem_blk_t) to make room for the real contents -
     *   confirm in CTcPrsMem::alloc_block().
     */
    char buf_[1];
};
2324
2325 /* ------------------------------------------------------------------------ */
2326 /*
2327 * Special array list subclass that uses parser memory
2328 */
2329 class CPrsArrayList: public CArrayList
2330 {
2331 protected:
2332 /*
2333 * override the memory management functions to use parser memory
2334 */
2335
alloc_mem(size_t siz)2336 virtual void *alloc_mem(size_t siz)
2337 {
2338 /* allocate from the parser pool */
2339 return G_prsmem->alloc(siz);
2340 }
2341
realloc_mem(void * p,size_t oldsiz,size_t newsiz)2342 virtual void *realloc_mem(void *p, size_t oldsiz, size_t newsiz)
2343 {
2344 void *pnew;
2345
2346 /* allocate a new block from the parser pool */
2347 pnew = G_prsmem->alloc(newsiz);
2348
2349 /* copy from the old block to the new block */
2350 memcpy(pnew, p, oldsiz);
2351
2352 /* return the new block */
2353 return pnew;
2354 }
2355
free_mem(void * p)2356 virtual void free_mem(void *p)
2357 {
2358 /*
2359 * do nothing - the parser pool automatically frees everything as a
2360 * block when terminating the parser
2361 */
2362 }
2363 };
2364
2365
2366 /* ------------------------------------------------------------------------ */
2367 /*
2368 * Expression Constant Value object. This object is used to express the
2369 * value of a constant expression.
2370 */
2371 class CTcConstVal
2372 {
2373 public:
CTcConstVal()2374 CTcConstVal()
2375 {
2376 /* the type is unknown */
2377 typ_ = TC_CVT_UNK;
2378 }
2379
2380 /*
2381 * determine if this is a constant value - it is a constant if it
2382 * has any known value
2383 */
is_const()2384 int is_const() const { return (typ_ != TC_CVT_UNK); }
2385
2386 /*
2387 * set the type to unknown - this indicates that there is no valid
2388 * value, which generally means that the associated expression does
2389 * not have a constant value
2390 */
set_unknown()2391 void set_unknown() { typ_ = TC_CVT_UNK; }
2392
2393 /* set from another value */
set(const CTcConstVal * val)2394 void set(const CTcConstVal *val)
2395 {
2396 /* copy the type */
2397 typ_ = val->typ_;
2398
2399 /* copy the value */
2400 val_ = val->val_;
2401 }
2402
2403 /* set an integer value */
set_int(long val)2404 void set_int(long val) { typ_ = TC_CVT_INT; val_.intval_ = val; }
2405
2406 /* set a floating-point value */
set_float(const char * val,size_t len)2407 void set_float(const char *val, size_t len)
2408 {
2409 typ_ = TC_CVT_FLOAT;
2410 val_.floatval_.txt_ = val;
2411 val_.floatval_.len_ = len;
2412 }
2413
2414 /* set an enumerator value */
set_enum(ulong val)2415 void set_enum(ulong val) { typ_ = TC_CVT_ENUM; val_.enumval_ = val; }
2416
2417 /* set a single-quoted string value */
2418 void set_sstr(const char *val, size_t len);
2419
2420 /* set a list value */
2421 void set_list(class CTPNList *lst);
2422
2423 /* set an object reference value */
set_obj(ulong obj)2424 void set_obj(ulong obj)
2425 {
2426 typ_ = TC_CVT_OBJ;
2427 val_.objval_ = obj;
2428 }
2429
2430 /* set a property pointer value */
set_prop(uint prop)2431 void set_prop(uint prop)
2432 {
2433 typ_ = TC_CVT_PROP;
2434 val_.propval_ = prop;
2435 }
2436
2437 /* set a function pointer value */
set_funcptr(class CTcSymFunc * sym)2438 void set_funcptr(class CTcSymFunc *sym)
2439 {
2440 typ_ = TC_CVT_FUNCPTR;
2441 val_.funcptrval_ = sym;
2442 }
2443
2444 /* set an anonymous function pointer value */
set_anon_funcptr(class CTPNCodeBody * code_body)2445 void set_anon_funcptr(class CTPNCodeBody *code_body)
2446 {
2447 typ_ = TC_CVT_ANONFUNCPTR;
2448 val_.codebodyval_ = code_body;
2449 }
2450
2451 /* set a nil/true value */
set_nil()2452 void set_nil() { typ_ = TC_CVT_NIL; }
set_true()2453 void set_true() { typ_ = TC_CVT_TRUE; }
2454
2455 /*
2456 * Set a vocabulary list placeholder. This has no actual value
2457 * during compilation; instead, this is just a placeholder. During
2458 * linking, we'll replace each of these with a list of strings
2459 * giving the actual vocabulary for the property.
2460 */
set_vocab_list()2461 void set_vocab_list() { typ_ = TC_CVT_VOCAB_LIST; }
2462
2463 /* set a nil/true value based on a boolean value */
set_bool(int val)2464 void set_bool(int val)
2465 {
2466 typ_ = (val ? TC_CVT_TRUE : TC_CVT_NIL);
2467 }
2468
2469 /* get my type */
get_type()2470 tc_constval_type_t get_type() const { return typ_; }
2471
2472 /* get my int value (no type checking) */
get_val_int()2473 long get_val_int() const { return val_.intval_; }
2474
2475 /* get my floating point value (no type checking) */
get_val_float()2476 const char *get_val_float() const { return val_.floatval_.txt_; }
get_val_float_len()2477 size_t get_val_float_len() const { return val_.floatval_.len_; }
2478
2479 /* get my enumerator value (no type checking) */
get_val_enum()2480 ulong get_val_enum() const { return val_.enumval_; }
2481
2482 /* get my string value (no type checking) */
get_val_str()2483 const char *get_val_str() const { return val_.strval_.strval_; }
get_val_str_len()2484 size_t get_val_str_len() const { return val_.strval_.strval_len_; }
2485
2486 /* get my list value (no type checking) */
get_val_list()2487 class CTPNList *get_val_list() const { return val_.listval_; }
2488
2489 /* get my object reference value (no type checking) */
get_val_obj()2490 ulong get_val_obj() const { return val_.objval_; }
2491
2492 /* get my property pointer value (no type checking) */
get_val_prop()2493 uint get_val_prop() const { return val_.propval_; }
2494
2495 /* get my function pointer symbol value (no type checking) */
get_val_funcptr_sym()2496 class CTcSymFunc *get_val_funcptr_sym() const
2497 { return val_.funcptrval_; }
2498
2499 /* get my anonymous function pointer value (no type checking) */
get_val_anon_func_ptr()2500 class CTPNCodeBody *get_val_anon_func_ptr() const
2501 { return val_.codebodyval_; }
2502
2503 /*
2504 * Determine if this value equals a given constant value. Returns
2505 * true if so, false if not. We'll set (*can_compare) to true if
2506 * the values are comparable, false if the comparison is not
2507 * meaningful.
2508 */
2509 int is_equal_to(const CTcConstVal *val) const;
2510
2511 /*
2512 * Convert an integer, nil, or true value to a string. Fills in the
2513 * buffer with the result of the conversion if the value wasn't
2514 * already a string. If the value is already a string, we'll simply
2515 * return a pointer to the original string without making a copy.
2516 * Returns null if the value is not convertible to a string.
2517 */
2518 const char *cvt_to_str(char *buf, size_t bufl, size_t *result_len);
2519
2520 /*
2521 * Get my true/nil value. Returns false if the value is nil or zero,
2522 * true if it's anything else.
2523 */
get_val_bool()2524 int get_val_bool() const
2525 {
2526 return !(typ_ == TC_CVT_NIL
2527 || (typ_ == TC_CVT_INT && get_val_int() == 0));
2528 }
2529
2530 private:
2531 /* my type */
2532 tc_constval_type_t typ_;
2533
2534 union
2535 {
2536 /* integer value (valid when typ_ == TC_CVT_INT) */
2537 long intval_;
2538
2539 /* floating-point value (valid when typ_ == TC_CVT_FLOAT) */
2540 struct
2541 {
2542 const char *txt_;
2543 size_t len_;
2544 }
2545 floatval_;
2546
2547 /* enumerator value (valid when typ_ == TC_CVT_ENUM) */
2548 ulong enumval_;
2549
2550 /*
2551 * String value (valid when typ_ == TC_CVT_TYPE_SSTR). We need
2552 * to know the length separately, because the underyling string
2553 * may not be null-terminated.
2554 */
2555 struct
2556 {
2557 const char *strval_;
2558 size_t strval_len_;
2559 }
2560 strval_;
2561
2562 /* my list value */
2563 class CTPNList *listval_;
2564
2565 /* property ID value */
2566 uint propval_;
2567
2568 /* object reference value */
2569 ulong objval_;
2570
2571 /*
2572 * function pointer value - we store the underlying symbol,
2573 * since function pointers are generally not resolved until late
2574 * in the compilation
2575 */
2576 class CTcSymFunc *funcptrval_;
2577
2578 /*
2579 * code body pointer value - we store the underlying code body
2580 * for anonymous functions
2581 */
2582 class CTPNCodeBody *codebodyval_;
2583 } val_;
2584 };
2585
2586
2587 /* ------------------------------------------------------------------------ */
2588 /*
2589 * Assignment Types.
2590 */
2591
/*
 *   Each enumerator identifies one assignment-style operator: plain
 *   assignment, the compound (operate-and-assign) forms, and the
 *   increment/decrement forms.
 */
enum tc_asitype_t
{
    /* simple assignment: x = 1 */
    TC_ASI_SIMPLE,

    /* add to: x += 1 */
    TC_ASI_ADD,

    /* subtract from: x -= 1 */
    TC_ASI_SUB,

    /* multiply by: x *= 1 */
    TC_ASI_MUL,

    /* divide by: x /= 1 */
    TC_ASI_DIV,

    /* modulo: x %= 1 */
    TC_ASI_MOD,

    /* bitwise-and with: x &= 1 */
    TC_ASI_BAND,

    /* bitwise-or with: x |= 1 */
    TC_ASI_BOR,

    /* bitwise-xor with: x ^= 1 */
    TC_ASI_BXOR,

    /* shift left: x <<= 1 */
    TC_ASI_SHL,

    /* shift right: x >>= 1 */
    TC_ASI_SHR,

    /* pre-increment: ++x */
    TC_ASI_PREINC,

    /* pre-decrement: --x */
    TC_ASI_PREDEC,

    /* post-increment: x++ */
    TC_ASI_POSTINC,

    /* post-decrement: x-- */
    TC_ASI_POSTDEC
};
2639
2640
2641 /* ------------------------------------------------------------------------ */
2642 /*
2643 * Expression Operator Parsers. We construct a tree of these operator
2644 * parsers so that we can express the expression grammar in a relatively
2645 * compact and declarative notation.
2646 */
2647
2648 /*
2649 * basic operator parser
2650 */
class CTcPrsOp
{
public:
    /*
     *   Parse an expression with this operator.  The routine takes no
     *   arguments and is const, so parsing doesn't modify the operator
     *   parser object itself.
     *
     *   Returns a parse node if successful, or null if an error occurs
     *   and the operator parser is unable to make a guess about what was
     *   intended.
     *
     *   NOTE(review): earlier documentation for this routine described an
     *   integer status return and a constant-value out-parameter (*val).
     *   Neither is part of this signature; presumably any constant value
     *   is carried by the returned parse node - confirm against
     *   CTcPrsNode.
     */
    virtual class CTcPrsNode *parse() const = 0;
};
2669
2670 /*
2671 * generic left-associative binary operator
2672 */
2673 class CTcPrsOpBin: public CTcPrsOp
2674 {
2675 public:
CTcPrsOpBin()2676 CTcPrsOpBin()
2677 {
2678 /* no left or right subexpression specified */
2679 left_ = right_ = 0;
2680
2681 /* as-yet unknown operator token */
2682 op_tok_ = TOKT_INVALID;
2683 }
2684
CTcPrsOpBin(tc_toktyp_t typ)2685 CTcPrsOpBin(tc_toktyp_t typ)
2686 {
2687 /* remember my operator token */
2688 op_tok_ = typ;
2689 }
2690
CTcPrsOpBin(const CTcPrsOp * left,const CTcPrsOp * right,tc_toktyp_t typ)2691 CTcPrsOpBin(const CTcPrsOp *left, const CTcPrsOp *right, tc_toktyp_t typ)
2692 {
2693 /* remember my left and right sub-operators */
2694 left_ = left;
2695 right_ = right;
2696
2697 /* remember my operator token */
2698 op_tok_ = typ;
2699 }
2700
2701 /* parse the binary expression */
2702 class CTcPrsNode *parse() const;
2703
2704 /* build a new tree out of our left-hand and right-hand subtrees */
2705 virtual class CTcPrsNode
2706 *build_tree(class CTcPrsNode *left,
2707 class CTcPrsNode *right) const = 0;
2708
2709 /*
2710 * Try evaluating a constant result. If the two values can be
2711 * combined with the operator to yield a constant value result,
2712 * create a new parse node for the constant value (or update one of
2713 * the given subnodes) and return it. If we can't provide a
2714 * constant value, return null.
2715 *
2716 * By default, we'll indicate that the expression does not have a
2717 * valid constant value.
2718 */
2719 virtual class CTcPrsNode
eval_constant(class CTcPrsNode * left,class CTcPrsNode * right)2720 *eval_constant(class CTcPrsNode *left,
2721 class CTcPrsNode *right) const
2722 {
2723 /* indicate that we cannot synthesize a constant value */
2724 return 0;
2725 }
2726
2727 /* get/set my token */
get_op_tok()2728 tc_toktyp_t get_op_tok() const { return op_tok_; }
set_op_tok(tc_toktyp_t tok)2729 void set_op_tok(tc_toktyp_t tok) { op_tok_ = tok; }
2730
2731 protected:
2732 /* operator that can be parsed for my left-hand side */
2733 const CTcPrsOp *left_;
2734
2735 /* operator that can be parsed for my right-hand side */
2736 const CTcPrsOp *right_;
2737
2738 /* my operator token */
2739 tc_toktyp_t op_tok_;
2740 };
2741
2742 /*
2743 * Binary Operator Group. This is a group of operators at a common
2744 * precedence level. The group has an array of binary operators that
2745 * are all at the same level of precedence; we'll evaluate the left
2746 * suboperator, then check the token in the input stream against each of
2747 * our group's operators, applying the one that matches, if one matches.
2748 */
2749 class CTcPrsOpBinGroup: public CTcPrsOp
2750 {
2751 public:
CTcPrsOpBinGroup(const CTcPrsOp * left,const CTcPrsOp * right,const class CTcPrsOpBin * const * ops)2752 CTcPrsOpBinGroup(const CTcPrsOp *left, const CTcPrsOp *right,
2753 const class CTcPrsOpBin *const *ops)
2754 {
2755 /* remember my left and right suboperators */
2756 left_ = left;
2757 right_ = right;
2758
2759 /* remember the operators in my group */
2760 ops_ = ops;
2761 }
2762
2763 class CTcPrsNode *parse() const;
2764
2765 protected:
2766 /* find and apply an operator to the parsed left-hand side */
2767 int find_and_apply_op(CTcPrsNode **lhs) const;
2768
2769 /* my left and right suboperators */
2770 const CTcPrsOp *left_;
2771 const CTcPrsOp *right_;
2772
2773 /* group of binary operators at this precedence level */
2774 const class CTcPrsOpBin *const *ops_;
2775 };
2776
2777 /*
 *   Binary operator group for comparison operators.  This is similar to
2779 * other binary groups, but also includes the special "is in" and "not
2780 * in" operators.
2781 */
class CTcPrsOpBinGroupCompare: public CTcPrsOpBinGroup
{
public:
    /*
     *   Create the group.  The arguments are passed through unchanged to
     *   the CTcPrsOpBinGroup constructor; this class adds no data of its
     *   own.
     */
    CTcPrsOpBinGroupCompare(const class CTcPrsOp *left,
                            const class CTcPrsOp *right,
                            const class CTcPrsOpBin *const *ops)
        : CTcPrsOpBinGroup(left, right, ops)
    {
    }

    /* parse a comparison expression (replaces the base class parser) */
    class CTcPrsNode *parse() const;

protected:
    /* parse the 'in' list portion of the expression */
    class CTPNArglist *parse_inlist() const;
};
2798
2799 /* comma operator */
class CTcPrsOpComma: public CTcPrsOpBin
{
public:
    /*
     *   create with the given sub-operators; the operator token is fixed
     *   as TOKT_COMMA
     */
    CTcPrsOpComma(const CTcPrsOp *left, const CTcPrsOp *right)
        : CTcPrsOpBin(left, right, TOKT_COMMA) { }

    /* evaluate constant result */
    class CTcPrsNode
        *eval_constant(class CTcPrsNode *left,
                       class CTcPrsNode *right) const;

    /* build a new tree out of our left-hand and right-hand subtrees */
    class CTcPrsNode
        *build_tree(class CTcPrsNode *left,
                    class CTcPrsNode *right) const;
};
2816
2817 /* logical OR */
class CTcPrsOpOr: public CTcPrsOpBin
{
public:
    /*
     *   create with the given sub-operators; the operator token is fixed
     *   as TOKT_OROR
     */
    CTcPrsOpOr(const CTcPrsOp *left, const CTcPrsOp *right)
        : CTcPrsOpBin(left, right, TOKT_OROR) { }

    /* evaluate constant result */
    class CTcPrsNode
        *eval_constant(class CTcPrsNode *left,
                       class CTcPrsNode *right) const;

    /* build a new tree out of our left-hand and right-hand subtrees */
    class CTcPrsNode
        *build_tree(class CTcPrsNode *left,
                    class CTcPrsNode *right) const;
};
2834
2835 /* logical AND */
class CTcPrsOpAnd: public CTcPrsOpBin
{
public:
    /*
     *   create with the given sub-operators; the operator token is fixed
     *   as TOKT_ANDAND
     */
    CTcPrsOpAnd(const CTcPrsOp *left, const CTcPrsOp *right)
        : CTcPrsOpBin(left, right, TOKT_ANDAND) { }

    /* evaluate constant result */
    class CTcPrsNode
        *eval_constant(class CTcPrsNode *left,
                       class CTcPrsNode *right) const;

    /* build a new tree out of our left-hand and right-hand subtrees */
    class CTcPrsNode
        *build_tree(class CTcPrsNode *left,
                    class CTcPrsNode *right) const;
};
2852
2853 /* general magnitude comparison operators */
2854 class CTcPrsOpRel: public CTcPrsOpBin
2855 {
2856 public:
CTcPrsOpRel(tc_toktyp_t typ)2857 CTcPrsOpRel(tc_toktyp_t typ) : CTcPrsOpBin(typ) { }
2858
2859 /* evaluate constant result */
2860 class CTcPrsNode
2861 *eval_constant(class CTcPrsNode *left,
2862 class CTcPrsNode *right) const;
2863
2864 protected:
2865 /*
2866 * Get the result true/false value, given the result of the
2867 * comparison. For example, if this is a greater-than operator,
2868 * this should return TRUE if comp > 0, FALSE otherwise.
2869 */
2870 virtual int get_bool_val(int comparison_value) const = 0;
2871 };
2872
2873 /* comparison - greater than */
2874 class CTcPrsOpGt: public CTcPrsOpRel
2875 {
2876 public:
CTcPrsOpGt()2877 CTcPrsOpGt() : CTcPrsOpRel(TOKT_GT) { }
2878
2879 /* get the boolean value for a comparison sense */
get_bool_val(int comp)2880 int get_bool_val(int comp) const { return comp > 0; }
2881
2882 /* build a new tree out of our left-hand and right-hand subtrees */
2883 class CTcPrsNode
2884 *build_tree(class CTcPrsNode *left,
2885 class CTcPrsNode *right) const;
2886 };
2887
2888 /* comparison - greater than or equal to */
2889 class CTcPrsOpGe: public CTcPrsOpRel
2890 {
2891 public:
CTcPrsOpGe()2892 CTcPrsOpGe() : CTcPrsOpRel(TOKT_GE) { }
2893
2894 /* get the boolean value for a comparison sense */
get_bool_val(int comp)2895 int get_bool_val(int comp) const { return comp >= 0; }
2896
2897 /* build a new tree out of our left-hand and right-hand subtrees */
2898 class CTcPrsNode
2899 *build_tree(class CTcPrsNode *left,
2900 class CTcPrsNode *right) const;
2901 };
2902
2903 /* comparison - less than */
2904 class CTcPrsOpLt: public CTcPrsOpRel
2905 {
2906 public:
CTcPrsOpLt()2907 CTcPrsOpLt() : CTcPrsOpRel(TOKT_LT) { }
2908
2909 /* get the boolean value for a comparison sense */
get_bool_val(int comp)2910 int get_bool_val(int comp) const { return comp < 0; }
2911
2912 /* build a new tree out of our left-hand and right-hand subtrees */
2913 class CTcPrsNode
2914 *build_tree(class CTcPrsNode *left,
2915 class CTcPrsNode *right) const;
2916 };
2917
2918 /* comparison - less than or equal to */
2919 class CTcPrsOpLe: public CTcPrsOpRel
2920 {
2921 public:
CTcPrsOpLe()2922 CTcPrsOpLe() : CTcPrsOpRel(TOKT_LE) { }
2923
2924 /* get the boolean value for a comparison sense */
get_bool_val(int comp)2925 int get_bool_val(int comp) const { return comp <= 0; }
2926
2927 /* build a new tree out of our left-hand and right-hand subtrees */
2928 class CTcPrsNode
2929 *build_tree(class CTcPrsNode *left,
2930 class CTcPrsNode *right) const;
2931 };
2932
2933 /*
2934 * Equality/inequality comparison
2935 */
2936 class CTcPrsOpEqComp: public CTcPrsOpBin
2937 {
2938 public:
CTcPrsOpEqComp(tc_toktyp_t typ)2939 CTcPrsOpEqComp(tc_toktyp_t typ) : CTcPrsOpBin(typ) { }
2940
2941 /* evaluate constant result */
2942 class CTcPrsNode
2943 *eval_constant(class CTcPrsNode *left,
2944 class CTcPrsNode *right) const;
2945
2946 protected:
2947 /* get the boolean value to use if the operands are equal */
2948 virtual int get_bool_val(int ops_equal) const = 0;
2949 };
2950
2951
2952 /*
2953 * Equality comparison
2954 */
2955 class CTcPrsOpEq: public CTcPrsOpEqComp
2956 {
2957 public:
2958 /* start out in C mode - use '==' operator by default */
CTcPrsOpEq()2959 CTcPrsOpEq()
2960 : CTcPrsOpEqComp(TOKT_EQEQ) { }
2961
2962 /* set the current equality operator */
set_eq_op(tc_toktyp_t op)2963 void set_eq_op(tc_toktyp_t op) { op_tok_ = op; }
2964
2965 /* build a new tree out of our left-hand and right-hand subtrees */
2966 class CTcPrsNode
2967 *build_tree(class CTcPrsNode *left,
2968 class CTcPrsNode *right) const;
2969
2970 /* get the boolean value to use if the operands are equal */
get_bool_val(int ops_equal)2971 virtual int get_bool_val(int ops_equal) const { return ops_equal; }
2972 };
2973
2974 /*
2975 * Inequality comparison
2976 */
2977 class CTcPrsOpNe: public CTcPrsOpEqComp
2978 {
2979 public:
CTcPrsOpNe()2980 CTcPrsOpNe() : CTcPrsOpEqComp(TOKT_NE) { }
2981
2982 /* build a new tree out of our left-hand and right-hand subtrees */
2983 class CTcPrsNode
2984 *build_tree(class CTcPrsNode *left,
2985 class CTcPrsNode *right) const;
2986
2987 /* get the boolean value to use if the operands are equal */
get_bool_val(int ops_equal)2988 virtual int get_bool_val(int ops_equal) const { return !ops_equal; }
2989 };
2990
2991 /*
2992 * binary arithmetic operators
2993 */
2994 class CTcPrsOpArith: public CTcPrsOpBin
2995 {
2996 public:
CTcPrsOpArith(tc_toktyp_t typ)2997 CTcPrsOpArith(tc_toktyp_t typ)
2998 : CTcPrsOpBin(typ) { }
2999
CTcPrsOpArith(const CTcPrsOp * left,const CTcPrsOp * right,tc_toktyp_t typ)3000 CTcPrsOpArith(const CTcPrsOp *left, const CTcPrsOp *right,
3001 tc_toktyp_t typ)
3002 : CTcPrsOpBin(left, right, typ) { }
3003
3004 /* evaluate constant result */
3005 class CTcPrsNode
3006 *eval_constant(class CTcPrsNode *left,
3007 class CTcPrsNode *right) const;
3008
3009 protected:
3010 /* calculate the result */
3011 virtual long calc_result(long val1, long val2) const = 0;
3012 };
3013
3014 /* bitwise OR */
3015 class CTcPrsOpBOr: public CTcPrsOpArith
3016 {
3017 public:
CTcPrsOpBOr(const CTcPrsOp * left,const CTcPrsOp * right)3018 CTcPrsOpBOr(const CTcPrsOp *left, const CTcPrsOp *right)
3019 : CTcPrsOpArith(left, right, TOKT_OR) { }
3020
3021 /* build a new tree out of our left-hand and right-hand subtrees */
3022 class CTcPrsNode
3023 *build_tree(class CTcPrsNode *left,
3024 class CTcPrsNode *right) const;
3025
3026 protected:
3027 /* calculate the result */
calc_result(long val1,long val2)3028 virtual long calc_result(long val1, long val2) const
3029 { return val1 | val2; }
3030 };
3031
3032 /* bitwise XOR */
3033 class CTcPrsOpBXor: public CTcPrsOpArith
3034 {
3035 public:
CTcPrsOpBXor(const CTcPrsOp * left,const CTcPrsOp * right)3036 CTcPrsOpBXor(const CTcPrsOp *left, const CTcPrsOp *right)
3037 : CTcPrsOpArith(left, right, TOKT_XOR) { }
3038
3039 /* build a new tree out of our left-hand and right-hand subtrees */
3040 class CTcPrsNode
3041 *build_tree(class CTcPrsNode *left,
3042 class CTcPrsNode *right) const;
3043
3044 protected:
3045 /* calculate the result */
calc_result(long val1,long val2)3046 virtual long calc_result(long val1, long val2) const
3047 { return val1 ^ val2; }
3048 };
3049
3050 /* bitwise AND */
3051 class CTcPrsOpBAnd: public CTcPrsOpArith
3052 {
3053 public:
CTcPrsOpBAnd(const CTcPrsOp * left,const CTcPrsOp * right)3054 CTcPrsOpBAnd(const CTcPrsOp *left, const CTcPrsOp *right)
3055 : CTcPrsOpArith(left, right, TOKT_AND) { }
3056
3057 /* build a new tree out of our left-hand and right-hand subtrees */
3058 class CTcPrsNode
3059 *build_tree(class CTcPrsNode *left,
3060 class CTcPrsNode *right) const;
3061
3062 protected:
3063 /* calculate the result */
calc_result(long val1,long val2)3064 virtual long calc_result(long val1, long val2) const
3065 { return val1 & val2; }
3066 };
3067
3068 /*
3069 * shift left
3070 */
3071 class CTcPrsOpShl: public CTcPrsOpArith
3072 {
3073 public:
CTcPrsOpShl()3074 CTcPrsOpShl() : CTcPrsOpArith(TOKT_SHL) { }
3075
3076 /* build a new tree out of our left-hand and right-hand subtrees */
3077 class CTcPrsNode
3078 *build_tree(class CTcPrsNode *left,
3079 class CTcPrsNode *right) const;
3080
3081 protected:
calc_result(long a,long b)3082 long calc_result(long a, long b) const { return a << b; }
3083 };
3084
3085 /*
3086 * shift right
3087 */
3088 class CTcPrsOpShr: public CTcPrsOpArith
3089 {
3090 public:
CTcPrsOpShr()3091 CTcPrsOpShr() : CTcPrsOpArith(TOKT_SHR) { }
3092
3093 /* build a new tree out of our left-hand and right-hand subtrees */
3094 class CTcPrsNode
3095 *build_tree(class CTcPrsNode *left,
3096 class CTcPrsNode *right) const;
3097
3098 protected:
calc_result(long a,long b)3099 long calc_result(long a, long b) const { return a >> b; }
3100 };
3101
3102 /*
3103 * multiply
3104 */
3105 class CTcPrsOpMul: public CTcPrsOpArith
3106 {
3107 public:
CTcPrsOpMul()3108 CTcPrsOpMul() : CTcPrsOpArith(TOKT_TIMES) { }
3109
3110 /* build a new tree out of our left-hand and right-hand subtrees */
3111 class CTcPrsNode
3112 *build_tree(class CTcPrsNode *left,
3113 class CTcPrsNode *right) const;
3114
3115 protected:
calc_result(long a,long b)3116 long calc_result(long a, long b) const { return a * b; }
3117 };
3118
3119 /*
3120 * divide
3121 */
3122 class CTcPrsOpDiv: public CTcPrsOpArith
3123 {
3124 public:
CTcPrsOpDiv()3125 CTcPrsOpDiv()
3126 : CTcPrsOpArith(TOKT_DIV) { }
3127
CTcPrsOpDiv(tc_toktyp_t tok)3128 CTcPrsOpDiv(tc_toktyp_t tok)
3129 : CTcPrsOpArith(tok) { }
3130
3131 /* build a new tree out of our left-hand and right-hand subtrees */
3132 class CTcPrsNode
3133 *build_tree(class CTcPrsNode *left,
3134 class CTcPrsNode *right) const;
3135
3136 protected:
3137 long calc_result(long a, long b) const;
3138 };
3139
3140
3141 /*
3142 * mod - inherit from divide operator to pick up divide-by-zero checking
3143 */
3144 class CTcPrsOpMod: public CTcPrsOpDiv
3145 {
3146 public:
CTcPrsOpMod()3147 CTcPrsOpMod() : CTcPrsOpDiv(TOKT_MOD) { }
3148
3149 /* build a new tree out of our left-hand and right-hand subtrees */
3150 class CTcPrsNode
3151 *build_tree(class CTcPrsNode *left,
3152 class CTcPrsNode *right) const;
3153
3154 protected:
3155 long calc_result(long a, long b) const;
3156 };
3157
3158 /*
3159 * add
3160 */
3161 class CTcPrsOpAdd: public CTcPrsOpArith
3162 {
3163 public:
CTcPrsOpAdd()3164 CTcPrsOpAdd() : CTcPrsOpArith(TOKT_PLUS) { }
3165
3166 /* build a new tree out of our left-hand and right-hand subtrees */
3167 class CTcPrsNode
3168 *build_tree(class CTcPrsNode *left,
3169 class CTcPrsNode *right) const;
3170
3171 /* evaluate constant result */
3172 class CTcPrsNode
3173 *eval_constant(class CTcPrsNode *left,
3174 class CTcPrsNode *right) const;
3175
3176 protected:
calc_result(long a,long b)3177 long calc_result(long a, long b) const { return a + b; }
3178 };
3179
3180 /*
3181 * subtract
3182 */
3183 class CTcPrsOpSub: public CTcPrsOpArith
3184 {
3185 public:
CTcPrsOpSub()3186 CTcPrsOpSub() : CTcPrsOpArith(TOKT_MINUS) { }
3187
3188 /* build a new tree out of our left-hand and right-hand subtrees */
3189 class CTcPrsNode
3190 *build_tree(class CTcPrsNode *left,
3191 class CTcPrsNode *right) const;
3192
3193 /* evaluate constant result */
3194 class CTcPrsNode
3195 *eval_constant(class CTcPrsNode *left,
3196 class CTcPrsNode *right) const;
3197
3198 protected:
calc_result(long a,long b)3199 long calc_result(long a, long b) const { return a - b; }
3200 };
3201
3202 /*
3203 * Unary Operators
3204 */
3205 class CTcPrsOpUnary: public CTcPrsOp
3206 {
3207 public:
3208 class CTcPrsNode *parse() const;
3209
3210 /*
3211 * evaluate a constant subscript expression; returns a constant
3212 * parse node expression if the subscript can be evaluated to a
3213 * compile-time constant, or null if not
3214 */
3215 static class CTcPrsNode
3216 *eval_const_subscript(class CTcPrsNode *lhs,
3217 class CTcPrsNode *subscript);
3218
3219 /*
3220 * evaluate a constant NOT expression; returns a constant parse node
3221 * expression if the logical negation can be evaluated to a
3222 * compile-time constant, or null if not
3223 */
3224 static class CTcPrsNode *eval_const_not(class CTcPrsNode *lhs);
3225
3226 /* parse a double-quoted string with embedded expressions */
3227 static class CTcPrsNode *parse_dstr_embed();
3228
3229 /* parse a list */
3230 static class CTcPrsNode *parse_list();
3231
3232 /* parse a primary expression */
3233 static class CTcPrsNode *parse_primary();
3234
3235 protected:
3236 /* parse an anonymous function */
3237 static class CTcPrsNode *parse_anon_func(int short_form);
3238
3239 /* parse a logical NOT operator */
3240 static class CTcPrsNode *parse_not(CTcPrsNode *sub);
3241
3242 /* parse a bitwise NOT operator */
3243 static class CTcPrsNode *parse_bnot(CTcPrsNode *sub);
3244
3245 /* parse an address-of operator */
3246 class CTcPrsNode *parse_addr() const;
3247
3248 /* parse an arithmetic positive operator */
3249 static class CTcPrsNode *parse_pos(CTcPrsNode *sub);
3250
3251 /* parse an arithmetic negative operator */
3252 static class CTcPrsNode *parse_neg(CTcPrsNode *sub);
3253
3254 /* parse a pre- or post-increment operator */
3255 static class CTcPrsNode *parse_inc(int pre, CTcPrsNode *sub);
3256
3257 /* parse a pre- or post-decrement operator */
3258 static class CTcPrsNode *parse_dec(int pre, CTcPrsNode *sub);
3259
3260 /* parse a 'new' operator */
3261 static class CTcPrsNode *parse_new(CTcPrsNode *sub, int is_transient);
3262
3263 /* parse a 'delete' operator */
3264 static class CTcPrsNode *parse_delete(CTcPrsNode *sub);
3265
3266 /* parse a postfix expression */
3267 static class CTcPrsNode *parse_postfix(int allow_member_expr,
3268 int allow_call_expr);
3269
3270 /* parse a function or method call */
3271 static class CTcPrsNode *parse_call(CTcPrsNode *lhs);
3272
3273 /* parse an argument list */
3274 static class CTPNArglist *parse_arg_list();
3275
3276 /* parse a subscript */
3277 static class CTcPrsNode *parse_subscript(CTcPrsNode *lhs);
3278
3279 /* parse a member selection ('.' operator) */
3280 static class CTcPrsNode *parse_member(CTcPrsNode *lhs);
3281
3282 /* parse an "inherited" expression */
3283 static class CTcPrsNode *parse_inherited();
3284
3285 /* parse a "delegated" expression */
3286 static class CTcPrsNode *parse_delegated();
3287
3288 /* local symbol enumeration callback for anonymous function setup */
3289 static void enum_for_anon(void *ctx, class CTcSymbol *sym);
3290
3291 /* local symbol enumeration for anon function - follow-up */
3292 static void enum_for_anon2(void *ctx, class CTcSymbol *sym);
3293 };
3294
3295 /*
3296 * tertiary conditional operator
3297 */
3298 class CTcPrsOpIf: public CTcPrsOp
3299 {
3300 public:
3301 class CTcPrsNode *parse() const;
3302 };
3303
3304 /*
3305 * Assignment operators (including the regular assignment, "="/":=",
3306 * plus all calculate-and-assign operators: "+=", "-=", etc)
3307 */
3308 class CTcPrsOpAsi: public CTcPrsOp
3309 {
3310 public:
CTcPrsOpAsi()3311 CTcPrsOpAsi()
3312 {
3313 /* start out with the C-mode simple assignment operator */
3314 asi_op_ = TOKT_EQ;
3315 }
3316
3317 /* parse an assignment */
3318 class CTcPrsNode *parse() const;
3319
3320 /* set the current simple assignment operator */
set_asi_op(tc_toktyp_t tok)3321 void set_asi_op(tc_toktyp_t tok) { asi_op_ = tok; }
3322
3323 private:
3324 /* current simple assignment operator */
3325 tc_toktyp_t asi_op_;
3326 };
3327
3328 #endif /* TCPRS_H */
3329
3330