1 /* $Header: d:/cvsroot/tads/tads3/tcprs.h,v 1.5 1999/07/11 00:46:58 MJRoberts Exp $ */
2 
3 /*
4  *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
5  *
6  *   Please see the accompanying license file, LICENSE.TXT, for information
7  *   on using and copying this software.
8  */
9 /*
10 Name
11   tcprs.h - TADS 3 Compiler - parser
12 Function
13 
14 Notes
15 
16 Modified
17   04/29/99 MJRoberts  - Creation
18 */
19 
20 #ifndef TCPRS_H
21 #define TCPRS_H
22 
23 #include <assert.h>
24 
25 #include "tcglob.h"
26 #include "tctok.h"
27 #include "tctargty.h"
28 #include "tcprstyp.h"
29 
30 
31 /* ------------------------------------------------------------------------ */
/*
 *   Object ID type.  This is an alias for the 'ulong' integer type.
 */
typedef ulong tc_obj_id;
36 
/*
 *   Property ID type.  This is an alias for the 'uint' integer type; the
 *   parser's property ID accessors (get_constructor_prop(), for example)
 *   return values of this type.
 */
typedef uint tc_prop_id;
41 
42 
43 /* ------------------------------------------------------------------------ */
44 /*
45  *   scope data structure
46  */
/* the symbol table class is only referenced by pointer here */
class CTcPrsSymtab;

struct tcprs_scope_t
{
    /* symbol table for the locals of this scope */
    CTcPrsSymtab *local_symtab;

    /* local symbol table of the scope that encloses this one */
    CTcPrsSymtab *enclosing_symtab;

    /* count of local variables allocated in this scope */
    int local_cnt;
};
58 
59 /* ------------------------------------------------------------------------ */
60 /*
61  *   Code body parsing types.  Each type of code body is essentially the
62  *   same with minor variations, so we use a common code body parser that
63  *   checks the parsing type to apply the variations.
64  */
enum tcprs_codebodytype
{
    /* ordinary code body belonging to a function or method */
    TCPRS_CB_NORMAL = 0,

    /* code body of an anonymous function */
    TCPRS_CB_ANON_FN = 1,

    /* code body of a short-form anonymous function */
    TCPRS_CB_SHORT_ANON_FN = 2
};
76 
77 
78 /* ------------------------------------------------------------------------ */
/*
 *   Index of the saved method context within a local variable context
 *   array.  Whenever local variable context arrays are in use, the saved
 *   method context is always stored at this index.
 */
#define TCPRS_LOCAL_CTX_METHODCTX  1
84 
85 
86 /* ------------------------------------------------------------------------ */
87 /*
88  *   Parser
89  */
90 class CTcParser
91 {
92 public:
93     CTcParser();
94     ~CTcParser();
95 
96     /* initialize - call this after the code generator is set up */
97     void init();
98 
99     /*
100      *   Write an exported symbol file.  An exported symbol file
101      *   facilitates separate compilation by providing a listing of the
102      *   symbols defined in another module.  If module A depends on the
103      *   symbols from module B, the user can first create an exported
104      *   symbol file for module B, then can compile module A in the
105      *   presence of B's symbol file, without actually loading B, and
106      *   without manually entering a set of external definitions in module
107      *   A's source code.
108      */
109     void write_symbol_file(class CVmFile *fp, class CTcMake *make_obj);
110 
111     /*
112      *   Seek to the start of the build configuration information in a symbol
113      *   file.  The return value is the number of bytes stored in the build
114      *   configuration block; on return, the file object will have its seek
115      *   offset set to the first byte of the build configuration data.
116      *   Returns zero if the symbol file is invalid or does not contain any
117      *   configuration data.
118      */
119     static ulong seek_sym_file_build_config_info(class CVmFile *fp);
120 
121     /*
122      *   Write the global table to an object file.
123      */
124     void write_to_object_file(class CVmFile *fp);
125 
126     /*
127      *   Read an object file and load it into the global symbol table.  We
128      *   will fill in the object and property ID translation tables
129      *   provided with the translated values for the object and property
130      *   symbols that we find in the object file.
131      *
132      *   Returns zero on success; logs error messages and returns non-zero
133      *   on error.  Note that a non-zero value should be returned only
134      *   when the file appears to be corrupted or an I/O error occurs;
135      *   errors involving conflicting symbols, or other problems that do
136      *   not prevent us from continuing to read the file in an orderly
137      *   fashion, should not return failure but should simply log the
138      *   error and continue; this way, we can detect any additional symbol
139      *   conflicts or other errors.  This routine should return failure
140      *   only when it is not possible to continue reading the file.
141      */
142     int load_object_file(class CVmFile *fp,
143                          const textchar_t *fname,
144                          tctarg_obj_id_t *obj_xlat,
145                          tctarg_prop_id_t *prop_xlat,
146                          ulong *enum_xlat);
147 
148     /*
149      *   Apply internal object/property ID fixups.  This traverses the
150      *   symbol table and calls each symbol's apply_internal_fixups()
151      *   method.  This can be called once after loading all object files.
152      */
153     void apply_internal_fixups();
154 
155     /*
156      *   Read an exported symbol file.  Reads the file and loads the
157      *   global symbol table with the symbols in the file, with each
158      *   symbol marked as external.
159      *
160      *   This can be used for separate compilation.  If module A depends
161      *   on symbols in module B, first create a symbol file for module B,
     *   then module A can be compiled simply by pre-loading B's symbol
163      *   file.  Any symbol files that a module depends upon must be loaded
164      *   before the module is compiled - symbol file loading must precede
165      *   parsing.
166      *
167      *   If any errors occur, we'll log the errors and return non-zero.
168      *   We'll return zero on success.
169      */
170     int read_symbol_file(class CVmFile *fp);
171 
172     /* get the global symbol table */
get_global_symtab()173     class CTcPrsSymtab *get_global_symtab() const { return global_symtab_; }
174 
175     /* get the current local symbol table */
get_local_symtab()176     class CTcPrsSymtab *get_local_symtab() const { return local_symtab_; }
177 
178     /* get the 'goto' symbol table */
get_goto_symtab()179     class CTcPrsSymtab *get_goto_symtab() const { return goto_symtab_; }
180 
181     /* set the current pragma C mode */
182     void set_pragma_c(int mode);
183 
184     /* turn preprocess expression mode on or off */
set_pp_expr_mode(int f)185     void set_pp_expr_mode(int f) { pp_expr_mode_ = f; }
186 
187     /* get the current preprocess expression mode flag */
get_pp_expr_mode()188     int get_pp_expr_mode() const { return pp_expr_mode_; }
189 
190     /* get/set the syntax-only mode flag */
get_syntax_only()191     int get_syntax_only() const { return syntax_only_; }
set_syntax_only(int f)192     void set_syntax_only(int f) { syntax_only_ = f; }
193 
194     /*
195      *   Get the constructor and finalize property ID's - all constructors
196      *   and finalizers have these property ID's respectively
197      */
get_constructor_prop()198     tc_prop_id get_constructor_prop() const { return constructor_prop_; }
get_finalize_prop()199     tc_prop_id get_finalize_prop() const { return finalize_prop_; }
200 
201     /* get the constructor property symbol */
get_constructor_sym()202     class CTcSymProp *get_constructor_sym() const { return constructor_sym_; }
203 
204     /* get the object-call property */
get_objcall_prop()205     tc_prop_id get_objcall_prop() const { return objcall_prop_; }
206 
207     /*
208      *   Check for unresolved external symbols.  Scans the global symbol
209      *   table and logs an error for each unresolved external.  Returns
210      *   true if any unresolved externals exist, false if not.
211      */
212     int check_unresolved_externs();
213 
214     /*
215      *   build the dictionaries - scans the global symbol table, and
216      *   inserts each object symbol's dictionary words into its
217      *   corresponding dictionary
218      */
219     void build_dictionaries();
220 
221     /* build the grammar productions */
222     void build_grammar_productions();
223 
224     /*
225      *   Top-level parser.  Parse functions, objects, and other top-level
226      *   definitions and declarations.
227      */
228     class CTPNStmProg *parse_top();
229 
230     /*
231      *   Parse a required semicolon.  If the semicolon is present, we'll
232      *   simply skip it.  If it's missing, we'll log an error and try to
233      *   resynchronize.  If we find something that looks like it should go
234      *   at the end of an expression, we'll try to skip up to the next
235      *   semicolon; otherwise, we'll simply stay put.
236      *
237      *   Returns zero if the caller should proceed, non-zero if we're at
238      *   end of file, in which case there's nothing more for the caller to
239      *   parse.
240      */
241     static int parse_req_sem();
242 
243     /*
244      *   Skip to the next semicolon, ignoring any tokens up to that point.
245      *   This can be used when the caller encounters an error that makes
246      *   it impossible to process the current statement further, and wants
247      *   to find the next semicolon in the hope that it will be a good
248      *   place to start again with the next statement.
249      *
250      *   Returns zero if the caller should proceed, non-zero if we reach
251      *   the end of the file.
252      */
253     static int skip_to_sem();
254 
255     /*
256      *   Parse an expression.  This parses a top-level "comma" expression.
257      */
258     class CTcPrsNode *parse_expr();
259 
260     /*
261      *   Parse a condition expression.  This parses a top-level "comma"
262      *   expression, but displays a warning if the outermost operator in
263      *   the expression is an assignment, because such expressions are
264      *   very frequently meant as comparisons, but the '=' operator was
     *   inadvertently used instead of '=='.
266      */
267     class CTcPrsNode *parse_cond_expr();
268 
269     /*
270      *   Parse a value expression or a double-quoted string expression
271      *   (including a double-quoted string with embedded expressions).  If
272      *   allow_comma_expr is true, we'll parse a comma expression;
273      *   otherwise, we'll parse an assignment expression.  (A comma
274      *   expression is broader than an assignment expression, since the
275      *   comma separates assignment expressions.)
276      */
277     class CTcPrsNode *parse_expr_or_dstr(int allow_comma_expr);
278 
279     /*
280      *   Parse an assignment expression - this is the next precedence
281      *   level down from comma expressions.  In certain contexts, a
282      *   top-level comma expression is not allowed because a comma has a
283      *   separate meaning (in the initializer clause of a 'for' statement,
284      *   for example, or in a list element).
285      */
286     class CTcPrsNode *parse_asi_expr();
287 
288     /* parse an 'enum' top-level statement */
289     void parse_enum(int *err);
290 
291     /* parse a 'dictionary' top-level statement */
292     class CTPNStmTop *parse_dict(int *err);
293 
294     /* parse a 'grammar' top-level statement */
295     class CTPNStmTop *parse_grammar(int *err, int replace, int modify);
296 
297     /* parse and flatten a set of grammar rules */
298     class CTcPrsGramNode *flatten_gram_rule(int *err);
299 
300     /* parse a 'grammar' OR node */
301     class CTcPrsGramNode *parse_gram_or(int *err, int level);
302 
303     /* parse a 'grammar' CAT node */
304     class CTcPrsGramNode *parse_gram_cat(int *err, int level);
305 
306     /* parse a 'grammar' qualifier int value */
307     int parse_gram_qual_int(int *err, const char *qual_name, int *stm_end);
308 
    /* skip to the end of a malformed grammar qualifier */
310     void parse_gram_qual_skip(int *err, int *stm_end);
311 
312     /*
313      *   Parse a 'function' top-level statement.  If 'is_extern' is true,
314      *   the function is being defined externally, so it should have no
315      *   code body defined here (just the prototype).  If 'replace' is
316      *   true, we're replacing an existing function.
317      *
318      *   If 'func_kw_present' is true, the 'function' keyword is present
319      *   and must be skipped; otherwise, the function definition elides
320      *   the 'function' keyword and starts directly with the function name
321      *   symbol.
322      */
323     class CTPNStmTop *parse_function(int *err, int is_extern,
324                                      int replace, int modify,
325                                      int func_kw_present);
326 
327     /* parse an 'intrinsic' top-level statement */
328     class CTPNStmTop *parse_intrinsic(int *err);
329 
330     /* parse an 'intrinsic class' top-level statement */
331     class CTPNStmTop *parse_intrinsic_class(int *err);
332 
333     /* parse an 'extern' top-level statement */
334     void parse_extern(int *err);
335 
336     /*
     *   parse an object or function definition (this is called when the
338      *   first thing in a statement is a symbol; we must check what
339      *   follows to determine what type of definition it is)
340      */
341     class CTPNStmTop *parse_object_or_func(int *err, int replace,
342                                            int suppress_error,
343                                            int *suppress_next_error);
344 
345     /* parse a template definition statement */
346     class CTPNStmTop *parse_template_def(int *err,
347                                          const class CTcToken *class_tok);
348 
349     /* add a template definition */
350     void add_template_def(class CTcSymObj *class_sym,
351                           class CTcObjTemplateItem *item_head,
352                           size_t item_cnt);
353 
354     /* add inherited template definitions */
355     void add_inherited_templates(class CTcSymObj *sc_sym,
356                                  class CTcObjTemplateItem *item_head,
357                                  size_t item_cnt);
358 
359     /*
360      *   expand the 'inherited' keyword in a template for the given
361      *   superclass template and add the result to the template list for the
362      *   class
363      */
364     void expand_and_add_inherited_template(class CTcSymObj *sc_sym,
365                                            class CTcObjTemplateItem *items,
366                                            class CTcObjTemplate *sc_tpl);
367 
368     /*
369      *   build a list of superclass templates, for expanding an 'inherited'
370      *   token in a template definition
371      */
372     void build_super_template_list(struct inh_tpl_entry **list_head,
373                                    struct inh_tpl_entry **list_tail,
374                                    class CTcSymObj *sc_sym);
375 
376     /* parse an 'object' statement */
377     class CTPNStmTop *parse_object_stm(int *err, int is_transient);
378 
379     /*
380      *   parse an object definition that starts with a '+' string; this
381      *   also parses '+ property' statements
382      */
383     class CTPNStmTop *parse_plus_object(int *err);
384 
385     /*
386      *   Parse an object definition.  If 'replace' is true, this
387      *   definition is to replace a previous definition of the same
388      *   object; if 'modify' is true, this definition is to modify a
389      *   previous definition.  If 'is_class' is true, the definition is
390      *   for a class, otherwise it's for a static instance.
391      *
392      *   If the definition uses the '+' notation to set the location,
393      *   plus_cnt gives the number of '+' signs preceding the object
394      *   definition.
395      */
396     class CTPNStmTop *parse_object(int *err, int replace, int modify,
397                                    int is_class, int plus_cnt,
398                                    int is_transient);
399 
400     /* find or define an object symbol */
401     CTcSymObj *find_or_def_obj(const char *tok_txt, size_t tok_len,
402                                int replace, int modify, int *is_class,
403                                class CTcSymObj **mod_orig_sym,
404                                class CTcSymMetaclass **meta_sym,
405                                int *is_transient);
406 
407     /* parse an anonymous object */
408     class CTPNStmObject *parse_anon_object(int *err, int plus_cnt,
409                                            int is_nested,
410                                            struct tcprs_term_info *term_info,
411                                            int is_transient);
412 
413     /*
414      *   Parse an object body.  We start parsing from the colon that
415      *   introduces the class list, and parse the class list and the
416      *   property list for the object.
417      *
418      *   If 'is_anon' is true, this is an anonymous object.  'obj_sym'
419      *   should be null in this case.
420      *
421      *   If 'is_nested' is true, this is a nested object defined in-line in
422      *   an object's property list.  Note that is_nested implies is_anon,
423      *   since nested objects are always anonymous.
424      *
     *   If this is a 'modify' definition, 'mod_orig_sym' should be set up
     *   with the synthesized symbol for the modified base object;
     *   otherwise, 'mod_orig_sym' should be null.
428      *
429      *   If 'meta_sym' is non-null, we're modifying an intrinsic class.
430      *   This imposes certain restrictions; in particular, we cannot modify
431      *   a method defined in the native interface to the class.
432      */
433     class CTPNStmObject *parse_object_body(int *err, class CTcSymObj *obj_sym,
434                                            int is_class, int is_anon,
435                                            int is_grammar,
436                                            int is_nested, int modify,
437                                            class CTcSymObj *mod_orig_sym,
438                                            int plus_cnt,
439                                            class CTcSymMetaclass *meta_sym,
440                                            struct tcprs_term_info *term_info,
441                                            int is_transient);
442 
443     /* parse an object template instance in an object body */
444     void parse_obj_template(int *err, class CTPNStmObject *obj_stm);
445 
446     /* search a superclass list for a template match */
447     const class CTcObjTemplate
448         *find_class_template(const class CTPNSuperclass *first_sc,
449                              class CTcObjTemplateInst *src,
450                              size_t src_cnt, const CTPNSuperclass **def_sc,
451                              int *undescribed_class);
452 
453     /* find a match for a given template in the given list */
454     const class CTcObjTemplate
455         *find_template_match(const class CTcObjTemplate *first_tpl,
456                              class CTcObjTemplateInst *src,
457                              size_t src_cnt);
458 
459     /*
460      *   Match a template to a given actual template parameter list.  Returns
461      *   true if we match, false if not.  We'll fill in the actual list with
462      *   the property symbols that we matched; these values are only
463      *   meaningful if we return true to indicate a match.
464      */
465     int match_template(const class CTcObjTemplateItem *tpl_head,
466                        class CTcObjTemplateInst *src, size_t src_cnt);
467 
468     /* parse property definition within an object */
469     void parse_obj_prop(int *err, class CTPNStmObject *obj_stm, int replace,
470                         class CTcSymMetaclass *meta_sym,
471                         struct tcprs_term_info *term_info,
472                         struct propset_def *propset_stack, int propset_depth,
473                         int enclosing_obj_is_nested);
474 
475     /* parse a class definition */
476     class CTPNStmTop *parse_class(int *err);
477 
478     /* parse a 'modify' definition */
479     class CTPNStmTop *parse_modify(int *err);
480 
481     /* parse a 'replace' definition */
482     class CTPNStmTop *parse_replace(int *err);
483 
484     /* parse a 'property' statement */
485     void parse_property(int *err);
486 
487     /* parse an 'export' statement */
488     void parse_export(int *err);
489 
490     /* add an export for the given symbol; returns the new export record */
491     class CTcPrsExport *add_export(const char *sym, size_t sym_len);
492 
493     /* add an export record to our list */
494     void add_export_to_list(class CTcPrsExport *exp);
495 
496     /* get the head of the export list */
get_exp_head()497     class CTcPrsExport *get_exp_head() const { return exp_head_; }
498 
499     /*
500      *   Parse a function or method body, starting with the formal parameter
501      *   list.  If 'eq_before_brace' is set, we expect an '=' before the
502      *   opening brace of the code body, and we allow the expression syntax,
503      *   where an expression enclosed in parentheses can be used.
504      *   'self_valid' indicates whether or not 'self' is valid in the context
505      *   of the code being compiled; for an object method, 'self' is usually
506      *   valid, while for a stand-alone function it isn't.
507      */
508     class CTPNCodeBody *parse_code_body(int eq_before_brace, int is_obj_prop,
509                                         int self_valid,
510                                         int *p_argc, int *p_varargs,
511                                         int *p_varargs_list,
512                                         class CTcSymLocal **
513                                             p_varargs_list_local,
514                                         int *has_retval, int *err,
515                                         class CTcPrsSymtab *local_symtab,
516                                         tcprs_codebodytype cb_type,
517                                         struct propset_def *propset_stack,
518                                         int propset_depth,
519                                         struct CTcCodeBodyRef *enclosing);
520 
521     /* parse a nested code body (such as an anonymous function) */
522     class CTPNCodeBody *parse_nested_code_body(
523         int eq_before_brace,
524         int self_valid,
525         int *p_argc, int *p_varargs,
526         int *p_varargs_list,
527         class CTcSymLocal **p_varargs_list_local,
528         int *has_retval, int *err,
529         class CTcPrsSymtab *local_symtab,
530         tcprs_codebodytype cb_type);
531 
532     /* parse a formal parameter list */
533     void parse_formal_list(int count_only, int opt_allowed,
534                            int *argc, int *opt_argc, int *varargs,
535                            int *varargs_list,
536                            class CTcSymLocal **varargs_list_local,
537                            int *err, int base_formal_num,
538                            int for_short_anon_func);
539 
540     /*
541      *   Parse a compound statement.  The caller must skip the opening
542      *   '{'; on return, we'll have skipped the closing '}'.
543      *   enclosing_symtab is the enclosing scope's symbol table, and
544      *   local_symtab is the symbol table for the new scope within the
545      *   compound statement; if the caller has not already allocated a new
546      *   symbol table for the inner scope, it should simply pass the same
547      *   value for both symbol tables.
548      *
549      *   'enclosing_switch' is the immediately enclosing switch statement,
550      *   if any.  This is only set when we're parsing the immediate body
551      *   of a switch statement.
552      */
553     class CTPNStmComp *parse_compound(int *err, int skip_lbrace,
554                                       class CTPNStmSwitch *enclosing_switch,
555                                       int use_enclosing_scope);
556 
557     /* parse a local variable definition */
558     class CTPNStm *parse_local(int *err);
559 
560     /* parse a local initializer */
561     class CTcPrsNode *parse_local_initializer(class CTcSymLocal *lcl,
562                                               int *err);
563 
564     /*
565      *   Parse an individual statement.
566      *
567      *   If 'compound_use_enclosing_scope' is true, then if the statement
568      *   is a compound statement (i.e., the current token is a left
569      *   brace), the compound statement will use the current scope rather
570      *   than creating its own scope.  Normally, a compound statement
571      *   establishes its own scope, so that local variables can hide
572      *   locals and parameters defined outside the braces.  In certain
573      *   cases, however, locals defined within the braces should share the
574      *   enclosing scope: at the top level of a function or method, for
575      *   example, the formal parameters and the locals within the function
576      *   body go in the same scope, so the function body's compound
577      *   statement doesn't create its own scope.
578      */
579     class CTPNStm *parse_stm(int *err, class CTPNStmSwitch *enclosing_switch,
580                              int compound_use_enclosing_scope);
581 
582     /* parse a 'case' label */
583     class CTPNStm *parse_case(int *err,
584                               class CTPNStmSwitch *enclosing_switch);
585 
586     /* parse a 'default' label */
587     class CTPNStm *parse_default(int *err,
588                                  class CTPNStmSwitch *enclosing_switch);
589 
590     /* parse an 'if' statement */
591     class CTPNStm *parse_if(int *err);
592 
593     /* parse a 'return' statement */
594     class CTPNStm *parse_return(int *err);
595 
596     /* parse a 'for' statement */
597     class CTPNStm *parse_for(int *err);
598 
599     /* parse a 'foreach' statement */
600     class CTPNStm *parse_foreach(int *err);
601 
602     /* parse a 'break' statement */
603     class CTPNStm *parse_break(int *err);
604 
605     /* parse a 'continue' statement */
606     class CTPNStm *parse_continue(int *err);
607 
608     /* parse a 'while' */
609     class CTPNStm *parse_while(int *err);
610 
611     /* parse a 'do-while' */
612     class CTPNStm *parse_do_while(int *err);
613 
614     /* parse a 'switch' */
615     class CTPNStm *parse_switch(int *err);
616 
617     /* parse a 'goto' */
618     class CTPNStm *parse_goto(int *err);
619 
620     /* parse a 'try' */
621     class CTPNStm *parse_try(int *err);
622 
623     /* parse a 'throw' */
624     class CTPNStm *parse_throw(int *err);
625 
626     /*
627      *   Create a symbol node.  We'll look up the symbol in local scope.
628      *   If we find the symbol in local scope, we'll return a resolved
629      *   symbol node for the local scope item.  If the symbol isn't
630      *   defined in local scope, we'll return an unresolved symbol node,
631      *   so that the symbol's resolution can be deferred until code
632      *   generation.
633      */
634     class CTcPrsNode *create_sym_node(const textchar_t *sym, size_t sym_len);
635 
636     /*
637      *   Get the source file descriptor and line number for the current
638      *   source line.  We note this at the start of each statement, so
639      *   that a statement node constructed when we finish parsing the
640      *   statement can record the location of the start of the statement.
641      */
get_cur_desc()642     class CTcTokFileDesc *get_cur_desc() const { return cur_desc_; }
get_cur_linenum()643     long get_cur_linenum() const { return cur_linenum_; }
644 
645     /*
646      *   Get/set the current enclosing statement.  An enclosing statement
647      *   is a 'try' or 'label:' container.  At certain times, we need to
648      *   know the current enclosing statement, or one of its enclosing
649      *   statements; for example, a 'break' with a label must find the
650      *   label in the enclosing statement list to know where to jump to
651      *   after the 'break', and must also know about all of the enclosing
     *   'try' blocks out to that point so that it can invoke their
653      *   'finally' blocks.
654      */
get_enclosing_stm()655     class CTPNStmEnclosing *get_enclosing_stm() const
656         { return enclosing_stm_; }
set_enclosing_stm(class CTPNStmEnclosing * stm)657     class CTPNStmEnclosing *set_enclosing_stm(class CTPNStmEnclosing *stm)
658     {
659         class CTPNStmEnclosing *old_enclosing;
660 
661         /* remember the current enclosing statement for a moment */
662         old_enclosing = enclosing_stm_;
663 
664         /* set the new enclosing statement */
665         enclosing_stm_ = stm;
666 
667         /*
668          *   return the previous enclosing statement - this allows the
669          *   caller to restore the previous enclosing statement upon
670          *   leaving a nested block, if that's why the caller is setting a
671          *   new enclosing statement
672          */
673         return old_enclosing;
674     }
675 
676     /* get the current code body reference object */
get_cur_code_body()677     struct CTcCodeBodyRef *get_cur_code_body() const
678         { return cur_code_body_; }
679 
680     /* determine if 'self' is valid in the current context */
is_self_valid()681     int is_self_valid() const { return self_valid_; }
682 
683     /*
684      *   get/set the 'self' reference status - this indicates whether or not
685      *   'self' has been referenced, explicitly via the 'self'
686      *   pseudo-variable or implicitly (such as via a property reference or
687      *   method call), in the code body currently being parsed
688      */
self_referenced()689     int self_referenced() const { return self_referenced_; }
set_self_referenced(int f)690     void set_self_referenced(int f) { self_referenced_ = f; }
691 
692     /*
693      *   get/set the full method context reference status - this indicates
694      *   whether or not any of the method context variables (self,
695      *   targetprop, targetobj, definingobj) have been referenced, explicitly
696      *   or implicitly, in the code body currently being parsed
697      */
full_method_ctx_referenced()698     int full_method_ctx_referenced() const
699         { return full_method_ctx_referenced_; }
set_full_method_ctx_referenced(int f)700     void set_full_method_ctx_referenced(int f)
701         { full_method_ctx_referenced_ = f; }
702 
703     /*
704      *   Get/set the flag indicating whether or not the local context of the
705      *   outermost code body needs 'self'.  The outer code body needs 'self'
706      *   in the local context if any lexically nested code body requires
707      *   access to 'self'.
708      */
local_ctx_needs_self()709     int local_ctx_needs_self() const { return local_ctx_needs_self_; }
set_local_ctx_needs_self(int f)710     void set_local_ctx_needs_self(int f) { local_ctx_needs_self_ = f; }
711 
712     /*
713      *   Get/set the flag indicating whether or not the local context of the
714      *   outermost code body needs the full method context stored in its
715      *   local context.  The outer code body needs the full context stored if
716      *   any lexically nested code body requires access to any of the method
717      *   context variables besides 'self' (targetprop, targetobj,
718      *   definingobj).
719      */
local_ctx_needs_full_method_ctx()720     int local_ctx_needs_full_method_ctx() const
721         { return local_ctx_needs_full_method_ctx_; }
set_local_ctx_needs_full_method_ctx(int f)722     void set_local_ctx_needs_full_method_ctx(int f)
723         { local_ctx_needs_full_method_ctx_ = f; }
724 
725     /*
726      *   Add a code label.  This creates a 'goto' symbol table for the
727      *   current code body if one doesn't already exist
728      */
729     class CTcSymLabel *add_code_label(const class CTcToken *tok);
730 
731     /*
732      *   Set the debugger local symbol table.  Returns the previous symbol
733      *   table so that it can be restored if desired.
734      */
set_debug_symtab(class CTcPrsDbgSymtab * tab)735     class CTcPrsDbgSymtab *set_debug_symtab(class CTcPrsDbgSymtab *tab)
736     {
737         class CTcPrsDbgSymtab *old_tab;
738 
739         /* remember the original for later use */
740         old_tab = debug_symtab_;
741 
742         /* set the new table */
743         debug_symtab_ = tab;
744 
745         /* return the original */
746         return old_tab;
747     }
748 
749     /*
750      *   given a (1-based) object file symbol index, get the symbol
751      */
get_objfile_sym(uint idx)752     class CTcSymbol *get_objfile_sym(uint idx)
753         { return (idx == 0 ? 0 : obj_sym_list_[idx - 1]); }
754 
755     /*
756      *   given a 1-based object file symbol index, get an object symbol;
757      *   if the symbol does not refer to an object, we'll return null
758      */
759     class CTcSymObj *get_objfile_objsym(uint idx);
760 
    /*
     *   given a (1-based) object file dictionary index, get the
     *   dictionary entry
     */
get_obj_dict(uint idx)765     class CTcDictEntry *get_obj_dict(uint idx)
766         { return (idx == 0 ? 0 : obj_dict_list_[idx - 1]); }
767 
768     /* add a dictionary object loaded from the object file */
769     void add_dict_from_obj_file(class CTcSymObj *sym);
770 
771     /* add a symbol object loaded from the object file */
772     void add_sym_from_obj_file(uint idx, class CTcSymbol *sym);
773 
774     /*
775      *   Get the next object file symbol index.  Object file symbol
776      *   indices are used to relate symbols stored in the object file to
777      *   the corresponding symbol object in memory when the object file is
778      *   reloaded.
779      */
get_next_obj_file_sym_idx()780     uint get_next_obj_file_sym_idx()
781     {
782         /* return the next index, consuming the index value */
783         return obj_file_sym_idx_++;
784     }
785 
786     /*
787      *   Get the next object file dictionary index.
788      */
get_next_obj_file_dict_idx()789     uint get_next_obj_file_dict_idx()
790     {
791         /* return the next index, consuming the index value */
792         return obj_file_dict_idx_++;
793     }
794 
795     /*
796      *   add an anonymous function or other anonymous top-level statement
797      *   to our list of nested top-level statements
798      */
799     void add_nested_stm(class CTPNStmTop *stm);
800 
801     /* add an anonymous object to our list */
802     void add_anon_obj(class CTcSymObj *obj);
803 
804     /* add a non-symbolic object ID */
805     void add_nonsym_obj(tctarg_obj_id_t id);
806 
807     /* determine if the current code body has a local context */
has_local_ctx()808     int has_local_ctx() const { return has_local_ctx_ != 0; }
809 
810     /* get the local context variable number */
get_local_ctx_var()811     int get_local_ctx_var() const { return local_ctx_var_num_; }
812 
813     /* set up a local context */
814     void init_local_ctx();
815 
816     /* allocate a context variable property ID */
817     tctarg_prop_id_t alloc_ctx_var_prop();
818 
819     /*
820      *   allocate a context variable index - this assigns an array index
821      *   for a context variable within the context object that contains
822      *   the shared locals for its scope
823      */
824     int alloc_ctx_arr_idx();
825 
826     /* allocate a local for use as a local context holder */
alloc_ctx_holder_var()827     int alloc_ctx_holder_var() { return alloc_local(); }
828 
829     /* get the maximum number of locals required in the function */
get_max_local_cnt()830     int get_max_local_cnt() const { return max_local_cnt_; }
831 
832     /*
833      *   find a grammar production symbol, adding a new one if needed,
834      *   returning the grammar production list entry for the object
835      */
836     class CTcGramProdEntry *declare_gramprod(const char *sym, size_t len);
837 
838     /* find a grammar production list entry for a given object */
839     class CTcGramProdEntry *get_gramprod_entry(class CTcSymObj *sym);
840 
841     /* find a grammar production symbol, adding a new one if needed */
842     class CTcSymObj *find_or_def_gramprod(const char *txt, size_t len,
843                                           class CTcGramProdEntry **entryp);
844 
845     /* allocate a new enumerator ID */
new_enum_id()846     ulong new_enum_id() { return next_enum_id_++; }
847 
848     /* get the number of enumerator ID's allocated */
get_enum_count()849     ulong get_enum_count() const { return next_enum_id_; }
850 
851     /*
852      *   Look up a property symbol, adding it if not yet defined.  If the
853      *   symbol is defined as another type, we'll show an error if
854      *   show_err is true, and return null.
855      */
856     CTcSymProp *look_up_prop(const class CTcToken *tok, int show_err);
857 
858     /* get the '+' property for tracking the location graph */
get_plus_prop()859     CTcSymProp *get_plus_prop() const { return plus_prop_; }
860 
861     /*
862      *   Read a length-prefixed string from a file.  Copies the string into
863      *   tokenizer space (which is guaranteed valid throughout compilation),
864      *   and returns a pointer to the tokenizer copy.  If ret_len is null,
865      *   we'll return a null-terminated string; otherwise, we'll return a
866      *   non-null-terminated string and set *ret_len to the length of the
867      *   string.
868      *
869      *   The string must fit in the temporary buffer to be read, but the
870      *   permanent tokenizer copy is returned rather than the temp buffer.
871      *   If the string doesn't fit in the temp buffer (with null
872      *   termination, if null termination is requested), we'll log the given
873      *   error.
874      */
875     static const char *read_len_prefix_str
876         (CVmFile *fp, char *tmp_buf, size_t tmp_buf_len, size_t *ret_len,
877          int err_if_too_long);
878 
879     /*
880      *   Read a length-prefixed string into the given buffer, null
881      *   terminating the result.  If the string is too long for the buffer,
882      *   we'll flag the given error code and return non-zero.  If
883      *   successful, we'll return zero.
884      */
885     static int read_len_prefix_str(CVmFile *fp, char *buf, size_t buf_len,
886                                    int err_if_too_long);
887 
888 
889     /* get the miscVocab property symbol */
get_miscvocab_prop()890     tctarg_prop_id_t get_miscvocab_prop() const { return miscvocab_prop_; }
891 
892 private:
893     /* clear the anonymous function local context information */
894     void clear_local_ctx();
895 
896     /*
897      *   begin a property expression, saving parser state for later
898      *   restoration with finish_prop_expr
899      */
900     void begin_prop_expr(class CTcPrsPropExprSave *save_info);
901 
902     /*
903      *   Finish a property expression, wrapping it in a code body if
904      *   necessary to allow for an embedded anonymous function.  Returns
905      *   null if no wrapping is required, in which case the original
906      *   expression should continue to be used, or the non-null code body
907      *   wrapper if needed, in which case the original expression should be
908      *   discarded in favor of the fully wrapped code body.
909      */
910     class CTPNCodeBody *finish_prop_expr(class CTcPrsPropExprSave *save_info,
911                                          class CTcPrsNode *expr,
912                                          int is_static,
913                                          class CTcSymProp *prop_sym);
914 
915     /*
916      *   callback for symbol table enumeration for writing a symbol export
917      *   file
918      */
919     static void write_sym_cb(void *ctx, class CTcSymbol *sym);
920 
921     /* callback for symbol table enumeration for writing an object file */
922     static void write_obj_cb(void *ctx, class CTcSymbol *sym);
923 
924     /* callback for symbol table enumeration for writing cross references */
925     static void write_obj_ref_cb(void *ctx, class CTcSymbol *sym);
926 
927     /* callback for symbol table enumeration for named grammar rules */
928     static void write_obj_gram_cb(void *ctx, class CTcSymbol *sym);
929 
930     /* callback for symbol table enumeration for merging grammar rules */
931     static void build_grammar_cb(void *ctx, class CTcSymbol *sym);
932 
933 
934     /*
935      *   Enter a scope.  Upon entering, we'll remember the current local
936      *   variable data; on leaving, we'll restore the enclosing scope.
937      */
enter_scope(struct tcprs_scope_t * info)938     void enter_scope(struct tcprs_scope_t *info)
939     {
940         /* remember the current scope information */
941         info->local_symtab = local_symtab_;
942         info->enclosing_symtab = enclosing_local_symtab_;
943         info->local_cnt = local_cnt_;
944 
945         /*
946          *   We haven't yet allocated a symbol table local to the new
947          *   scope -- we defer this until we actually need to insert a
948          *   symbol into the new scope.  In order to detect when we need
949          *   to create our own local symbol table, we keep track of the
950          *   enclosing symbol table; when the local table is the same as
951          *   the enclosing table, and we need to insert a symbol, it means
952          *   that we must create a new table for the current scope.
953          */
954         enclosing_local_symtab_ = local_symtab_;
955     }
956 
957     /* leave a scope */
leave_scope(struct tcprs_scope_t * info)958     void leave_scope(struct tcprs_scope_t *info)
959     {
960         /* restore enclosing scope information */
961         local_symtab_ = info->local_symtab;
962         enclosing_local_symtab_ = info->enclosing_symtab;
963 
964         /* return to the local count in the enclosing scope */
965         // $$$ we can't actually do this because variables could
966         //     be allocated after this scope ends, but need lifetimes
967         //     that overlap with the enclosed scope; what we actually
968         //     need to do, if we wanted to optimize things, would be
969         //     to allow this block of variables to be used in *disjoint*
970         //     scopes, but not again in enclosing scopes.  We can easily,
971         //     though suboptimally, handle this by simply not allowing
972         //     the variables in the enclosed scope to be re-used at all
973         //     in the current code block.
974         // local_cnt_ = info->local_cnt;
975     }
976 
977     /*
978      *   Create a local symbol table in the current scope, if necessary.
979      *   If we've already created a local symbol table for the current
980      *   scope, this has no effect.
981      */
982     void create_scope_local_symtab();
983 
984     /* allocate a new local variable ID */
alloc_local()985     int alloc_local()
986     {
987         /*
988          *   if this exceeds the maximum depth in the block so far, note
989          *   the new maximum depth
990          */
991         if (local_cnt_ + 1 > max_local_cnt_)
992             max_local_cnt_ = local_cnt_ + 1;
993 
994         /* return the local number, and increment the counter */
995         return local_cnt_++;
996     }
997 
998     /* find a dictionary symbol, adding a new one if needed */
999     class CTcDictEntry *declare_dict(const char *sym, size_t len);
1000 
1001     /* create a new dictionary list entry */
1002     class CTcDictEntry *create_dict_entry(class CTcSymObj *sym);
1003 
1004     /* find a dictionary list entry for a given object */
1005     class CTcDictEntry *get_dict_entry(class CTcSymObj *sym);
1006 
1007     /* create a new grammar production list entry */
1008     class CTcGramProdEntry *create_gramprod_entry(class CTcSymObj *sym);
1009 
1010     /* symbol enumerator - look for unresolved external references */
1011     static void enum_sym_extref(void *ctx, class CTcSymbol *sym);
1012 
1013     /* symbol enumerator - apply internal fixups */
1014     static void enum_sym_internal_fixup(void *ctx, class CTcSymbol *sym);
1015 
1016     /* symbol enumerator - build dictionary */
1017     static void enum_sym_dict(void *ctx, class CTcSymbol *sym);
1018 
1019     /* enumeration callback - context local conversion */
1020     static void enum_for_ctx_locals(void *ctx, class CTcSymbol *sym);
1021 
1022     /* global symbol table */
1023     class CTcPrsSymtab *global_symtab_;
1024 
1025     /* the constructor property ID and symbol */
1026     tc_prop_id constructor_prop_;
1027     class CTcSymProp *constructor_sym_;
1028 
1029     /* the finalizer property ID */
1030     tc_prop_id finalize_prop_;
1031 
1032     /* object-call property ID */
1033     tc_prop_id objcall_prop_;
1034 
1035     /* grammarInfo property symbol */
1036     class CTcSymProp *graminfo_prop_;
1037 
1038     /* miscVocab property ID */
1039     tc_prop_id miscvocab_prop_;
1040 
1041     /* lexicalParent property symbol */
1042     class CTcSymProp *lexical_parent_sym_;
1043 
1044     /* sourceTextOrder property symbol */
1045     class CTcSymProp *src_order_sym_;
1046 
1047     /*
1048      *   Source text order index.  Each time we encounter an object
1049      *   definition in the source code, we assign the current index value to
1050      *   the object's 'sourceTextOrder' property, then we increment the
1051      *   index.  This provides the game program with information on the order
1052      *   in which static objects appear in the source code, so that the
1053      *   program can sort a collection of objects into their source file
1054      *   order if desired.
1055      */
1056     long src_order_idx_;
1057 
1058     /*
1059      *   flag: in preprocessor constant expression mode; double-quoted
1060      *   strings should be treated the same as single-quoted strings for
1061      *   concatenation and comparisons
1062      */
1063     uint pp_expr_mode_ : 1;
1064 
1065     /*
1066      *   Flag: syntax-only mode.  We use this mode to analyze the syntax
1067      *   of the file without building the image; this is used, for
1068      *   example, to build the exported symbol file for a source file.  In
1069      *   this mode, we'll suppress certain warnings and avoid doing work
1070      *   that's not necessary for syntactic analysis; for example, we
1071      *   won't show "unreachable code" errors.
1072      */
1073     uint syntax_only_ : 1;
1074 
1075     /*
1076      *   Code block parsing state
1077      */
1078 
1079     /*
1080      *   'goto' symbol table for the current code block - there's only one
1081      *   'goto' scope for an entire code block, so this never changes over
1082      *   the course of a code block
1083      */
1084     class CTcPrsSymtab *goto_symtab_;
1085 
1086     /*
1087      *   Current local symbol table.  Each inner scope that defines its
1088      *   own local variables has its own local symbol table, nested within
1089      *   the enclosing scope's.  When leaving an inner scope, this should
1090      *   always be restored to the local symbol table of the enclosing
1091      *   scope.
1092      */
1093     class CTcPrsSymtab *local_symtab_;
1094 
1095     /*
1096      *   Enclosing local symbol table.  If this is the same as
1097      *   local_symtab_, it means that the current scope has not yet
1098      *   created its own local symbol table.  We defer this creation until
1099      *   we find we actually need a local symbol table in a scope, since
1100      *   most scopes don't define any of their own local variables.
1101      */
1102     class CTcPrsSymtab *enclosing_local_symtab_;
1103 
1104     /*
1105      *   Current debugger local symbol table.  When we're compiling a
1106      *   debugger expression, this will provide access to the current
1107      *   local scope in the debug records.
1108      */
1109     class CTcPrsDbgSymtab *debug_symtab_;
1110 
    /*
     *   Number of local variables allocated so far in current code block
     *   -- this reflects nesting to the current innermost scope, because
     *   variables in inner scopes are allocated in the same stack frame as
     *   the enclosing scopes.  Note that leave_scope() currently does NOT
     *   restore this when leaving an inner scope, so locals allocated in
     *   an inner scope are not re-used within the same code block.
     */
1118     int local_cnt_;
1119 
1120     /*
1121      *   maximum local variable depth for the current code block -- this
1122      *   reflects the maximum depth, including all inner scopes so far
1123      */
1124     int max_local_cnt_;
1125 
1126     /*
1127      *   Enclosing statement - this is the innermost 'try' or 'label:'
1128      *   enclosing the current code.
1129      */
1130     class CTPNStmEnclosing *enclosing_stm_;
1131 
1132     /* file descriptor and line number at start of current statement */
1133     class CTcTokFileDesc *cur_desc_;
1134     long cur_linenum_;
1135 
1136     /* currently active dictionary */
1137     class CTcDictEntry *dict_cur_;
1138 
1139     /* head and tail of dictionary list */
1140     class CTcDictEntry *dict_head_;
1141     class CTcDictEntry *dict_tail_;
1142 
1143     /* head and tail of grammar production entry list */
1144     class CTcGramProdEntry *gramprod_head_;
1145     class CTcGramProdEntry *gramprod_tail_;
1146 
1147     /*
1148      *   array of symbols loaded from the object file - these are indexed
1149      *   by the object file symbol index stored in symbol references in
1150      *   the object file, allowing us to fix up references from one symbol
1151      *   to another during loading
1152      */
1153     class CTcSymbol **obj_sym_list_;
1154 
1155     /*
1156      *   array of dictionary objects for the object file being loaded -
1157      *   these are indexed by the dictionary index stored in symbol
1158      *   references in the object file, allowing us to fix up references
1159      *   from an object to its dictionary
1160      */
1161     class CTcDictEntry **obj_dict_list_;
1162 
1163     /* next available object file dictionary index */
1164     uint obj_file_dict_idx_;
1165 
1166     /* next available object file symbol index */
1167     uint obj_file_sym_idx_;
1168 
1169     /* dictionary property list head */
1170     class CTcDictPropEntry *dict_prop_head_;
1171 
1172     /*
1173      *   Head and tail of list of nested top-level statements parsed for the
1174      *   current top-level statement.  This list includes anonymous
1175      *   functions and nested objects, since these statements must
1176      *   ultimately be linked into the top-level statement queue, but can't
1177      *   be linked in while they're being parsed because of their nested
1178      *   location in the recursive descent.  We'll throw each new nested
1179      *   top-level statement into this list as we parse them, then add this
1180      *   list to the top-level statement list when we're done with the
1181      *   entire program.
1182      */
1183     class CTPNStmTop *nested_stm_head_;
1184     class CTPNStmTop *nested_stm_tail_;
1185 
1186     /*
1187      *   Anonymous object list.  This is a list of objects which are
1188      *   defined without symbol names.
1189      */
1190     class CTcSymObj *anon_obj_head_;
1191     class CTcSymObj *anon_obj_tail_;
1192 
1193     /*
1194      *   Non-symbolic object list.  This is a list of objects that are
1195      *   defined without symbols at all.
1196      */
1197     struct tcprs_nonsym_obj *nonsym_obj_head_;
1198     struct tcprs_nonsym_obj *nonsym_obj_tail_;
1199 
1200     /*
1201      *   Object template list - this is the master list of templates for the
1202      *   root object class.
1203      */
1204     class CTcObjTemplate *template_head_;
1205     class CTcObjTemplate *template_tail_;
1206 
1207     /*
1208      *   Object template instance parsing expression array.  Each time we
1209      *   define a new template, we'll make sure this array is long enough
1210      *   for the longest defined template.  We use this list when we're
1211      *   parsing a template instance to keep track of the expressions in
1212      *   the template instance - we can't know until we have the entire
1213      *   list which template we're using, so we must keep track of the
1214      *   entire list until we reach the end of the list.
1215      */
1216     class CTcObjTemplateInst *template_expr_;
1217     size_t template_expr_max_;
1218 
1219     /* head and tail of exported symbol list */
1220     class CTcPrsExport *exp_head_;
1221     class CTcPrsExport *exp_tail_;
1222 
1223     /*
1224      *   Flag: current code body has a local variable context object.  If
1225      *   this is set, we must generate code that sets up the context
1226      *   object on entry to the code body.
1227      */
1228     unsigned int has_local_ctx_ : 1;
1229 
1230     /* local variable number of the code body's local variable context */
1231     int local_ctx_var_num_;
1232 
1233     /* array of context variable property values */
1234     tctarg_prop_id_t *ctx_var_props_;
1235 
1236     /* size of array */
1237     size_t ctx_var_props_size_;
1238 
1239     /* number of context variable property values in the list */
1240     size_t ctx_var_props_cnt_;
1241 
1242     /*
1243      *   number of context variable property values assigned to the
1244      *   current code body
1245      */
1246     size_t ctx_var_props_used_;
1247 
1248     /* next available local variable context index */
1249     int next_ctx_arr_idx_;
1250 
1251     /* reference to the current code body being parsed */
1252     CTcCodeBodyRef *cur_code_body_;
1253 
1254     /* flag: 'self' is valid in current code body */
1255     int self_valid_;
1256 
1257     /*
1258      *   flag: 'self' is used (explicitly or implicitly, such as via a
1259      *   property reference or method call) in the current code body
1260      */
1261     int self_referenced_;
1262 
1263     /*
1264      *   Flag: method context beyond 'self' (targetprop, targetobj,
1265      *   definingobj) is referenced (explicitly or implicitly, such as via
1266      *   'inherited' or 'delegated') in the current code body.
1267      */
1268     int full_method_ctx_referenced_;
1269 
1270     /*
1271      *   Flags: the local context of the outermost code body requires
1272      *   'self'/the full method context to be stored.
1273      */
1274     int local_ctx_needs_self_;
1275     int local_ctx_needs_full_method_ctx_;
1276 
1277     /* next available enumerator ID */
1278     ulong next_enum_id_;
1279 
1280     /*
1281      *   The '+' property - this is the property that defines the
1282      *   containment graph for the purposes of the '+' syntax.
1283      */
1284     class CTcSymProp *plus_prop_;
1285 
1286     /*
1287      *   '+' property location stack.  Each time the program defines an
1288      *   object using the '+' notation to set the location, we'll update our
1289      *   record here of the last object at that depth.  Any time an object
1290      *   is defined at depth N (i.e., using N '+' signs), its location is
1291      *   set to the last object at depth N-1.  An object with no '+' signs
1292      *   is at depth zero.
1293      */
1294     class CTPNStmObject **plus_stack_;
1295     size_t plus_stack_alloc_;
1296 };
1297 
1298 /* ------------------------------------------------------------------------ */
1299 /*
1300  *   Statement termination information.  This is used for certain nested
1301  *   definition parsers, where a lack of termination in the nested
1302  *   definition is to be interpreted as being actually caused by a lack of
1303  *   termination of the enclosing definition.
1304  */
1305 struct tcprs_term_info
1306 {
1307     /* initialize */
inittcprs_term_info1308     void init(class CTcTokFileDesc *desc, long linenum)
1309     {
1310         /* remember the current location */
1311         desc_ = desc;
1312         linenum_ = linenum;
1313 
1314         /* no termination error yet */
1315         unterm_ = FALSE;
1316     }
1317 
1318     /*
1319      *   source location where original terminator might have been - this is
1320      *   where we decided to go into a nested definition, so if it turns out
1321      *   that the definintion shouldn't have been nested after all, there
1322      *   was missing termination here
1323      */
1324     class CTcTokFileDesc *desc_;
1325     long linenum_;
1326 
1327     /*
1328      *   flag: termination was in fact missing in the nested definition; the
1329      *   nested parser sets this to relay the problem to the caller
1330      */
1331     int unterm_;
1332 };
1333 
1334 /* ------------------------------------------------------------------------ */
1335 /*
1336  *   Object template list entry
1337  */
class CTcObjTemplate
{
public:
    /*
     *   Create a template entry for the given item list.  The new entry
     *   starts out unlinked, with a null 'next' pointer.
     */
    CTcObjTemplate(class CTcObjTemplateItem *item_head, size_t item_cnt)
        : items_(item_head), item_cnt_(item_cnt), nxt_(0)
    {
    }

    /* first item in this template's item list */
    class CTcObjTemplateItem *items_;

    /* length of the item list */
    size_t item_cnt_;

    /* link to the next template in the master template list */
    CTcObjTemplate *nxt_;
};
1360 
1361 /*
1362  *   Object template list item
1363  */
1364 class CTcObjTemplateItem
1365 {
1366 public:
CTcObjTemplateItem(class CTcSymProp * prop,tc_toktyp_t tok_type,int is_alt,int is_opt)1367     CTcObjTemplateItem(class CTcSymProp *prop, tc_toktyp_t tok_type,
1368                        int is_alt, int is_opt)
1369     {
1370         /* remember my defining information */
1371         prop_ = prop;
1372         tok_type_ = tok_type;
1373         is_alt_ = is_alt;
1374         is_opt_ = is_opt;
1375 
1376         /* not in a list yet */
1377         nxt_ = 0;
1378     }
1379 
1380     /* property that the item in this position defines */
1381     class CTcSymProp *prop_;
1382 
1383     /* token type of item in this position */
1384     tc_toktyp_t tok_type_;
1385 
1386     /* next item in this template's item list */
1387     CTcObjTemplateItem *nxt_;
1388 
1389     /* flag: this item is an alternative to the previous item */
1390     unsigned int is_alt_ : 1;
1391 
1392     /* flag: this item is optional */
1393     unsigned int is_opt_ : 1;
1394 };
1395 
1396 /*
1397  *   Template item instance - we keep track of the actual parameters to a
1398  *   template with these items.
1399  */
1400 class CTcObjTemplateInst
1401 {
1402 public:
1403     /*
1404      *   expression value for the actual parameter, as either a naked
1405      *   expression (expr_) or as a code body (code_body_) - only one of
1406      *   expr_ or code_body_ will be valid
1407      */
1408     class CTcPrsNode *expr_;
1409     class CTPNCodeBody *code_body_;
1410 
1411     /*
1412      *   the introductory token of the parameter - if the parameter is
1413      *   introduced by an operator token, this will not be part of the
1414      *   expression
1415      */
1416     tc_toktyp_t def_tok_;
1417 
1418     /* the first token of the value */
1419     CTcToken expr_tok_;
1420 
1421     /*
1422      *   The property to which to assign this actual parameter value.  This
1423      *   isn't filled in until we match the full list to an actual template,
1424      *   since we don't know the meanings of the parameters until we match
1425      *   the actuals to an existing template in memory.
1426      */
1427     class CTcSymProp *prop_;
1428 };
1429 
1430 
1431 /* ------------------------------------------------------------------------ */
1432 /*
1433  *   Non-symbolic object list entry
1434  */
1435 struct tcprs_nonsym_obj
1436 {
tcprs_nonsym_objtcprs_nonsym_obj1437     tcprs_nonsym_obj(tctarg_obj_id_t id)
1438     {
1439         /* remember the ID */
1440         id_ = id;
1441 
1442         /* not in a list yet */
1443         nxt_ = 0;
1444     }
1445 
1446     /* ID of this object */
1447     tctarg_obj_id_t id_;
1448 
1449     /* next entry in the list */
1450     tcprs_nonsym_obj *nxt_;
1451 };
1452 
1453 /* ------------------------------------------------------------------------ */
1454 /*
1455  *   Dictionary property list entry.  Each time the source code defines a
1456  *   dictionary property, we'll make an entry in this list.
1457  */
1458 class CTcDictPropEntry
1459 {
1460 public:
CTcDictPropEntry(class CTcSymProp * prop)1461     CTcDictPropEntry(class CTcSymProp *prop)
1462     {
1463         /* remember the property */
1464         prop_ = prop;
1465 
1466         /* not in a list yet */
1467         nxt_ = 0;
1468 
1469         /* not defined for current object yet */
1470         defined_ = FALSE;
1471     }
1472 
1473     /* my property */
1474     class CTcSymProp *prop_;
1475 
1476     /* next entry in list */
1477     CTcDictPropEntry *nxt_;
1478 
1479     /* flag: the current object definition includes this property */
1480     unsigned int defined_ : 1;
1481 };
1482 
1483 /* ------------------------------------------------------------------------ */
1484 /*
1485  *   Dictionary list entry.  Each dictionary object gets an entry in this
1486  *   list.
1487  */
1488 class CTcDictEntry
1489 {
1490 public:
1491     CTcDictEntry(class CTcSymObj *sym);
1492 
1493     /* get/set my object file index */
get_obj_idx()1494     uint get_obj_idx() const { return obj_idx_; }
set_obj_idx(uint idx)1495     void set_obj_idx(uint idx) { obj_idx_ = idx; }
1496 
1497     /* get my object symbol */
get_sym()1498     class CTcSymObj *get_sym() const { return sym_; }
1499 
1500     /* get/set the next item in the list */
get_next()1501     CTcDictEntry *get_next() const { return nxt_; }
set_next(CTcDictEntry * nxt)1502     void set_next(CTcDictEntry *nxt) { nxt_ = nxt; }
1503 
1504     /* add a word to the table */
1505     void add_word(const char *txt, size_t len, int copy,
1506                   tc_obj_id obj, tc_prop_id prop);
1507 
1508     /* write my symbol to the object file if I haven't already done so */
1509     void write_sym_to_obj_file(CVmFile *fp);
1510 
1511     /* get the hash table */
get_hash_table()1512     class CVmHashTable *get_hash_table() const { return hashtab_; }
1513 
1514 protected:
1515     /* enumeration callback - write to object file */
1516     static void enum_cb_writeobj(void *ctx, class CVmHashEntry *entry);
1517 
1518     /* associated object symbol */
1519     class CTcSymObj *sym_;
1520 
1521     /*
1522      *   object file index (we use this to match up the dictionary objects
1523      *   when we re-load the object file)
1524      */
1525     uint obj_idx_;
1526 
1527     /* next item in the dictionary list */
1528     CTcDictEntry *nxt_;
1529 
1530     /* hash table containing the word entries */
1531     class CVmHashTable *hashtab_;
1532 };
1533 
1534 
1535 /*
1536  *   entry in a dictionary list
1537  */
1538 struct CTcPrsDictItem
1539 {
CTcPrsDictItemCTcPrsDictItem1540     CTcPrsDictItem(tc_obj_id obj, tc_prop_id prop)
1541     {
1542         obj_ = obj;
1543         prop_ = prop;
1544         nxt_ = 0;
1545     }
1546 
1547     /* object */
1548     tc_obj_id obj_;
1549 
1550     /* property */
1551     tc_prop_id prop_;
1552 
1553     /* next entry in list */
1554     CTcPrsDictItem *nxt_;
1555 };
1556 
1557 /*
1558  *   Parser dictionary hash table entry
1559  */
1560 class CVmHashEntryPrsDict: public CVmHashEntryCS
1561 {
1562 public:
CVmHashEntryPrsDict(const char * txt,size_t len,int copy)1563     CVmHashEntryPrsDict(const char *txt, size_t len, int copy)
1564         : CVmHashEntryCS(txt, len, copy)
1565     {
1566         /* nothing in my list yet */
1567         list_ = 0;
1568     }
1569 
1570     /* add an item to my list */
1571     void add_item(tc_obj_id obj, tc_prop_id prop);
1572 
1573     /* get the list head */
get_list()1574     struct CTcPrsDictItem *get_list() const { return list_; }
1575 
1576 protected:
1577     /* list of object/property associations with this word */
1578     struct CTcPrsDictItem *list_;
1579 };
1580 
1581 /* ------------------------------------------------------------------------ */
/*
 *   State save structure for parsing property expressions.
 *
 *   This is a plain record: every member is public and the class defines
 *   no methods, so the code that saves and restores the state reads and
 *   writes the fields directly.
 *
 *   NOTE(review): the field names suggest that each one is a snapshot of
 *   a like-named piece of parser state; the code that fills this in is
 *   not part of this declaration, so confirm there before relying on the
 *   per-field notes below.
 */
class CTcPrsPropExprSave
{
public:
    /* one-bit flag - presumably "a local context exists" (confirm) */
    unsigned int has_local_ctx_ : 1;

    /* presumably the variable number of the local context (confirm) */
    int local_ctx_var_num_;

    /* presumably a count of context variable properties used (confirm) */
    size_t ctx_var_props_used_;

    /* presumably the next context array index to assign (confirm) */
    int next_ctx_arr_idx_;

    /* presumably flags: 'self' / full method context referenced (confirm) */
    int self_referenced_;
    int full_method_ctx_referenced_;

    /* presumably flags: local context needs 'self' / full ctx (confirm) */
    int local_ctx_needs_self_;
    int local_ctx_needs_full_method_ctx_;

    /* presumably the code body that was current at save time (confirm) */
    struct CTcCodeBodyRef *cur_code_body_;
};
1598 
1599 /* ------------------------------------------------------------------------ */
1600 /*
1601  *   Grammar production list entry
1602  */
1603 class CTcGramProdEntry
1604 {
1605 public:
1606     CTcGramProdEntry(class CTcSymObj *prod_obj);
1607 
1608     /* get my production object symbol */
get_prod_sym()1609     class CTcSymObj *get_prod_sym() const { return prod_sym_; }
1610 
1611     /* get/set the next item in the list */
get_next()1612     CTcGramProdEntry *get_next() const { return nxt_; }
set_next(CTcGramProdEntry * nxt)1613     void set_next(CTcGramProdEntry *nxt) { nxt_ = nxt; }
1614 
1615     /* add an alternative */
1616     void add_alt(class CTcGramProdAlt *alt);
1617 
1618     /* get the alternative list head */
get_alt_head()1619     class CTcGramProdAlt *get_alt_head() const { return alt_head_; }
1620 
1621     /* write to an object file */
1622     void write_to_obj_file(class CVmFile *fp);
1623 
1624     /* load from an object file */
1625     static void load_from_obj_file(class CVmFile *fp,
1626                                    const tctarg_prop_id_t *prop_xlat,
1627                                    const ulong *enum_xlat,
1628                                    class CTcSymObj *private_owner);
1629 
1630     /* move alternatives from my list to the given target list */
1631     void move_alts_to(CTcGramProdEntry *new_entry);
1632 
1633     /* get/set explicitly-declared flag */
is_declared()1634     int is_declared() const { return is_declared_; }
set_declared(int f)1635     void set_declared(int f) { is_declared_ = f; }
1636 
1637 protected:
1638     /* associated production object symbol */
1639     class CTcSymObj *prod_sym_;
1640 
1641     /* next item in the list */
1642     CTcGramProdEntry *nxt_;
1643 
1644     /* head and tail of alternative list */
1645     class CTcGramProdAlt *alt_head_;
1646     class CTcGramProdAlt *alt_tail_;
1647 
1648     /*
1649      *   flag: this production was explicitly declared (this means that we
1650      *   will consider it valid at link time even if it has no alternatives
1651      *   defined)
1652      */
1653     unsigned int is_declared_ : 1;
1654 };
1655 
1656 /*
1657  *   Grammar production alternative.  Each grammar production has one or
1658  *   more alternatives that, when matched, generate the production.
1659  */
1660 class CTcGramProdAlt
1661 {
1662 public:
1663     CTcGramProdAlt(class CTcSymObj *obj_sym, class CTcDictEntry *dict);
1664 
1665     /* get/set my score */
get_score()1666     int get_score() const { return score_; }
set_score(int score)1667     void set_score(int score) { score_ = score; }
1668 
1669     /* get/set my badness */
get_badness()1670     int get_badness() const { return badness_; }
set_badness(int badness)1671     void set_badness(int badness) { badness_ = badness; }
1672 
1673     /* get my processor object symbol */
get_processor_obj()1674     class CTcSymObj *get_processor_obj() const { return obj_sym_; }
1675 
1676     /* get/set the next list element */
get_next()1677     CTcGramProdAlt *get_next() const { return nxt_; }
set_next(CTcGramProdAlt * nxt)1678     void set_next(CTcGramProdAlt *nxt) { nxt_ = nxt; }
1679 
1680     /* add a token to my list */
1681     void add_tok(class CTcGramProdTok *tok);
1682 
1683     /* get the head of my token list */
get_tok_head()1684     class CTcGramProdTok *get_tok_head() const { return tok_head_; }
1685 
1686     /* write to an object file */
1687     void write_to_obj_file(class CVmFile *fp);
1688 
1689     /* load from an object file */
1690     static CTcGramProdAlt *
1691         load_from_obj_file(class CVmFile *fp,
1692                            const tctarg_prop_id_t *prop_xlat,
1693                            const ulong *enum_xlat);
1694 
1695     /* get the dictionary in effect when the alternative was defined */
get_dict()1696     class CTcDictEntry *get_dict() const { return dict_; }
1697 
1698 protected:
1699     /* head and tail of our token list */
1700     class CTcGramProdTok *tok_head_;
1701     class CTcGramProdTok *tok_tail_;
1702 
1703     /* dictionary in effect when alternative was defined */
1704     class CTcDictEntry *dict_;
1705 
1706     /* the processor object associated with this alternative */
1707     class CTcSymObj *obj_sym_;
1708 
1709     /* next alternative in our production */
1710     CTcGramProdAlt *nxt_;
1711 
1712     /* score */
1713     int score_;
1714 
1715     /* badness */
1716     int badness_;
1717 };
1718 
/*
 *   Grammar production token types.  A CTcGramProdTok stores one of these
 *   in its type field; the type says how the token matches and which
 *   member of the token's value union is meaningful.
 */
enum tcgram_tok_type
{
    /*
     *   unknown - the type CTcGramProdTok's constructor assigns, before
     *   any of the set_match_xxx() routines has been called
     */
    TCGRAM_UNKNOWN,

    /*
     *   match a production (given by the production object); selected by
     *   CTcGramProdTok::set_match_prod()
     */
    TCGRAM_PROD,

    /*
     *   match a part of speech (given by the dictionary property);
     *   selected by CTcGramProdTok::set_match_part_of_speech()
     */
    TCGRAM_PART_OF_SPEECH,

    /*
     *   match a literal string; selected by
     *   CTcGramProdTok::set_match_literal()
     */
    TCGRAM_LITERAL,

    /*
     *   token-type match (given by a token enum ID); selected by
     *   CTcGramProdTok::set_match_token_type()
     */
    TCGRAM_TOKEN_TYPE,

    /*
     *   free-floating end-of-string; selected by
     *   CTcGramProdTok::set_match_star()
     */
    TCGRAM_STAR,

    /*
     *   match one of several parts of speech (presumably selected by
     *   CTcGramProdTok::set_match_part_list(), whose body is defined
     *   elsewhere - confirm)
     */
    TCGRAM_PART_OF_SPEECH_LIST
};
1743 
1744 /*
1745  *   Grammar production alternative token
1746  */
1747 class CTcGramProdTok
1748 {
1749 public:
CTcGramProdTok()1750     CTcGramProdTok()
1751     {
1752         /* not in a list yet */
1753         nxt_ = 0;
1754 
1755         /* no type yet */
1756         typ_ = TCGRAM_UNKNOWN;
1757 
1758         /* no property association yte */
1759         prop_assoc_ = TCTARG_INVALID_PROP;
1760     }
1761 
1762     /* get/set my next element */
get_next()1763     CTcGramProdTok *get_next() const { return nxt_; }
set_next(CTcGramProdTok * nxt)1764     void set_next(CTcGramProdTok *nxt) { nxt_ = nxt; }
1765 
1766     /* set me to match a production object */
set_match_prod(class CTcSymObj * obj)1767     void set_match_prod(class CTcSymObj *obj)
1768     {
1769         /* remember the production object */
1770         typ_ = TCGRAM_PROD;
1771         val_.obj_ = obj;
1772     }
1773 
1774     /* set me to match a token type */
set_match_token_type(ulong enum_id)1775     void set_match_token_type(ulong enum_id)
1776     {
1777         /* remember the token enum ID */
1778         typ_ = TCGRAM_TOKEN_TYPE;
1779         val_.enum_id_ = enum_id;
1780     }
1781 
1782     /* set me to match a dictionary property */
set_match_part_of_speech(tctarg_prop_id_t prop)1783     void set_match_part_of_speech(tctarg_prop_id_t prop)
1784     {
1785         /* remember the part of speech */
1786         typ_ = TCGRAM_PART_OF_SPEECH;
1787         val_.prop_ = prop;
1788     }
1789 
1790     /*
1791      *   set me to match a list of parts of speech; each part of speech must
1792      *   be separately added via add_match_part_ele()
1793      */
1794     void set_match_part_list();
1795 
1796     /* add an element to the part-of-speech match list */
1797     void add_match_part_ele(tctarg_prop_id_t prop);
1798 
1799     /* set me to match a literal string */
set_match_literal(const char * txt,size_t len)1800     void set_match_literal(const char *txt, size_t len)
1801     {
1802         /* remember the string */
1803         typ_ = TCGRAM_LITERAL;
1804         val_.str_.txt_ = txt;
1805         val_.str_.len_ = len;
1806     }
1807 
1808     /* set me to match a free-floating end-of-string */
set_match_star()1809     void set_match_star()
1810     {
1811         /* set the type */
1812         typ_ = TCGRAM_STAR;
1813     }
1814 
1815     /* get my type */
get_type()1816     tcgram_tok_type get_type() const { return typ_; }
1817 
1818     /* get my value */
getval_prod()1819     class CTcSymObj *getval_prod() const { return val_.obj_; }
getval_part_of_speech()1820     tctarg_prop_id_t getval_part_of_speech() const { return val_.prop_; }
getval_literal_txt()1821     const char *getval_literal_txt() const { return val_.str_.txt_; }
getval_literal_len()1822     const size_t getval_literal_len() const { return val_.str_.len_; }
getval_token_type()1823     ulong getval_token_type() const { return val_.enum_id_; }
getval_part_list_len()1824     size_t getval_part_list_len() const { return val_.prop_list_.len_; }
getval_part_list_ele(size_t idx)1825     tctarg_prop_id_t getval_part_list_ele(size_t idx) const
1826         { return val_.prop_list_.arr_[idx]; }
1827 
1828     /*
1829      *   get/set my property association - this is the property to which
1830      *   the actual match to the rule is assigned when we match the rule
1831      */
get_prop_assoc()1832     tctarg_prop_id_t get_prop_assoc() const { return prop_assoc_; }
set_prop_assoc(tctarg_prop_id_t prop)1833     void set_prop_assoc(tctarg_prop_id_t prop) { prop_assoc_ = prop; }
1834 
1835     /* write to an object file */
1836     void write_to_obj_file(class CVmFile *fp);
1837 
1838     /* load from an object file */
1839     static CTcGramProdTok *
1840         load_from_obj_file(class CVmFile *fp,
1841                            const tctarg_prop_id_t *prop_xlat,
1842                            const ulong *enum_xlat);
1843 
1844 protected:
1845     /* next token in my list */
1846     CTcGramProdTok *nxt_;
1847 
1848     /* my type - this specifies how this token matches */
1849     tcgram_tok_type typ_;
1850 
1851     /* match specification - varies according to my type */
1852     union
1853     {
1854         /* object - for matching a production */
1855         class CTcSymObj *obj_;
1856 
1857         /* property - for matching a part of speech */
1858         tctarg_prop_id_t prop_;
1859 
1860         /* token enum id - for matching a token type */
1861         ulong enum_id_;
1862 
1863         /* literal string */
1864         struct
1865         {
1866             const char *txt_;
1867             size_t len_;
1868         } str_;
1869 
1870         /* list of vocabulary elements */
1871         struct
1872         {
1873             /* number of array entries allocated */
1874             size_t alo_;
1875 
1876             /* number of array entries actually used */
1877             size_t len_;
1878 
1879             /* array of entries */
1880             tctarg_prop_id_t *arr_;
1881         } prop_list_;
1882     } val_;
1883 
1884     /* property association */
1885     tctarg_prop_id_t prop_assoc_;
1886 };
1887 
1888 /* ------------------------------------------------------------------------ */
1889 /*
1890  *   Exported symbol record
1891  */
1892 class CTcPrsExport
1893 {
1894 public:
1895     /* create with the given compiler symbol */
CTcPrsExport(const char * sym,size_t sym_len)1896     CTcPrsExport(const char *sym, size_t sym_len)
1897     {
1898         /* remember my name */
1899         sym_ = sym;
1900         sym_len_ = sym_len;
1901 
1902         /*
1903          *   we don't yet have an explicit external name, so export using
1904          *   the internal name
1905          */
1906         ext_name_ = sym;
1907         ext_len_ = sym_len;
1908 
1909         /* we're not in a list yet */
1910         nxt_ = 0;
1911     }
1912 
1913     /* set the external name */
set_extern_name(const char * txt,size_t len)1914     void set_extern_name(const char *txt, size_t len)
1915     {
1916         ext_name_ = txt;
1917         ext_len_ = len;
1918     }
1919 
1920     /* get the symbol name and length */
get_sym()1921     const char *get_sym() const { return sym_; }
get_sym_len()1922     size_t get_sym_len() const { return sym_len_; }
1923 
1924     /* get the external name and length */
get_ext_name()1925     const char *get_ext_name() const { return ext_name_; }
get_ext_len()1926     size_t get_ext_len() const { return ext_len_; }
1927 
1928     /* get/set the next entry in the list */
get_next()1929     CTcPrsExport *get_next() const { return nxt_; }
set_next(CTcPrsExport * nxt)1930     void set_next(CTcPrsExport *nxt) { nxt_ = nxt; }
1931 
1932     /* write to an object file */
1933     void write_to_obj_file(class CVmFile *fp);
1934 
1935     /* read from an object file */
1936     static CTcPrsExport *read_from_obj_file(class CVmFile *fp);
1937 
1938     /* determine if my external name matches the given export's */
ext_name_matches(const CTcPrsExport * exp)1939     int ext_name_matches(const CTcPrsExport *exp) const
1940     {
1941         return (exp->get_ext_len() == get_ext_len()
1942                 && memcmp(exp->get_ext_name(), get_ext_name(),
1943                           get_ext_len()) == 0);
1944     }
1945 
1946     /* determine if my name matches the given string */
ext_name_matches(const char * txt)1947     int ext_name_matches(const char *txt) const
1948     {
1949         return (get_ext_len() == get_strlen(txt)
1950                 && memcmp(get_ext_name(), txt, get_ext_len()) == 0);
1951     }
1952 
1953     /* determine if my symbol name matches the given export's */
sym_matches(const CTcPrsExport * exp)1954     int sym_matches(const CTcPrsExport *exp) const
1955     {
1956         return (exp->get_sym_len() == get_sym_len()
1957                 && memcmp(exp->get_sym(), get_sym(), get_sym_len()) == 0);
1958     }
1959 
1960 protected:
1961     /* symbol name - this is the internal compiler symbol being exported */
1962     const char *sym_;
1963     size_t sym_len_;
1964 
1965     /* external name - this is the name visible to the VM loader */
1966     const char *ext_name_;
1967     size_t ext_len_;
1968 
1969     /* next in list */
1970     CTcPrsExport *nxt_;
1971 };
1972 
1973 
1974 /* ------------------------------------------------------------------------ */
1975 /*
1976  *   Parser Symbol Table.  The parser maintains a hierarchy of symbol
1977  *   tables; a local symbol table can be nested inside an enclosing
1978  *   scope's symbol table, and so on up to the top-level block scope,
1979  *   which is enclosed by the global scope.  In addition, at function
1980  *   scope there's a separate table for "goto" labels.
1981  */
1982 
/*
 *   find_or_def actions for undefined symbols.  This is the 'action'
 *   argument of CTcPrsSymtab::find_or_def(); the public find_or_def_xxx()
 *   wrappers in CTcPrsSymtab each pass a fixed value.
 */
enum tcprs_undef_action
{
    /*
     *   if undefined, add an "undefined" entry unconditionally (passed by
     *   find_or_def_undef(), and by find_or_def_prop_implied() when no
     *   "self" is available)
     */
    TCPRS_UNDEF_ADD_UNDEF,

    /*
     *   add a "property" entry unconditionally, but warn about it (passed
     *   by find_or_def_prop(), and by find_or_def_prop_implied() when
     *   "self" is available)
     */
    TCPRS_UNDEF_ADD_PROP,

    /*
     *   add a "property" entry unconditionally, with no warning (passed
     *   by find_or_def_prop_explicit())
     */
    TCPRS_UNDEF_ADD_PROP_NO_WARNING
};
1995 
1996 /* parser symbol table */
1997 class CTcPrsSymtab
1998 {
1999 public:
2000     CTcPrsSymtab(CTcPrsSymtab *parent_scope);
2001     ~CTcPrsSymtab();
2002 
2003     /* allocate parser symbol tables out of the parser memory pool */
2004     void *operator new(size_t siz);
2005 
2006     /*
2007      *   perform static initialization/termination - call once at program
2008      *   startup and shutdown (respectively)
2009      */
2010     static void s_init();
2011     static void s_terminate();
2012 
2013     /* get the enclosing scope's symbol table */
get_parent()2014     CTcPrsSymtab *get_parent() const { return parent_; }
2015 
2016     /* find a symbol; returns null if the symbol isn't defined */
find(const textchar_t * sym,size_t len)2017     class CTcSymbol *find(const textchar_t *sym, size_t len)
2018         { return find(sym, len, 0); }
2019 
find(const textchar_t * sym)2020     class CTcSymbol *find(const textchar_t *sym)
2021         { return find(sym, strlen(sym), 0); }
2022 
2023     /*
2024      *   Find a symbol; returns null if the symbol isn't defined.  If
2025      *   symtab is not null, we'll fill it in with the actual symbol table
2026      *   in which we found the symbol; this might be an enclosing symbol
2027      *   table, since we search up the enclosing scope list.
2028      */
2029     class CTcSymbol *find(const textchar_t *sym, size_t len,
2030                           CTcPrsSymtab **symtab);
2031 
2032     /* find a symbol without changing its referenced status */
2033     class CTcSymbol *find_noref(const textchar_t *sym, size_t len,
2034                                 CTcPrsSymtab **symtab);
2035 
2036     /*
2037      *   Find a symbol; if the symbol isn't defined, log an error and add
2038      *   the symbol as type "undefined".  Because we add a symbol entry if
2039      *   the symbol isn't defined, this *always* returns a valid symbol
2040      *   object.
2041      */
find_or_def_undef(const char * sym,size_t len,int copy_str)2042     class CTcSymbol *find_or_def_undef(const char *sym, size_t len,
2043                                        int copy_str)
2044     {
2045         return find_or_def(sym, len, copy_str, TCPRS_UNDEF_ADD_UNDEF);
2046     }
2047 
2048     /*
2049      *   Find a symbol; if the symbol isn't defined, log a warning and
2050      *   define the symbol as type property.  Because we add an entry if
2051      *   the symbol isn't defined, this *always* returns a valid symbol
2052      *   object.
2053      */
find_or_def_prop(const char * sym,size_t len,int copy_str)2054     class CTcSymbol *find_or_def_prop(const char *sym, size_t len,
2055                                       int copy_str)
2056     {
2057         return find_or_def(sym, len, copy_str, TCPRS_UNDEF_ADD_PROP);
2058     }
2059 
2060     /*
2061      *   Find a symbol; if the symbol isn't defined, define the symbol as
2062      *   type property with no warning.  This should be used when it is
2063      *   unambiguous that a symbol is meant as a property name.  Because we
2064      *   add an entry if the symbol isn't defined, this *always* returns a
2065      *   valid symbol object.
2066      */
find_or_def_prop_explicit(const char * sym,size_t len,int copy_str)2067     class CTcSymbol *find_or_def_prop_explicit(const char *sym, size_t len,
2068                                                int copy_str)
2069     {
2070         return find_or_def(sym, len, copy_str,
2071                            TCPRS_UNDEF_ADD_PROP_NO_WARNING);
2072     }
2073 
2074     /*
2075      *   Find a symbol.  If the symbol isn't defined, and a "self" object
2076      *   is available, define the symbol as a property.  If the symbol
2077      *   isn't defined an no "self" object is available, add an
2078      *   "undefined" entry for the symbol.
2079      */
find_or_def_prop_implied(const char * sym,size_t len,int copy_str,int is_self_avail)2080     class CTcSymbol *find_or_def_prop_implied(const char *sym, size_t len,
2081                                               int copy_str, int is_self_avail)
2082     {
2083         return find_or_def(sym, len, copy_str,
2084                            is_self_avail
2085                            ? TCPRS_UNDEF_ADD_PROP : TCPRS_UNDEF_ADD_UNDEF);
2086     }
2087 
2088     /* add a formal parameter symbol */
2089     void add_formal(const textchar_t *sym, size_t len, int formal_num,
2090                     int copy_str);
2091 
2092     /* add a local variable symbol */
2093     class CTcSymLocal *add_local(const textchar_t *sym, size_t len,
2094                                  int local_num, int copy_str,
2095                                  int init_assigned, int init_referenced);
2096 
2097     /* add a 'goto' symbol */
2098     class CTcSymLabel *add_code_label(const textchar_t *sym, size_t len,
2099                                       int copy_str);
2100 
2101     /* add an entry to the table */
2102     void add_entry(class CTcSymbol *sym);
2103 
2104     /* remove an entry */
2105     void remove_entry(class CTcSymbol *sym);
2106 
2107     /* enumerate entries in the table through a callback */
2108     void enum_entries(void (*func)(void *, class CTcSymbol *), void *ctx);
2109 
2110     /*
2111      *   Scan the symbol table and check for unreferenced locals.  Logs an
2112      *   error for each unreferenced or unassigned local.
2113      */
2114     void check_unreferenced_locals();
2115 
2116     /*
2117      *   Get/set my debugging list index - this is the index of this table
2118      *   in the list for this function or method.  The index values start
2119      *   at 1 - a value of zero indicates that the symbol table isn't part
2120      *   of any list.
2121      */
get_list_index()2122     int get_list_index() const { return list_index_; }
set_list_index(int n)2123     void set_list_index(int n) { list_index_ = n; }
2124 
2125     /* get/set the next entry in the linked list */
get_list_next()2126     CTcPrsSymtab *get_list_next() const { return list_next_; }
set_list_next(CTcPrsSymtab * nxt)2127     void set_list_next(CTcPrsSymtab *nxt) { list_next_ = nxt; }
2128 
2129 protected:
2130     /* add an entry to a global symbol table */
2131     static void add_to_global_symtab(CTcPrsSymtab *tab, CTcSymbol *entry);
2132 
2133     /* get the underlying hash table */
get_hashtab()2134     class CVmHashTable *get_hashtab() const { return hashtab_; }
2135 
2136     /* enumeration callback - check for unreferenced locals */
2137     static void unref_local_cb(void *ctx, class CTcSymbol *sym);
2138 
2139     /*
2140      *   find a symbol, or define a new symbol, according to the given
2141      *   action mode, if the symbol is undefined
2142      */
2143     class CTcSymbol *find_or_def(const textchar_t *sym, size_t len,
2144                                  int copy_str, tcprs_undef_action action);
2145 
2146     /* enclosing scope (parent) symbol table */
2147     CTcPrsSymtab *parent_;
2148 
2149     /* hash table */
2150     class CVmHashTable *hashtab_;
2151 
2152     /* hash function */
2153     static class CVmHashFunc *hash_func_;
2154 
2155     /*
2156      *   Next symbol table in local scope chain.  For each function or
2157      *   method, we keep a simple linear list of the local scopes so that
2158      *   they can be written to the debugging records.  We also keep an
2159      *   index value giving its position in the list, so that we can store
2160      *   references to the table using the list index.
2161      */
2162     CTcPrsSymtab *list_next_;
2163     int list_index_;
2164 };
2165 
2166 
2167 /* ------------------------------------------------------------------------ */
2168 /*
2169  *   Debugger symbol table interface.  This is an abstract interface that
2170  *   debuggers can implement to allow us to search for symbols that are
2171  *   obtained from a compiled program's debugger records.  To keep the
2172  *   compiler independent of the target architecture and the debugger's
2173  *   own internal structures, we define this abstract interface that the
2174  *   debugger must implement.
2175  *
2176  *   Since this type of symbol table is provided by a debugger as a view
2177  *   on the symbol information in a previously compiled program, the
2178  *   parser naturally has no need to add symbols to the table; hence the
2179  *   only required operations are symbol lookups.
2180  */
2181 class CTcPrsDbgSymtab
2182 {
2183 public:
2184     /*
2185      *   Get information on a symbol.  Returns true if the symbol is
2186      *   found, false if not.  If we find the symbol, fills in the
2187      *   information structure with the appropriate data.
2188      */
2189     virtual int find_symbol(const textchar_t *sym, size_t len,
2190                             struct tcprsdbg_sym_info *info) = 0;
2191 };
2192 
/*
 *   Debugger local symbol information structure.  This is the structure
 *   that CTcPrsDbgSymtab::find_symbol() fills in to describe a symbol
 *   that it finds.
 */
struct tcprsdbg_sym_info
{
    /* symbol type */
    enum tc_symtype_t sym_type;

    /* local/parameter number */
    uint var_id;

    /* context variable index - 0 if it's not a context local */
    int ctx_arr_idx;

    /* stack frame index */
    uint frame_idx;
};
2210 
2211 
2212 
2213 /* ------------------------------------------------------------------------ */
2214 /*
2215  *   Parse Tree storage manager.
2216  *
2217  *   The parse tree has some special characteristics that make it
2218  *   desirable to use a special memory manager for it.  First, the parse
2219  *   tree consists of many small objects, so we would like to have as
2220  *   little overhead per object for memory tracking as possible.  Second,
2221  *   parse tree objects all have a similar lifetime: we create the entire
2222  *   parse tree as we scan the source, then use it to generate target
2223  *   code, then discard the whole thing.
2224  *
2225  *   To manage memory efficiently for the parse tree, we define our own
2226  *   memory manager for parse tree objects.  The memory manager is very
2227  *   simple, fast, and has minimal per-object overhead.  We simply
2228  *   maintain a list of large blocks, then suballocate requests out of the
2229  *   large blocks.  Each time we run out of space in a block, we allocate
2230  *   a new block.  We do not keep track of any extra tracking information
2231  *   per node, so a node cannot be individually freed; however, the entire
2232  *   block list can be freed at once, which is exactly the behavior we
2233  *   want.
2234  */
/*
 *   NOTE(review): this class has a destructor and holds raw pointers to
 *   its block list, but declares no copy constructor or assignment
 *   operator.  If the destructor frees the blocks (its body isn't visible
 *   here), copying a CTcPrsMem object would be unsafe - confirm that
 *   instances are never copied.
 */
class CTcPrsMem
{
public:
    CTcPrsMem();
    ~CTcPrsMem();

    /*
     *   Allocate storage - returns a pointer to 'siz' bytes from the pool.
     *   Memory obtained here can't be released individually; it can only
     *   be released in bulk, through the reset() routines.
     */
    void *alloc(size_t siz);

    /* save the current pool state, for later resetting */
    void save_state(struct tcprsmem_state_t *state);

    /*
     *   reset the pool to the given state - delete all objects allocated
     *   in the pool since the state was saved
     */
    void reset(const struct tcprsmem_state_t *state);

    /* reset to initial state */
    void reset();

private:
    /* delete all parser memory */
    void delete_all();

    /* allocate a new block */
    void alloc_block();

    /* head of list of memory blocks */
    struct tcprsmem_blk_t *head_;

    /* tail of list and current memory block */
    struct tcprsmem_blk_t *tail_;

    /* current allocation offset in last block */
    char *free_ptr_;

    /* remaining space available in last block */
    size_t rem_;
};
2275 
/*
 *   State-saving structure for the parser memory pool.  A caller passes
 *   one of these to CTcPrsMem::save_state() to record the pool's current
 *   state, and later passes it to CTcPrsMem::reset() to return the pool
 *   to that state.  The three members correspond by name to CTcPrsMem's
 *   tail_, free_ptr_ and rem_ members.
 */
struct tcprsmem_state_t
{
    /* current tail of memory block list */
    struct tcprsmem_blk_t *tail;

    /* current allocation offset in last block */
    char *free_ptr;

    /* current remaining space in last block */
    size_t rem;
};
2290 
2291 
2292 /*
2293  *   Provide an overridden operator new for allocating objects explicitly
2294  *   from the pool
2295  */
new(size_t siz,CTcPrsMem * pool)2296 inline void *operator new(size_t siz, CTcPrsMem *pool)
2297 {
2298     return pool->alloc(siz);
2299 }
2300 
2301 /*
2302  *   provide an array operator new as well
2303  */
2304 inline void *operator new[](size_t siz, CTcPrsMem *pool)
2305 {
2306     return pool->alloc(siz);
2307 }
2308 
2309 
/*
 *   Parse tree memory block.  CTcPrsMem keeps these in a singly-linked
 *   list (see its head_ and tail_ members).
 */
struct tcprsmem_blk_t
{
    /* next block in the list */
    tcprsmem_blk_t *next_;

    /*
     *   This block's byte array (the array extends off the end of the
     *   structure).  The declared size of 1 is only a placeholder;
     *   presumably whoever allocates a block requests extra space beyond
     *   sizeof(tcprsmem_blk_t) for the real buffer - confirm in
     *   CTcPrsMem::alloc_block().
     */
    char buf_[1];
};
2324 
2325 /* ------------------------------------------------------------------------ */
2326 /*
2327  *   Special array list subclass that uses parser memory
2328  */
2329 class CPrsArrayList: public CArrayList
2330 {
2331 protected:
2332     /*
2333      *   override the memory management functions to use parser memory
2334      */
2335 
alloc_mem(size_t siz)2336     virtual void *alloc_mem(size_t siz)
2337     {
2338         /* allocate from the parser pool */
2339         return G_prsmem->alloc(siz);
2340     }
2341 
realloc_mem(void * p,size_t oldsiz,size_t newsiz)2342     virtual void *realloc_mem(void *p, size_t oldsiz, size_t newsiz)
2343     {
2344         void *pnew;
2345 
2346         /* allocate a new block from the parser pool */
2347         pnew = G_prsmem->alloc(newsiz);
2348 
2349         /* copy from the old block to the new block */
2350         memcpy(pnew, p, oldsiz);
2351 
2352         /* return the new block */
2353         return pnew;
2354     }
2355 
free_mem(void * p)2356     virtual void free_mem(void *p)
2357     {
2358         /*
2359          *   do nothing - the parser pool automatically frees everything as a
2360          *   block when terminating the parser
2361          */
2362     }
2363 };
2364 
2365 
2366 /* ------------------------------------------------------------------------ */
2367 /*
2368  *   Expression Constant Value object.  This object is used to express the
2369  *   value of a constant expression.
2370  */
2371 class CTcConstVal
2372 {
2373 public:
CTcConstVal()2374     CTcConstVal()
2375     {
2376         /* the type is unknown */
2377         typ_ = TC_CVT_UNK;
2378     }
2379 
2380     /*
2381      *   determine if this is a constant value - it is a constant if it
2382      *   has any known value
2383      */
is_const()2384     int is_const() const { return (typ_ != TC_CVT_UNK); }
2385 
2386     /*
2387      *   set the type to unknown - this indicates that there is no valid
2388      *   value, which generally means that the associated expression does
2389      *   not have a constant value
2390      */
set_unknown()2391     void set_unknown() { typ_ = TC_CVT_UNK; }
2392 
2393     /* set from another value */
set(const CTcConstVal * val)2394     void set(const CTcConstVal *val)
2395     {
2396         /* copy the type */
2397         typ_ = val->typ_;
2398 
2399         /* copy the value */
2400         val_ = val->val_;
2401     }
2402 
2403     /* set an integer value */
set_int(long val)2404     void set_int(long val) { typ_ = TC_CVT_INT; val_.intval_ = val; }
2405 
2406     /* set a floating-point value */
set_float(const char * val,size_t len)2407     void set_float(const char *val, size_t len)
2408     {
2409         typ_ = TC_CVT_FLOAT;
2410         val_.floatval_.txt_ = val;
2411         val_.floatval_.len_ = len;
2412     }
2413 
2414     /* set an enumerator value */
set_enum(ulong val)2415     void set_enum(ulong val) { typ_ = TC_CVT_ENUM; val_.enumval_ = val; }
2416 
2417     /* set a single-quoted string value */
2418     void set_sstr(const char *val, size_t len);
2419 
2420     /* set a list value */
2421     void set_list(class CTPNList *lst);
2422 
2423     /* set an object reference value */
set_obj(ulong obj)2424     void set_obj(ulong obj)
2425     {
2426         typ_ = TC_CVT_OBJ;
2427         val_.objval_ = obj;
2428     }
2429 
2430     /* set a property pointer value */
set_prop(uint prop)2431     void set_prop(uint prop)
2432     {
2433         typ_ = TC_CVT_PROP;
2434         val_.propval_ = prop;
2435     }
2436 
2437     /* set a function pointer value */
set_funcptr(class CTcSymFunc * sym)2438     void set_funcptr(class CTcSymFunc *sym)
2439     {
2440         typ_ = TC_CVT_FUNCPTR;
2441         val_.funcptrval_ = sym;
2442     }
2443 
2444     /* set an anonymous function pointer value */
set_anon_funcptr(class CTPNCodeBody * code_body)2445     void set_anon_funcptr(class CTPNCodeBody *code_body)
2446     {
2447         typ_ = TC_CVT_ANONFUNCPTR;
2448         val_.codebodyval_ = code_body;
2449     }
2450 
2451     /* set a nil/true value */
set_nil()2452     void set_nil() { typ_ = TC_CVT_NIL; }
set_true()2453     void set_true() { typ_ = TC_CVT_TRUE; }
2454 
2455     /*
2456      *   Set a vocabulary list placeholder.  This has no actual value
2457      *   during compilation; instead, this is just a placeholder.  During
2458      *   linking, we'll replace each of these with a list of strings
2459      *   giving the actual vocabulary for the property.
2460      */
set_vocab_list()2461     void set_vocab_list() { typ_ = TC_CVT_VOCAB_LIST; }
2462 
2463     /* set a nil/true value based on a boolean value */
set_bool(int val)2464     void set_bool(int val)
2465     {
2466         typ_ = (val ? TC_CVT_TRUE : TC_CVT_NIL);
2467     }
2468 
2469     /* get my type */
get_type()2470     tc_constval_type_t get_type() const { return typ_; }
2471 
2472     /* get my int value (no type checking) */
get_val_int()2473     long get_val_int() const { return val_.intval_; }
2474 
2475     /* get my floating point value (no type checking) */
get_val_float()2476     const char *get_val_float() const { return val_.floatval_.txt_; }
get_val_float_len()2477     size_t get_val_float_len() const { return val_.floatval_.len_; }
2478 
2479     /* get my enumerator value (no type checking) */
get_val_enum()2480     ulong get_val_enum() const { return val_.enumval_; }
2481 
2482     /* get my string value (no type checking) */
get_val_str()2483     const char *get_val_str() const { return val_.strval_.strval_; }
get_val_str_len()2484     size_t get_val_str_len() const { return val_.strval_.strval_len_; }
2485 
2486     /* get my list value (no type checking) */
get_val_list()2487     class CTPNList *get_val_list() const { return val_.listval_; }
2488 
2489     /* get my object reference value (no type checking) */
get_val_obj()2490     ulong get_val_obj() const { return val_.objval_; }
2491 
2492     /* get my property pointer value (no type checking) */
get_val_prop()2493     uint get_val_prop() const { return val_.propval_; }
2494 
2495     /* get my function pointer symbol value (no type checking) */
get_val_funcptr_sym()2496     class CTcSymFunc *get_val_funcptr_sym() const
2497         { return val_.funcptrval_; }
2498 
2499     /* get my anonymous function pointer value (no type checking) */
get_val_anon_func_ptr()2500     class CTPNCodeBody *get_val_anon_func_ptr() const
2501         { return val_.codebodyval_; }
2502 
2503     /*
2504      *   Determine if this value equals a given constant value.  Returns
2505      *   true if so, false if not.  We'll set (*can_compare) to true if
2506      *   the values are comparable, false if the comparison is not
2507      *   meaningful.
2508      */
2509     int is_equal_to(const CTcConstVal *val) const;
2510 
2511     /*
2512      *   Convert an integer, nil, or true value to a string.  Fills in the
2513      *   buffer with the result of the conversion if the value wasn't
2514      *   already a string.  If the value is already a string, we'll simply
2515      *   return a pointer to the original string without making a copy.
2516      *   Returns null if the value is not convertible to a string.
2517      */
2518     const char *cvt_to_str(char *buf, size_t bufl, size_t *result_len);
2519 
2520     /*
2521      *   Get my true/nil value.  Returns false if the value is nil or zero,
2522      *   true if it's anything else.
2523      */
get_val_bool()2524     int get_val_bool() const
2525     {
2526         return !(typ_ == TC_CVT_NIL
2527                  || (typ_ == TC_CVT_INT && get_val_int() == 0));
2528     }
2529 
2530 private:
2531     /* my type */
2532     tc_constval_type_t typ_;
2533 
2534     union
2535     {
2536         /* integer value (valid when typ_ == TC_CVT_INT) */
2537         long intval_;
2538 
2539         /* floating-point value (valid when typ_ == TC_CVT_FLOAT) */
2540         struct
2541         {
2542             const char *txt_;
2543             size_t len_;
2544         }
2545         floatval_;
2546 
2547         /* enumerator value (valid when typ_ == TC_CVT_ENUM) */
2548         ulong enumval_;
2549 
2550         /*
2551          *   String value (valid when typ_ == TC_CVT_TYPE_SSTR).  We need
2552          *   to know the length separately, because the underyling string
2553          *   may not be null-terminated.
2554          */
2555         struct
2556         {
2557             const char *strval_;
2558             size_t strval_len_;
2559         }
2560         strval_;
2561 
2562         /* my list value */
2563         class CTPNList *listval_;
2564 
2565         /* property ID value */
2566         uint propval_;
2567 
2568         /* object reference value */
2569         ulong objval_;
2570 
2571         /*
2572          *   function pointer value - we store the underlying symbol,
2573          *   since function pointers are generally not resolved until late
2574          *   in the compilation
2575          */
2576         class CTcSymFunc *funcptrval_;
2577 
2578         /*
2579          *   code body pointer value - we store the underlying code body
2580          *   for anonymous functions
2581          */
2582         class CTPNCodeBody *codebodyval_;
2583     } val_;
2584 };
2585 
2586 
2587 /* ------------------------------------------------------------------------ */
2588 /*
2589  *   Assignment Types.
2590  */
2591 
/*
 *   Each enumerator names one kind of assignment.  The enumerators rely on
 *   default numbering, so their order determines their values.
 */
enum tc_asitype_t
{
    TC_ASI_SIMPLE,                              /* simple assignment: x = 1 */
    TC_ASI_ADD,                                         /* add to: x += 1 */
    TC_ASI_SUB,                                  /* subtract from: x -= 1 */
    TC_ASI_MUL,                                    /* multiply by: x *= 1 */
    TC_ASI_DIV,                                      /* divide by: x /= 1 */
    TC_ASI_MOD,                                         /* modulo: x %= 1 */
    TC_ASI_BAND,                              /* bitwise-and with: x &= 1 */
    TC_ASI_BOR,                                /* bitwise-or with: x |= 1 */
    TC_ASI_BXOR,                              /* bitwise-xor with: x ^= 1 */
    TC_ASI_SHL,                                    /* shift left: x <<= 1 */
    TC_ASI_SHR,                                   /* shift right: x >>= 1 */
    TC_ASI_PREINC,                                       /* pre-increment */
    TC_ASI_PREDEC,                                       /* pre-decrement */
    TC_ASI_POSTINC,                                     /* post-increment */
    TC_ASI_POSTDEC                                      /* post-decrement */
};
2639 
2640 
2641 /* ------------------------------------------------------------------------ */
2642 /*
2643  *   Expression Operator Parsers.  We construct a tree of these operator
2644  *   parsers so that we can express the expression grammar in a relatively
2645  *   compact and declarative notation.
2646  */
2647 
/*
 *   Basic operator parser.  This is the abstract interface shared by every
 *   operator parser in the expression grammar tree.
 */
class CTcPrsOp
{
public:
    /*
     *   Parse an expression with this operator.
     *
     *   Returns a parse node if successful, or null if an error occurs
     *   and the operator parser is unable to make a guess about what was
     *   intended.
     */
    virtual class CTcPrsNode *
        parse() const = 0;
};
2669 
2670 /*
2671  *   generic left-associative binary operator
2672  */
2673 class CTcPrsOpBin: public CTcPrsOp
2674 {
2675 public:
CTcPrsOpBin()2676     CTcPrsOpBin()
2677     {
2678         /* no left or right subexpression specified */
2679         left_ = right_ = 0;
2680 
2681         /* as-yet unknown operator token */
2682         op_tok_ = TOKT_INVALID;
2683     }
2684 
CTcPrsOpBin(tc_toktyp_t typ)2685     CTcPrsOpBin(tc_toktyp_t typ)
2686     {
2687         /* remember my operator token */
2688         op_tok_ = typ;
2689     }
2690 
CTcPrsOpBin(const CTcPrsOp * left,const CTcPrsOp * right,tc_toktyp_t typ)2691     CTcPrsOpBin(const CTcPrsOp *left, const CTcPrsOp *right, tc_toktyp_t typ)
2692     {
2693         /* remember my left and right sub-operators */
2694         left_ = left;
2695         right_ = right;
2696 
2697         /* remember my operator token */
2698         op_tok_ = typ;
2699     }
2700 
2701     /* parse the binary expression */
2702     class CTcPrsNode *parse() const;
2703 
2704     /* build a new tree out of our left-hand and right-hand subtrees */
2705     virtual class CTcPrsNode
2706         *build_tree(class CTcPrsNode *left,
2707                     class CTcPrsNode *right) const = 0;
2708 
2709     /*
2710      *   Try evaluating a constant result.  If the two values can be
2711      *   combined with the operator to yield a constant value result,
2712      *   create a new parse node for the constant value (or update one of
2713      *   the given subnodes) and return it.  If we can't provide a
2714      *   constant value, return null.
2715      *
2716      *   By default, we'll indicate that the expression does not have a
2717      *   valid constant value.
2718      */
2719     virtual class CTcPrsNode
eval_constant(class CTcPrsNode * left,class CTcPrsNode * right)2720         *eval_constant(class CTcPrsNode *left,
2721                        class CTcPrsNode *right) const
2722     {
2723         /* indicate that we cannot synthesize a constant value */
2724         return 0;
2725     }
2726 
2727     /* get/set my token */
get_op_tok()2728     tc_toktyp_t get_op_tok() const { return op_tok_; }
set_op_tok(tc_toktyp_t tok)2729     void set_op_tok(tc_toktyp_t tok) { op_tok_ = tok; }
2730 
2731 protected:
2732     /* operator that can be parsed for my left-hand side */
2733     const CTcPrsOp *left_;
2734 
2735     /* operator that can be parsed for my right-hand side */
2736     const CTcPrsOp *right_;
2737 
2738     /* my operator token */
2739     tc_toktyp_t op_tok_;
2740 };
2741 
2742 /*
2743  *   Binary Operator Group.  This is a group of operators at a common
2744  *   precedence level.  The group has an array of binary operators that
2745  *   are all at the same level of precedence; we'll evaluate the left
2746  *   suboperator, then check the token in the input stream against each of
2747  *   our group's operators, applying the one that matches, if one matches.
2748  */
2749 class CTcPrsOpBinGroup: public CTcPrsOp
2750 {
2751 public:
CTcPrsOpBinGroup(const CTcPrsOp * left,const CTcPrsOp * right,const class CTcPrsOpBin * const * ops)2752     CTcPrsOpBinGroup(const CTcPrsOp *left, const CTcPrsOp *right,
2753                      const class CTcPrsOpBin *const *ops)
2754     {
2755         /* remember my left and right suboperators */
2756         left_ = left;
2757         right_ = right;
2758 
2759         /* remember the operators in my group */
2760         ops_ = ops;
2761     }
2762 
2763     class CTcPrsNode *parse() const;
2764 
2765 protected:
2766     /* find and apply an operator to the parsed left-hand side */
2767     int find_and_apply_op(CTcPrsNode **lhs) const;
2768 
2769     /* my left and right suboperators */
2770     const CTcPrsOp *left_;
2771     const CTcPrsOp *right_;
2772 
2773     /* group of binary operators at this precedence level */
2774     const class CTcPrsOpBin *const *ops_;
2775 };
2776 
2777 /*
2778  *   Binary operator group for comparison operators.  This is a similar to
2779  *   other binary groups, but also includes the special "is in" and "not
2780  *   in" operators.
2781  */
2782 class CTcPrsOpBinGroupCompare: public CTcPrsOpBinGroup
2783 {
2784 public:
CTcPrsOpBinGroupCompare(const class CTcPrsOp * left,const class CTcPrsOp * right,const class CTcPrsOpBin * const * ops)2785     CTcPrsOpBinGroupCompare(const class CTcPrsOp *left,
2786                             const class CTcPrsOp *right,
2787                             const class CTcPrsOpBin *const *ops)
2788         : CTcPrsOpBinGroup(left, right, ops)
2789     {
2790     }
2791 
2792     class CTcPrsNode *parse() const;
2793 
2794 protected:
2795     /* parse the 'in' list portion of the expression */
2796     class CTPNArglist *parse_inlist() const;
2797 };
2798 
2799 /* comma operator */
2800 class CTcPrsOpComma: public CTcPrsOpBin
2801 {
2802 public:
CTcPrsOpComma(const CTcPrsOp * left,const CTcPrsOp * right)2803     CTcPrsOpComma(const CTcPrsOp *left, const CTcPrsOp *right)
2804         : CTcPrsOpBin(left, right, TOKT_COMMA) { }
2805 
2806     /* evaluate constant result */
2807     class CTcPrsNode
2808         *eval_constant(class CTcPrsNode *left,
2809                        class CTcPrsNode *right) const;
2810 
2811     /* build a new tree out of our left-hand and right-hand subtrees */
2812     class CTcPrsNode
2813         *build_tree(class CTcPrsNode *left,
2814                     class CTcPrsNode *right) const;
2815 };
2816 
2817 /* logical OR */
2818 class CTcPrsOpOr: public CTcPrsOpBin
2819 {
2820 public:
CTcPrsOpOr(const CTcPrsOp * left,const CTcPrsOp * right)2821     CTcPrsOpOr(const CTcPrsOp *left, const CTcPrsOp *right)
2822         : CTcPrsOpBin(left, right, TOKT_OROR) { }
2823 
2824     /* evaluate constant result */
2825     class CTcPrsNode
2826         *eval_constant(class CTcPrsNode *left,
2827                        class CTcPrsNode *right) const;
2828 
2829     /* build a new tree out of our left-hand and right-hand subtrees */
2830     class CTcPrsNode
2831         *build_tree(class CTcPrsNode *left,
2832                     class CTcPrsNode *right) const;
2833 };
2834 
2835 /* logical AND */
2836 class CTcPrsOpAnd: public CTcPrsOpBin
2837 {
2838 public:
CTcPrsOpAnd(const CTcPrsOp * left,const CTcPrsOp * right)2839     CTcPrsOpAnd(const CTcPrsOp *left, const CTcPrsOp *right)
2840         : CTcPrsOpBin(left, right, TOKT_ANDAND) { }
2841 
2842     /* evaluate constant result */
2843     class CTcPrsNode
2844         *eval_constant(class CTcPrsNode *left,
2845                        class CTcPrsNode *right) const;
2846 
2847     /* build a new tree out of our left-hand and right-hand subtrees */
2848     class CTcPrsNode
2849         *build_tree(class CTcPrsNode *left,
2850                     class CTcPrsNode *right) const;
2851 };
2852 
2853 /* general magnitude comparison operators */
2854 class CTcPrsOpRel: public CTcPrsOpBin
2855 {
2856 public:
CTcPrsOpRel(tc_toktyp_t typ)2857     CTcPrsOpRel(tc_toktyp_t typ) : CTcPrsOpBin(typ) { }
2858 
2859     /* evaluate constant result */
2860     class CTcPrsNode
2861         *eval_constant(class CTcPrsNode *left,
2862                        class CTcPrsNode *right) const;
2863 
2864 protected:
2865     /*
2866      *   Get the result true/false value, given the result of the
2867      *   comparison.  For example, if this is a greater-than operator,
2868      *   this should return TRUE if comp > 0, FALSE otherwise.
2869      */
2870     virtual int get_bool_val(int comparison_value) const = 0;
2871 };
2872 
2873 /* comparison - greater than */
2874 class CTcPrsOpGt: public CTcPrsOpRel
2875 {
2876 public:
CTcPrsOpGt()2877     CTcPrsOpGt() : CTcPrsOpRel(TOKT_GT) { }
2878 
2879     /* get the boolean value for a comparison sense */
get_bool_val(int comp)2880     int get_bool_val(int comp) const { return comp > 0; }
2881 
2882     /* build a new tree out of our left-hand and right-hand subtrees */
2883     class CTcPrsNode
2884         *build_tree(class CTcPrsNode *left,
2885                     class CTcPrsNode *right) const;
2886 };
2887 
2888 /* comparison - greater than or equal to */
2889 class CTcPrsOpGe: public CTcPrsOpRel
2890 {
2891 public:
CTcPrsOpGe()2892     CTcPrsOpGe() : CTcPrsOpRel(TOKT_GE) { }
2893 
2894     /* get the boolean value for a comparison sense */
get_bool_val(int comp)2895     int get_bool_val(int comp) const { return comp >= 0; }
2896 
2897     /* build a new tree out of our left-hand and right-hand subtrees */
2898     class CTcPrsNode
2899         *build_tree(class CTcPrsNode *left,
2900                     class CTcPrsNode *right) const;
2901 };
2902 
2903 /* comparison - less than */
2904 class CTcPrsOpLt: public CTcPrsOpRel
2905 {
2906 public:
CTcPrsOpLt()2907     CTcPrsOpLt() : CTcPrsOpRel(TOKT_LT) { }
2908 
2909     /* get the boolean value for a comparison sense */
get_bool_val(int comp)2910     int get_bool_val(int comp) const { return comp < 0; }
2911 
2912     /* build a new tree out of our left-hand and right-hand subtrees */
2913     class CTcPrsNode
2914         *build_tree(class CTcPrsNode *left,
2915                     class CTcPrsNode *right) const;
2916 };
2917 
2918 /* comparison - less than or equal to */
2919 class CTcPrsOpLe: public CTcPrsOpRel
2920 {
2921 public:
CTcPrsOpLe()2922     CTcPrsOpLe() : CTcPrsOpRel(TOKT_LE) { }
2923 
2924     /* get the boolean value for a comparison sense */
get_bool_val(int comp)2925     int get_bool_val(int comp) const { return comp <= 0; }
2926 
2927     /* build a new tree out of our left-hand and right-hand subtrees */
2928     class CTcPrsNode
2929         *build_tree(class CTcPrsNode *left,
2930                     class CTcPrsNode *right) const;
2931 };
2932 
2933 /*
2934  *   Equality/inequality comparison
2935  */
2936 class CTcPrsOpEqComp: public CTcPrsOpBin
2937 {
2938 public:
CTcPrsOpEqComp(tc_toktyp_t typ)2939     CTcPrsOpEqComp(tc_toktyp_t typ) : CTcPrsOpBin(typ) { }
2940 
2941     /* evaluate constant result */
2942     class CTcPrsNode
2943         *eval_constant(class CTcPrsNode *left,
2944                        class CTcPrsNode *right) const;
2945 
2946 protected:
2947     /* get the boolean value to use if the operands are equal */
2948     virtual int get_bool_val(int ops_equal) const = 0;
2949 };
2950 
2951 
2952 /*
2953  *   Equality comparison
2954  */
2955 class CTcPrsOpEq: public CTcPrsOpEqComp
2956 {
2957 public:
2958     /* start out in C mode - use '==' operator by default */
CTcPrsOpEq()2959     CTcPrsOpEq()
2960         : CTcPrsOpEqComp(TOKT_EQEQ) { }
2961 
2962     /* set the current equality operator */
set_eq_op(tc_toktyp_t op)2963     void set_eq_op(tc_toktyp_t op) { op_tok_ = op; }
2964 
2965     /* build a new tree out of our left-hand and right-hand subtrees */
2966     class CTcPrsNode
2967         *build_tree(class CTcPrsNode *left,
2968                     class CTcPrsNode *right) const;
2969 
2970     /* get the boolean value to use if the operands are equal */
get_bool_val(int ops_equal)2971     virtual int get_bool_val(int ops_equal) const { return ops_equal; }
2972 };
2973 
2974 /*
2975  *   Inequality comparison
2976  */
2977 class CTcPrsOpNe: public CTcPrsOpEqComp
2978 {
2979 public:
CTcPrsOpNe()2980     CTcPrsOpNe() : CTcPrsOpEqComp(TOKT_NE) { }
2981 
2982     /* build a new tree out of our left-hand and right-hand subtrees */
2983     class CTcPrsNode
2984         *build_tree(class CTcPrsNode *left,
2985                     class CTcPrsNode *right) const;
2986 
2987     /* get the boolean value to use if the operands are equal */
get_bool_val(int ops_equal)2988     virtual int get_bool_val(int ops_equal) const { return !ops_equal; }
2989 };
2990 
2991 /*
2992  *   binary arithmetic operators
2993  */
2994 class CTcPrsOpArith: public CTcPrsOpBin
2995 {
2996 public:
CTcPrsOpArith(tc_toktyp_t typ)2997     CTcPrsOpArith(tc_toktyp_t typ)
2998         : CTcPrsOpBin(typ) { }
2999 
CTcPrsOpArith(const CTcPrsOp * left,const CTcPrsOp * right,tc_toktyp_t typ)3000     CTcPrsOpArith(const CTcPrsOp *left, const CTcPrsOp *right,
3001                   tc_toktyp_t typ)
3002         : CTcPrsOpBin(left, right, typ) { }
3003 
3004     /* evaluate constant result */
3005     class CTcPrsNode
3006         *eval_constant(class CTcPrsNode *left,
3007                        class CTcPrsNode *right) const;
3008 
3009 protected:
3010     /* calculate the result */
3011     virtual long calc_result(long val1, long val2) const = 0;
3012 };
3013 
3014 /* bitwise OR */
3015 class CTcPrsOpBOr: public CTcPrsOpArith
3016 {
3017 public:
CTcPrsOpBOr(const CTcPrsOp * left,const CTcPrsOp * right)3018     CTcPrsOpBOr(const CTcPrsOp *left, const CTcPrsOp *right)
3019         : CTcPrsOpArith(left, right, TOKT_OR) { }
3020 
3021     /* build a new tree out of our left-hand and right-hand subtrees */
3022     class CTcPrsNode
3023         *build_tree(class CTcPrsNode *left,
3024                     class CTcPrsNode *right) const;
3025 
3026 protected:
3027     /* calculate the result */
calc_result(long val1,long val2)3028     virtual long calc_result(long val1, long val2) const
3029         { return val1 | val2; }
3030 };
3031 
3032 /* bitwise XOR */
3033 class CTcPrsOpBXor: public CTcPrsOpArith
3034 {
3035 public:
CTcPrsOpBXor(const CTcPrsOp * left,const CTcPrsOp * right)3036     CTcPrsOpBXor(const CTcPrsOp *left, const CTcPrsOp *right)
3037         : CTcPrsOpArith(left, right, TOKT_XOR) { }
3038 
3039     /* build a new tree out of our left-hand and right-hand subtrees */
3040     class CTcPrsNode
3041         *build_tree(class CTcPrsNode *left,
3042                     class CTcPrsNode *right) const;
3043 
3044 protected:
3045     /* calculate the result */
calc_result(long val1,long val2)3046     virtual long calc_result(long val1, long val2) const
3047         { return val1 ^ val2; }
3048 };
3049 
3050 /* bitwise AND */
3051 class CTcPrsOpBAnd: public CTcPrsOpArith
3052 {
3053 public:
CTcPrsOpBAnd(const CTcPrsOp * left,const CTcPrsOp * right)3054     CTcPrsOpBAnd(const CTcPrsOp *left, const CTcPrsOp *right)
3055         : CTcPrsOpArith(left, right, TOKT_AND) { }
3056 
3057     /* build a new tree out of our left-hand and right-hand subtrees */
3058     class CTcPrsNode
3059         *build_tree(class CTcPrsNode *left,
3060                     class CTcPrsNode *right) const;
3061 
3062 protected:
3063     /* calculate the result */
calc_result(long val1,long val2)3064     virtual long calc_result(long val1, long val2) const
3065         { return val1 & val2; }
3066 };
3067 
3068 /*
3069  *   shift left
3070  */
3071 class CTcPrsOpShl: public CTcPrsOpArith
3072 {
3073 public:
CTcPrsOpShl()3074     CTcPrsOpShl() : CTcPrsOpArith(TOKT_SHL) { }
3075 
3076     /* build a new tree out of our left-hand and right-hand subtrees */
3077     class CTcPrsNode
3078         *build_tree(class CTcPrsNode *left,
3079                     class CTcPrsNode *right) const;
3080 
3081 protected:
calc_result(long a,long b)3082     long calc_result(long a, long b) const { return a << b; }
3083 };
3084 
3085 /*
3086  *   shift right
3087  */
3088 class CTcPrsOpShr: public CTcPrsOpArith
3089 {
3090 public:
CTcPrsOpShr()3091     CTcPrsOpShr() : CTcPrsOpArith(TOKT_SHR) { }
3092 
3093     /* build a new tree out of our left-hand and right-hand subtrees */
3094     class CTcPrsNode
3095         *build_tree(class CTcPrsNode *left,
3096                     class CTcPrsNode *right) const;
3097 
3098 protected:
calc_result(long a,long b)3099     long calc_result(long a, long b) const { return a >> b; }
3100 };
3101 
3102 /*
3103  *   multiply
3104  */
3105 class CTcPrsOpMul: public CTcPrsOpArith
3106 {
3107 public:
CTcPrsOpMul()3108     CTcPrsOpMul() : CTcPrsOpArith(TOKT_TIMES) { }
3109 
3110     /* build a new tree out of our left-hand and right-hand subtrees */
3111     class CTcPrsNode
3112         *build_tree(class CTcPrsNode *left,
3113                     class CTcPrsNode *right) const;
3114 
3115 protected:
calc_result(long a,long b)3116     long calc_result(long a, long b) const { return a * b; }
3117 };
3118 
3119 /*
3120  *   divide
3121  */
3122 class CTcPrsOpDiv: public CTcPrsOpArith
3123 {
3124 public:
CTcPrsOpDiv()3125     CTcPrsOpDiv()
3126         : CTcPrsOpArith(TOKT_DIV) { }
3127 
CTcPrsOpDiv(tc_toktyp_t tok)3128     CTcPrsOpDiv(tc_toktyp_t tok)
3129         : CTcPrsOpArith(tok) { }
3130 
3131     /* build a new tree out of our left-hand and right-hand subtrees */
3132     class CTcPrsNode
3133         *build_tree(class CTcPrsNode *left,
3134                     class CTcPrsNode *right) const;
3135 
3136 protected:
3137     long calc_result(long a, long b) const;
3138 };
3139 
3140 
3141 /*
3142  *   mod - inherit from divide operator to pick up divide-by-zero checking
3143  */
3144 class CTcPrsOpMod: public CTcPrsOpDiv
3145 {
3146 public:
CTcPrsOpMod()3147     CTcPrsOpMod() : CTcPrsOpDiv(TOKT_MOD) { }
3148 
3149     /* build a new tree out of our left-hand and right-hand subtrees */
3150     class CTcPrsNode
3151         *build_tree(class CTcPrsNode *left,
3152                     class CTcPrsNode *right) const;
3153 
3154 protected:
3155     long calc_result(long a, long b) const;
3156 };
3157 
3158 /*
3159  *   add
3160  */
3161 class CTcPrsOpAdd: public CTcPrsOpArith
3162 {
3163 public:
CTcPrsOpAdd()3164     CTcPrsOpAdd() : CTcPrsOpArith(TOKT_PLUS) { }
3165 
3166     /* build a new tree out of our left-hand and right-hand subtrees */
3167     class CTcPrsNode
3168         *build_tree(class CTcPrsNode *left,
3169                     class CTcPrsNode *right) const;
3170 
3171     /* evaluate constant result */
3172     class CTcPrsNode
3173         *eval_constant(class CTcPrsNode *left,
3174                        class CTcPrsNode *right) const;
3175 
3176 protected:
calc_result(long a,long b)3177     long calc_result(long a, long b) const { return a + b; }
3178 };
3179 
3180 /*
3181  *   subtract
3182  */
3183 class CTcPrsOpSub: public CTcPrsOpArith
3184 {
3185 public:
CTcPrsOpSub()3186     CTcPrsOpSub() : CTcPrsOpArith(TOKT_MINUS) { }
3187 
3188     /* build a new tree out of our left-hand and right-hand subtrees */
3189     class CTcPrsNode
3190         *build_tree(class CTcPrsNode *left,
3191                     class CTcPrsNode *right) const;
3192 
3193     /* evaluate constant result */
3194     class CTcPrsNode
3195         *eval_constant(class CTcPrsNode *left,
3196                        class CTcPrsNode *right) const;
3197 
3198 protected:
calc_result(long a,long b)3199     long calc_result(long a, long b) const { return a - b; }
3200 };
3201 
3202 /*
3203  *   Unary Operators
3204  */
3205 class CTcPrsOpUnary: public CTcPrsOp
3206 {
3207 public:
3208     class CTcPrsNode *parse() const;
3209 
3210     /*
3211      *   evaluate a constant subscript expression; returns a constant
3212      *   parse node expression if the subscript can be evaluated to a
3213      *   compile-time constant, or null if not
3214      */
3215     static class CTcPrsNode
3216         *eval_const_subscript(class CTcPrsNode *lhs,
3217                               class CTcPrsNode *subscript);
3218 
3219     /*
3220      *   evaluate a constant NOT expression; returns a constant parse node
3221      *   expression if the logical negation can be evaluated to a
3222      *   compile-time constant, or null if not
3223      */
3224     static class CTcPrsNode *eval_const_not(class CTcPrsNode *lhs);
3225 
3226     /* parse a double-quoted string with embedded expressions */
3227     static class CTcPrsNode *parse_dstr_embed();
3228 
3229     /* parse a list */
3230     static class CTcPrsNode *parse_list();
3231 
3232     /* parse a primary expression */
3233     static class CTcPrsNode *parse_primary();
3234 
3235 protected:
3236     /* parse an anonymous function */
3237     static class CTcPrsNode *parse_anon_func(int short_form);
3238 
3239     /* parse a logical NOT operator */
3240     static class CTcPrsNode *parse_not(CTcPrsNode *sub);
3241 
3242     /* parse a bitwise NOT operator */
3243     static class CTcPrsNode *parse_bnot(CTcPrsNode *sub);
3244 
3245     /* parse an address-of operator */
3246     class CTcPrsNode *parse_addr() const;
3247 
3248     /* parse an arithmetic positive operator */
3249     static class CTcPrsNode *parse_pos(CTcPrsNode *sub);
3250 
3251     /* parse an arithmetic negative operator */
3252     static class CTcPrsNode *parse_neg(CTcPrsNode *sub);
3253 
3254     /* parse a pre- or post-increment operator */
3255     static class CTcPrsNode *parse_inc(int pre, CTcPrsNode *sub);
3256 
3257     /* parse a pre- or post-decrement operator */
3258     static class CTcPrsNode *parse_dec(int pre, CTcPrsNode *sub);
3259 
3260     /* parse a 'new' operator */
3261     static class CTcPrsNode *parse_new(CTcPrsNode *sub, int is_transient);
3262 
3263     /* parse a 'delete' operator */
3264     static class CTcPrsNode *parse_delete(CTcPrsNode *sub);
3265 
3266     /* parse a postfix expression */
3267     static class CTcPrsNode *parse_postfix(int allow_member_expr,
3268                                            int allow_call_expr);
3269 
3270     /* parse a function or method call */
3271     static class CTcPrsNode *parse_call(CTcPrsNode *lhs);
3272 
3273     /* parse an argument list */
3274     static class CTPNArglist *parse_arg_list();
3275 
3276     /* parse a subscript */
3277     static class CTcPrsNode *parse_subscript(CTcPrsNode *lhs);
3278 
3279     /* parse a member selection ('.' operator) */
3280     static class CTcPrsNode *parse_member(CTcPrsNode *lhs);
3281 
3282     /* parse an "inherited" expression */
3283     static class CTcPrsNode *parse_inherited();
3284 
3285     /* parse a "delegated" expression */
3286     static class CTcPrsNode *parse_delegated();
3287 
3288     /* local symbol enumeration callback for anonymous function setup */
3289     static void enum_for_anon(void *ctx, class CTcSymbol *sym);
3290 
3291     /* local symbol enumeration for anon function - follow-up */
3292     static void enum_for_anon2(void *ctx, class CTcSymbol *sym);
3293 };
3294 
3295 /*
3296  *   tertiary conditional operator
3297  */
3298 class CTcPrsOpIf: public CTcPrsOp
3299 {
3300 public:
3301     class CTcPrsNode *parse() const;
3302 };
3303 
3304 /*
3305  *   Assignment operators (including the regular assignment, "="/":=",
3306  *   plus all calculate-and-assign operators: "+=", "-=", etc)
3307  */
3308 class CTcPrsOpAsi: public CTcPrsOp
3309 {
3310 public:
CTcPrsOpAsi()3311     CTcPrsOpAsi()
3312     {
3313         /* start out with the C-mode simple assignment operator */
3314         asi_op_ = TOKT_EQ;
3315     }
3316 
3317     /* parse an assignment */
3318     class CTcPrsNode *parse() const;
3319 
3320     /* set the current simple assignment operator */
set_asi_op(tc_toktyp_t tok)3321     void set_asi_op(tc_toktyp_t tok) { asi_op_ = tok; }
3322 
3323 private:
3324     /* current simple assignment operator */
3325     tc_toktyp_t asi_op_;
3326 };
3327 
3328 #endif /* TCPRS_H */
3329 
3330