1 /* $Header: d:/cvsroot/tads/tads3/TCPNBASE.H,v 1.3 1999/07/11 00:46:53 MJRoberts Exp $ */
2 
3 /*
4  *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
5  *
6  *   Please see the accompanying license file, LICENSE.TXT, for information
7  *   on using and copying this software.
8  */
9 /*
10 Name
  tcpnbase.h - Parse Node - base class
12 Function
13   Defines the target-independent base class for parse nodes
14 Notes
15   All expression parse nodes are derived from the target-specific
16   subclass of this class.  The target-independent base class is
17   CTcPrsNodeBase; the target-specific class is CTcPrsNode.
18 Modified
19   05/10/99 MJRoberts  - Creation
20 */
21 
22 #ifndef TCPN_H
23 #define TCPN_H
24 
25 #include "vmhash.h"
26 
27 /* ------------------------------------------------------------------------ */
/*
 *   Parse Tree Allocation Object.  This is a base class that can be used
 *   for tree objects that are to be allocated from the parser node pool.
 *
 *   The class only declares its own operator new(); the definition is
 *   not part of this header.
 */
class CTcPrsAllocObj
{
public:
    /*
     *   Override operator new() - allocate all parse node objects out of
     *   the parse node pool.  'siz' is the size in bytes of the object
     *   being created.
     */
    void *operator new(size_t siz);
};
41 
42 
43 /* ------------------------------------------------------------------------ */
/*
 *   adjust_for_debug() information structure.  A pointer to one of these
 *   is passed to adjust_for_debug() to describe how the debugger intends
 *   to evaluate the expression.
 */
struct tcpn_debug_info
{
    /* true -> speculative evaluation mode */
    int speculative;

    /*
     *   stack level - 0 is the active level, 1 is the first enclosing
     *   level, and so on
     */
    int stack_level;
};
58 
59 /* ------------------------------------------------------------------------ */
60 /*
61  *   Parse Tree Expression Node - base class.  As we parse an expression,
62  *   we build a tree of these objects to describe the source code.
63  *
64  *   This class is subclassed for each type of parsing node: each type of
65  *   statement has a node type, some statements have helper node types for
66  *   parts of statements, and each expression operator has a node type.
67  *   These subclasses contain the information specific to the type of
68  *   parsing construct represented.
69  *
70  *   Each parsing subclass is then further subclassed for each target
71  *   architecture.  This final subclass contains the code generator for
72  *   the node in the target architecture.
73  *
74  *   The target-independent base version of each subclass is called
75  *   CTPNXxxBase.  The target-specific subclass derived from this base
76  *   class is CTPNXxx.  For example, the final subclass for constant
77  *   nodes, which is derived from the target-independent base class
78  *   CTPNConstBase, is CTPNConst.  (Note that each target uses the same
79  *   name for the final subclass, so we can only link one target
80  *   architecture into a given build of the compiler.  Each additional
81  *   target requires a separate compiler executable with the appropriate
82  *   CTPNConst classes linked in.)
83  */
84 class CTcPrsNodeBase: public CTcPrsAllocObj
85 {
86 public:
87     /*
88      *   Generate code for the expression for the target architecture.
89      *   This method is defined only by the final target-specific
90      *   subclasses.
91      *
92      *   This method is used to generate code to evaluate the expression
93      *   as an rvalue.
94      *
95      *   If 'discard' is true, it indicates that any value yielded by the
96      *   expression will not be used, in which case the generated code
97      *   need not leave the result of the expression on the stack.  We can
98      *   generate code more efficiently for certain types of expressions
99      *   when we know that we're evaluating them only for side effects.
100      *   For example, an assignment expression has a result value, but
101      *   this value need not be pushed onto the stack if it will simply be
102      *   discarded.  Also, an operator like "+" that has no side effects
103      *   of its own can merely evaluate its operands for their side
104      *   effects, but need not compute its own result if that result would
105      *   simply be discarded.
106      *
107      *   If 'for_condition' is true, it indicates that the result of the
108      *   expression will be used directly for a conditional of some kind
109      *   (for a "?:" operator, an "if" statement, a "while" statement, or
110      *   the like).  In some cases, we can avoid extra conversions to some
111      *   values when they're going to be used directly for a comparison;
112      *   for example, the "&&" operator must return a true/nil value, but
113      *   the code generator may be able to avoid the extra conversion when
114      *   the value will be used for an "if" statement's conditional value.
115      */
116     virtual void gen_code(int discard, int for_condition) = 0;
117 
118     /*
119      *   Get the constant value of the parse node, if available.  Most
120      *   parse nodes have no constant value, so by default this returns
121      *   null.  Only constant parse nodes can provide a constant value, so
122      *   they should override this.
123      */
get_const_val()124     virtual class CTcConstVal *get_const_val() { return 0; }
125 
126     /* determine if the node has a constant value */
is_const()127     int is_const() { return get_const_val() != 0; }
128 
129     /* determine if I have a given constant integer value */
is_const_int(int val)130     int is_const_int(int val)
131     {
132         return (is_const()
133                 && get_const_val()->get_type() == TC_CVT_INT
134                 && get_const_val()->get_val_int() == val);
135     }
136 
137     /*
138      *   Set the constant value of the parse node from that of another
139      *   node.  The caller must already have checked that this node and
140      *   the value being assigned are both valid constant values.
141      */
set_const_val(class CTcPrsNode * src)142     void set_const_val(class CTcPrsNode *src)
143     {
144         /* set my constant value from the source's constant value */
145         get_const_val()->set(((CTcPrsNodeBase *)src)->get_const_val());
146     }
147 
148     /*
149      *   Check to see if this expression can possibly be a valid lvalue.
150      *   Return true if so, false if not.  This check is made before
151      *   symbol resolution; when it is not certain whether or not a symbol
152      *   expression can be an lvalue, assume it can be at this point.  By
153      *   default, we'll return false; operator nodes whose result can be
154      *   used as an lvalue should override this to return true.
155      */
check_lvalue()156     virtual int check_lvalue() const { return FALSE; }
157 
158     /*
159      *   Check to see if this expression is an valid lvalue, after
160      *   resolving symbols in the given scope.  Returns true if so, false
161      *   if not.
162      */
check_lvalue_resolved(class CTcPrsSymtab * symtab)163     virtual int check_lvalue_resolved(class CTcPrsSymtab *symtab) const
164         { return FALSE; }
165 
166     /*
167      *   Check to see if this expression can possibly be a valid address
168      *   value, so that the address-of ("&") operator can be applied.
169      *   Returns true if it is possible, false if not.  The only type of
170      *   expression whose address can be taken is a simple symbol.  The
171      *   address of a symbol can be taken only if the symbol is a function
172      *   or property name, but we won't know this at parse time, so we'll
173      *   indicate that any symbol is acceptable.  By default, this returns
174      *   false, since the address of most expressions cannot be taken.
175      */
has_addr()176     virtual int has_addr() const { return FALSE; }
177 
178     /*
179      *   Check to see if this expression is an address expression of some
180      *   kind (i.e., of class CTPNAddrBase, or of a class derived from
181      *   CTPNAddrBase).  Returns true if so, false if not.
182      */
is_addr()183     virtual int is_addr() const { return FALSE; }
184 
185     /*
186      *   Determine if this node is of type double-quoted string (dstring).
187      *   Returns true if so, false if not.  By default, we return false.
188      */
is_dstring()189     virtual int is_dstring() const { return FALSE; }
190 
191     /*
192      *   Determine if this is a simple assignment operator node.  Returns
193      *   true if so, false if not.  By default, we return false.
194      */
is_simple_asi()195     virtual int is_simple_asi() const { return FALSE; }
196 
197     /*
198      *   Determine if this node yields a value when evaluated.  Returns
199      *   true if so, false if not.  When it cannot be determined at
200      *   compile-time whether or not the node has a value (for example,
201      *   for a call to a pointer to a function whose return type is not
202      *   declared), this should indicate that a value is returned.
203      *
204      *   Most nodes yield a value when executed, so we'll return true by
205      *   default.
206      */
has_return_value()207     virtual int has_return_value() const { return TRUE; }
208 
209     /*
210      *   Determine if this node yields a return value when called as a
211      *   function.  We assume by default that it does.
212      */
has_return_value_on_call()213     virtual int has_return_value_on_call() const { return TRUE; }
214 
215     /*
216      *   Get the text of the symbol for this node, if any.  If the node is
217      *   not some kind of symbol node, this returns null.
218      */
get_sym_text()219     virtual const textchar_t *get_sym_text() const { return 0; }
get_sym_text_len()220     virtual size_t get_sym_text_len() const { return 0; }
221 
222     /*
223      *   Fold constant expressions, given a finished symbol table.  We do
224      *   most of our constant folding during the initial parsing, but some
225      *   constant folding must wait until the symbol table is finished; in
226      *   particular, we can't figure out what to do with symbols until we
227      *   know what the symbols mean.
228      *
229      *   For most nodes, this function should merely recurse into subnodes
230      *   and fold constants.  Nodes that are affected by symbol
231      *   resolution, directly or indirectly, should override this.
232      *
233      *   For example, a list can change from unknown to constant during
234      *   this operation.  If the list contains a symbol, the list will
235      *   initially be set to unknown, since the symbol could turn out to
236      *   be a property evaluation, which would be non-constant, or an
237      *   object name, which would be constant.
238      *
239      *   Returns the folded version of the node, or simply 'this' if no
240      *   folding takes place.
241      */
242     virtual class CTcPrsNode *fold_constants(class CTcPrsSymtab *symtab) = 0;
243 
244     /*
245      *   generate a constant value node for the address of this node;
246      *   returns null if the symbol has no address
247      */
fold_addr_const(class CTcPrsSymtab *)248     virtual class CTcPrsNode *fold_addr_const(class CTcPrsSymtab *)
249     {
250         /* by default, we have no address */
251         return 0;
252     }
253 
254     /*
255      *   Adjust the expression for use as a debugger expression.  Code
256      *   generation for debugger expressions is somewhat different than
257      *   for normal expressions; this routine should allocate a new node,
258      *   if necessary, for debugger use.  Returns the current node if no
259      *   changes are necessary, or a new node if changes are needed.
260      *
261      *   If 'speculative' is true, the expression is being evaluated
262      *   speculatively by the debugger.  This means that the user hasn't
263      *   explicitly asked for the expression to be evaluated, but rather
264      *   the debugger is making a guess that the expression might be of
265      *   interest to the user and is making an unsolicited attempt to
266      *   offer it to the user.  Because the debugger is only guessing that
267      *   the expression is interesting, the expression must not be
268      *   evaluated if it has any side effects at all.
269      */
270     virtual class CTcPrsNode *adjust_for_debug(const tcpn_debug_info *info);
271 };
272 
273 /* ------------------------------------------------------------------------ */
274 /*
275  *   Symbol Table Entry.  Each symbol has an entry in one of the symbol
276  *   tables:
277  *
278  *   - The global symbol table contains object, property, and built-in
279  *   functions from the default function set.
280  *
281  *   - Local symbol tables contain local variables and parameters.  Local
282  *   tables have block-level scope.
283  *
284  *   - Label symbol tables contain code labels (for "goto" statements).
285  *   Label tables have function-level or method-level scope.
286  */
287 
288 /*
289  *   Basic symbol table entry.  The target
290  */
291 class CTcSymbolBase: public CVmHashEntryCS
292 {
293 public:
CTcSymbolBase(const char * str,size_t len,int copy,tc_symtype_t typ)294     CTcSymbolBase(const char *str, size_t len, int copy, tc_symtype_t typ)
295         : CVmHashEntryCS(str, len, copy)
296     {
297         typ_ = typ;
298     }
299 
300     /* allocate symbol entries from the parser memory pool */
301     void *operator new(size_t siz);
302 
303     /* get the symbol type */
get_type()304     tc_symtype_t get_type() const { return typ_; }
305 
306     /* get the symbol text and length */
get_sym()307     const char *get_sym() const { return getstr(); }
get_sym_len()308     size_t get_sym_len() const { return getlen(); }
309 
310     /*
311      *   Generate a constant value node for this symbol, if possible;
312      *   returns null if the symbol does not evaluate to a compile-time
313      *   constant value.  An object name, for example, evaluates to a
314      *   compile-time constant equal to the object reference; a property
315      *   name, in contrast, is (when not qualified by another operator) an
316      *   invocation of the property, hence must be executed at run time,
317      *   hence is not a compile-time constant.
318      */
fold_constant()319     virtual class CTcPrsNode *fold_constant()
320     {
321         /* by default, a symbol's value is not a constant */
322         return 0;
323     }
324 
325     /*
326      *   generate a constant value node for the address of this symbol;
327      *   returns null if the symbol has no address
328      */
fold_addr_const()329     virtual class CTcPrsNode *fold_addr_const()
330     {
331         /* by default, a symbol has no address */
332         return 0;
333     }
334 
335     /* determine if this symbol can be used as an lvalue */
check_lvalue()336     virtual int check_lvalue() const { return FALSE; }
337 
338     /* determine if this symbol can have its address taken */
has_addr()339     virtual int has_addr() const { return FALSE; }
340 
341     /* determine if I have a return value when evaluated */
has_return_value_on_call()342     virtual int has_return_value_on_call() const { return TRUE; }
343 
344     /*
345      *   Write the symbol to a symbol export file.  By default, we'll
346      *   write the type and symbol name to the file.  Some subclasses
347      *   might wish to override this to write additional data, or to write
348      *   something different or nothing at all (for example, built-in
349      *   function symbols are not written to a symbol export file).
350      *
351      *   When a subclass does override this, it must write the type as a
352      *   UINT2 value as the first thing written to the file.  The generic
353      *   file reader switches on this type code to determine what to call
354      *   to load the entry, then calls the subclass-specific loader to do
355      *   the actual work.
356      *
357      *   Returns true if we wrote the symbol to the file, false if not.
358      *   (False doesn't indicate an error - it indicates that we chose not
359      *   to store the symbol because the symbol is not of a type that we
360      *   want to put in the export file.)
361      */
362     virtual int write_to_sym_file(class CVmFile *fp);
363 
364     /* write the symbol name (with a UINT2 length prefix) to a file */
365     int write_name_to_file(class CVmFile *fp);
366 
367     /*
368      *   Write the symbol to an object file.  By default, we'll write the
369      *   type and symbol name to the file.  Some subclasses might wish to
370      *   override this to write additional data, or to write something
371      *   different or nothing at all (for example, built-in function
372      *   symbols are not written to an object file).
373      *
374      *   When a subclass does override this, it must write the type as a
375      *   UINT2 value as the first thing written to the file.  The generic
376      *   file reader switches on this type code to determine what to call
377      *   to load the entry, then calls the subclass-specific loader to do
378      *   the actual work.
379      *
380      *   Returns true if we wrote the symbol to the file, false if not.
381      *   (False doesn't indicate an error - it indicates that we chose not
382      *   to store the symbol because the symbol is not of a type that we
383      *   want to put in the export file.)
384      */
385     virtual int write_to_obj_file(class CVmFile *fp);
386 
387     /*
388      *   Write the symbol's cross references to the object file.  This can
389      *   write references to other symbols by storing the other symbol's
390      *   index in the object file.  Most symbols don't have any cross
391      *   references, so this does nothing by default.
392      *
393      *   If this writes anything, the first thing written must be a UINT4
394      *   giving the object file index of this symbol.  On loading, we'll
395      *   read this and look up the loaded symbol.
396      */
write_refs_to_obj_file(class CVmFile *)397     virtual int write_refs_to_obj_file(class CVmFile *) { return FALSE; }
398 
399     /*
400      *   perform basic writing to a file - this performs common work that
401      *   can be used for object or symbol files
402      */
403     int write_to_file_gen(CVmFile *fp);
404 
405     /*
406      *   Read a symbol from a symbol file, returning the new symbol
407      */
408     static class CTcSymbol *read_from_sym_file(class CVmFile *fp);
409 
410     /*
411      *   Load a symbol from an object file.  Stores the symbol in the
412      *   global symbol table, and fills in the appropriate translation
413      *   mapping table when necessary.  Returns zero on success; logs
414      *   error messages and return non-zero on failure.
415      */
416     static int load_from_obj_file(class CVmFile *fp,
417                                   const textchar_t *fname,
418                                   tctarg_obj_id_t *obj_xlat,
419                                   tctarg_prop_id_t *prop_xlat,
420                                   ulong *enum_xlat);
421 
422     /*
423      *   Load references from the object file - reads the information that
424      *   write_refs_to_obj_file() wrote, except that the caller will have
425      *   read the first UINT4 giving the symbol's object file index before
426      *   calling this routine.
427      */
load_refs_from_obj_file(class CVmFile *,const textchar_t *,tctarg_obj_id_t *,tctarg_prop_id_t *)428     virtual void load_refs_from_obj_file(class CVmFile *,
429                                          const textchar_t * /*obj_fname*/,
430                                          tctarg_obj_id_t * /*obj_xlat*/,
431                                          tctarg_prop_id_t * /*prop_xlat*/)
432     {
433         /* by default, do nothing */
434     }
435 
436     /*
437      *   Log an object file loading conflict with this symbol.  The given
438      *   type is the new type found in the object file of the given name.
439      */
440     void log_objfile_conflict(const textchar_t *fname, tc_symtype_t new_type)
441         const;
442 
443     /*
444      *   Get a pointer to the head of the fixup list for this symbol.
445      *   Symbols such as functions that keep a list of fixups for
446      *   references to the symbol must override this to provide a fixup
447      *   list head; by default, symbols keep no fixup list, so we'll just
448      *   return null.
449      */
get_fixup_list_anchor()450     virtual struct CTcAbsFixup **get_fixup_list_anchor() { return 0; }
451 
452     /*
453      *   Set my code stream anchor object.  By default, symbols don't keep
454      *   track of any stream anchors.  Symbols that refer to code or data
455      *   stream locations directly must keep an anchor, since they must
456      *   keep track of their fixup list in order to fix up generated
457      *   references to the symbol.  This must be overridden by any
458      *   subclasses that keep anchors.
459      */
set_anchor(struct CTcStreamAnchor *)460     virtual void set_anchor(struct CTcStreamAnchor *) { }
461 
462     /*
463      *   Determine if this symbol is external and unresolved.  By default,
464      *   a symbol cannot be external at all, so this will return false.
465      *   Subclasses for symbol types that can be external should override
466      *   this to return true if the symbol is an unresolved external
467      *   reference.
468      */
is_unresolved_extern()469     virtual int is_unresolved_extern() const { return FALSE; }
470 
471     /*
472      *   Mark the symbol as referenced.  Some symbol types keep track of
473      *   whether they've been referenced or not; those types can override
474      *   this to keep track.  This method is called each time the symbol
475      *   is found in the symbol table via the find() or find_or_def()
476      *   methods.  By default, we do nothing.
477      */
mark_referenced()478     virtual void mark_referenced() { }
479 
480     /*
481      *   Apply internal fixups.  If the symbol keeps its own internal
482      *   fixup information, it can translate the fixups here.  By default,
483      *   this does nothing.
484      */
apply_internal_fixups()485     virtual void apply_internal_fixups() { }
486 
487     /*
488      *   Build dictionary entries for this symbol.  Most symbols do
489      *   nothing here; objects which can have associated vocabulary words
490      *   should insert their vocabulary into the dictionary.
491      */
build_dictionary()492     virtual void build_dictionary() { }
493 
494     /*
495      *   Create a new "context variable" version of this symbol for use in
496      *   an anonymous function.  This is only needed for symbols that can
497      *   exist in a local scope.
498      */
new_ctx_var()499     virtual class CTcSymbol *new_ctx_var() const { return 0; }
500 
501     /*
502      *   Apply context variable conversion.  If this symbol has not been
503      *   referenced, this should simply remove the symbol from the symbol
504      *   table.  Otherwise, this should apply the necessary conversions to
505      *   the original symbol from which this symbol was created to ensure
506      *   that the original and this symbol share a context variable slot.
507      *
508      *   Returns true if a conversion was performed (i.e., the symbol was
509      *   referenced), false if not.
510      */
apply_ctx_var_conv(class CTcPrsSymtab *,class CTPNCodeBody *)511     virtual int apply_ctx_var_conv(class CTcPrsSymtab *,
512                                    class CTPNCodeBody *)
513         { return FALSE; }
514 
515     /*
516      *   Finalize context variable conversion.  This should do nothing if
517      *   the variable hasn't already been notified that it's a context
518      *   variable (how this happens varies by symbol type - see locals in
519      *   particular).  This is called with the variable's own scope active
520      *   in the parser, so the final variable assignments for the symbol
521      *   can be made.
522      */
finish_ctx_var_conv()523     virtual void finish_ctx_var_conv() { }
524 
525     /*
526      *   Check for local references.  For variables that can exist in
527      *   local scope, such as locals, this will be called when all of the
528      *   code for the scope has been parsed; this should check to see if
529      *   the symbol has been referenced in the scope, and display an
530      *   appropriate warning message if not.
531      */
check_local_references()532     virtual void check_local_references() { }
533 
534     /*
535      *   Add an entry for this symbol to a "runtime symbol table," which is
536      *   a symbol table that we can pass to the interpreter.  This must be
537      *   overridden by each symbol type for each target architecture,
538      *   because the nature of the runtime symbol table varies by target
539      *   architecture.
540      *
541      *   By default, this does nothing.  Symbol types that don't need to
542      *   generate runtime symbol table entries don't need to override this.
543      */
add_runtime_symbol(class CVmRuntimeSymbols *)544     virtual void add_runtime_symbol(class CVmRuntimeSymbols *) { }
545 
546 protected:
547     /*
548      *   Base routine to read from a symbol file - reads the symbol name.
549      *   Returns a pointer to the symbol name (stored in tokenizer memory
550      *   that will remain valid throughout the compilation) on success; on
551      *   failure, logs an error and returns null.
552      */
553     static const char *base_read_from_sym_file(class CVmFile *fp);
554 
555     /* symbol type */
556     tc_symtype_t typ_;
557 };
558 
559 #endif /* TCPN_H */
560