1 /* $Header: d:/cvsroot/tads/tads3/TCPNBASE.H,v 1.3 1999/07/11 00:46:53 MJRoberts Exp $ */ 2 3 /* 4 * Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved. 5 * 6 * Please see the accompanying license file, LICENSE.TXT, for information 7 * on using and copying this software. 8 */ 9 /* 10 Name 11 tcpn.h - Parse Node - base class 12 Function 13 Defines the target-independent base class for parse nodes 14 Notes 15 All expression parse nodes are derived from the target-specific 16 subclass of this class. The target-independent base class is 17 CTcPrsNodeBase; the target-specific class is CTcPrsNode. 18 Modified 19 05/10/99 MJRoberts - Creation 20 */ 21 22 #ifndef TCPN_H 23 #define TCPN_H 24 25 #include "vmhash.h" 26 27 /* ------------------------------------------------------------------------ */ 28 /* 29 * Parse Tree Allocation Object. This is a base class that can be used 30 * for tree objects that are to be allocated from the parser node pool. 31 */ 32 class CTcPrsAllocObj 33 { 34 public: 35 /* 36 * Override operator new() - allocate all parse node objects out of 37 * the parse node pool. 38 */ 39 void *operator new(size_t siz); 40 }; 41 42 43 /* ------------------------------------------------------------------------ */ 44 /* 45 * adjust_for_debug() information structure 46 */ 47 struct tcpn_debug_info 48 { 49 /* true -> speculative evaluation mode */ 50 int speculative; 51 52 /* 53 * stack level - 0 is the active level, 1 is the first enclosing 54 * level, and so on 55 */ 56 int stack_level; 57 }; 58 59 /* ------------------------------------------------------------------------ */ 60 /* 61 * Parse Tree Expression Node - base class. As we parse an expression, 62 * we build a tree of these objects to describe the source code. 63 * 64 * This class is subclassed for each type of parsing node: each type of 65 * statement has a node type, some statements have helper node types for 66 * parts of statements, and each expression operator has a node type. 67 * These subclasses contain the information specific to the type of 68 * parsing construct represented. 69 * 70 * Each parsing subclass is then further subclassed for each target 71 * architecture. This final subclass contains the code generator for 72 * the node in the target architecture. 73 * 74 * The target-independent base version of each subclass is called 75 * CTPNXxxBase. The target-specific subclass derived from this base 76 * class is CTPNXxx. For example, the final subclass for constant 77 * nodes, which is derived from the target-independent base class 78 * CTPNConstBase, is CTPNConst. (Note that each target uses the same 79 * name for the final subclass, so we can only link one target 80 * architecture into a given build of the compiler. Each additional 81 * target requires a separate compiler executable with the appropriate 82 * CTPNConst classes linked in.) 83 */ 84 class CTcPrsNodeBase: public CTcPrsAllocObj 85 { 86 public: 87 /* 88 * Generate code for the expression for the target architecture. 89 * This method is defined only by the final target-specific 90 * subclasses. 91 * 92 * This method is used to generate code to evaluate the expression 93 * as an rvalue. 94 * 95 * If 'discard' is true, it indicates that any value yielded by the 96 * expression will not be used, in which case the generated code 97 * need not leave the result of the expression on the stack. We can 98 * generate code more efficiently for certain types of expressions 99 * when we know that we're evaluating them only for side effects. 100 * For example, an assignment expression has a result value, but 101 * this value need not be pushed onto the stack if it will simply be 102 * discarded. Also, an operator like "+" that has no side effects 103 * of its own can merely evaluate its operands for their side 104 * effects, but need not compute its own result if that result would 105 * simply be discarded. 106 * 107 * If 'for_condition' is true, it indicates that the result of the 108 * expression will be used directly for a conditional of some kind 109 * (for a "?:" operator, an "if" statement, a "while" statement, or 110 * the like). In some cases, we can avoid extra conversions to some 111 * values when they're going to be used directly for a comparison; 112 * for example, the "&&" operator must return a true/nil value, but 113 * the code generator may be able to avoid the extra conversion when 114 * the value will be used for an "if" statement's conditional value. 115 */ 116 virtual void gen_code(int discard, int for_condition) = 0; 117 118 /* 119 * Get the constant value of the parse node, if available. Most 120 * parse nodes have no constant value, so by default this returns 121 * null. Only constant parse nodes can provide a constant value, so 122 * they should override this. 123 */ get_const_val()124 virtual class CTcConstVal *get_const_val() { return 0; } 125 126 /* determine if the node has a constant value */ is_const()127 int is_const() { return get_const_val() != 0; } 128 129 /* determine if I have a given constant integer value */ is_const_int(int val)130 int is_const_int(int val) 131 { 132 return (is_const() 133 && get_const_val()->get_type() == TC_CVT_INT 134 && get_const_val()->get_val_int() == val); 135 } 136 137 /* 138 * Set the constant value of the parse node from that of another 139 * node. The caller must already have checked that this node and 140 * the value being assigned are both valid constant values. 141 */ set_const_val(class CTcPrsNode * src)142 void set_const_val(class CTcPrsNode *src) 143 { 144 /* set my constant value from the source's constant value */ 145 get_const_val()->set(((CTcPrsNodeBase *)src)->get_const_val()); 146 } 147 148 /* 149 * Check to see if this expression can possibly be a valid lvalue. 150 * Return true if so, false if not. This check is made before 151 * symbol resolution; when it is not certain whether or not a symbol 152 * expression can be an lvalue, assume it can be at this point. By 153 * default, we'll return false; operator nodes whose result can be 154 * used as an lvalue should override this to return true. 155 */ check_lvalue()156 virtual int check_lvalue() const { return FALSE; } 157 158 /* 159 * Check to see if this expression is an valid lvalue, after 160 * resolving symbols in the given scope. Returns true if so, false 161 * if not. 162 */ check_lvalue_resolved(class CTcPrsSymtab * symtab)163 virtual int check_lvalue_resolved(class CTcPrsSymtab *symtab) const 164 { return FALSE; } 165 166 /* 167 * Check to see if this expression can possibly be a valid address 168 * value, so that the address-of ("&") operator can be applied. 169 * Returns true if it is possible, false if not. The only type of 170 * expression whose address can be taken is a simple symbol. The 171 * address of a symbol can be taken only if the symbol is a function 172 * or property name, but we won't know this at parse time, so we'll 173 * indicate that any symbol is acceptable. By default, this returns 174 * false, since the address of most expressions cannot be taken. 175 */ has_addr()176 virtual int has_addr() const { return FALSE; } 177 178 /* 179 * Check to see if this expression is an address expression of some 180 * kind (i.e., of class CTPNAddrBase, or of a class derived from 181 * CTPNAddrBase). Returns true if so, false if not. 182 */ is_addr()183 virtual int is_addr() const { return FALSE; } 184 185 /* 186 * Determine if this node is of type double-quoted string (dstring). 187 * Returns true if so, false if not. By default, we return false. 188 */ is_dstring()189 virtual int is_dstring() const { return FALSE; } 190 191 /* 192 * Determine if this is a simple assignment operator node. Returns 193 * true if so, false if not. By default, we return false. 194 */ is_simple_asi()195 virtual int is_simple_asi() const { return FALSE; } 196 197 /* 198 * Determine if this node yields a value when evaluated. Returns 199 * true if so, false if not. When it cannot be determined at 200 * compile-time whether or not the node has a value (for example, 201 * for a call to a pointer to a function whose return type is not 202 * declared), this should indicate that a value is returned. 203 * 204 * Most nodes yield a value when executed, so we'll return true by 205 * default. 206 */ has_return_value()207 virtual int has_return_value() const { return TRUE; } 208 209 /* 210 * Determine if this node yields a return value when called as a 211 * function. We assume by default that it does. 212 */ has_return_value_on_call()213 virtual int has_return_value_on_call() const { return TRUE; } 214 215 /* 216 * Get the text of the symbol for this node, if any. If the node is 217 * not some kind of symbol node, this returns null. 218 */ get_sym_text()219 virtual const textchar_t *get_sym_text() const { return 0; } get_sym_text_len()220 virtual size_t get_sym_text_len() const { return 0; } 221 222 /* 223 * Fold constant expressions, given a finished symbol table. We do 224 * most of our constant folding during the initial parsing, but some 225 * constant folding must wait until the symbol table is finished; in 226 * particular, we can't figure out what to do with symbols until we 227 * know what the symbols mean. 228 * 229 * For most nodes, this function should merely recurse into subnodes 230 * and fold constants. Nodes that are affected by symbol 231 * resolution, directly or indirectly, should override this. 232 * 233 * For example, a list can change from unknown to constant during 234 * this operation. If the list contains a symbol, the list will 235 * initially be set to unknown, since the symbol could turn out to 236 * be a property evaluation, which would be non-constant, or an 237 * object name, which would be constant. 238 * 239 * Returns the folded version of the node, or simply 'this' if no 240 * folding takes place. 241 */ 242 virtual class CTcPrsNode *fold_constants(class CTcPrsSymtab *symtab) = 0; 243 244 /* 245 * generate a constant value node for the address of this node; 246 * returns null if the symbol has no address 247 */ fold_addr_const(class CTcPrsSymtab *)248 virtual class CTcPrsNode *fold_addr_const(class CTcPrsSymtab *) 249 { 250 /* by default, we have no address */ 251 return 0; 252 } 253 254 /* 255 * Adjust the expression for use as a debugger expression. Code 256 * generation for debugger expressions is somewhat different than 257 * for normal expressions; this routine should allocate a new node, 258 * if necessary, for debugger use. Returns the current node if no 259 * changes are necessary, or a new node if changes are needed. 260 * 261 * If 'speculative' is true, the expression is being evaluated 262 * speculatively by the debugger. This means that the user hasn't 263 * explicitly asked for the expression to be evaluated, but rather 264 * the debugger is making a guess that the expression might be of 265 * interest to the user and is making an unsolicited attempt to 266 * offer it to the user. Because the debugger is only guessing that 267 * the expression is interesting, the expression must not be 268 * evaluated if it has any side effects at all. 269 */ 270 virtual class CTcPrsNode *adjust_for_debug(const tcpn_debug_info *info); 271 }; 272 273 /* ------------------------------------------------------------------------ */ 274 /* 275 * Symbol Table Entry. Each symbol has an entry in one of the symbol 276 * tables: 277 * 278 * - The global symbol table contains object, property, and built-in 279 * functions from the default function set. 280 * 281 * - Local symbol tables contain local variables and parameters. Local 282 * tables have block-level scope. 283 * 284 * - Label symbol tables contain code labels (for "goto" statements). 285 * Label tables have function-level or method-level scope. 286 */ 287 288 /* 289 * Basic symbol table entry. The target 290 */ 291 class CTcSymbolBase: public CVmHashEntryCS 292 { 293 public: CTcSymbolBase(const char * str,size_t len,int copy,tc_symtype_t typ)294 CTcSymbolBase(const char *str, size_t len, int copy, tc_symtype_t typ) 295 : CVmHashEntryCS(str, len, copy) 296 { 297 typ_ = typ; 298 } 299 300 /* allocate symbol entries from the parser memory pool */ 301 void *operator new(size_t siz); 302 303 /* get the symbol type */ get_type()304 tc_symtype_t get_type() const { return typ_; } 305 306 /* get the symbol text and length */ get_sym()307 const char *get_sym() const { return getstr(); } get_sym_len()308 size_t get_sym_len() const { return getlen(); } 309 310 /* 311 * Generate a constant value node for this symbol, if possible; 312 * returns null if the symbol does not evaluate to a compile-time 313 * constant value. An object name, for example, evaluates to a 314 * compile-time constant equal to the object reference; a property 315 * name, in contrast, is (when not qualified by another operator) an 316 * invocation of the property, hence must be executed at run time, 317 * hence is not a compile-time constant. 318 */ fold_constant()319 virtual class CTcPrsNode *fold_constant() 320 { 321 /* by default, a symbol's value is not a constant */ 322 return 0; 323 } 324 325 /* 326 * generate a constant value node for the address of this symbol; 327 * returns null if the symbol has no address 328 */ fold_addr_const()329 virtual class CTcPrsNode *fold_addr_const() 330 { 331 /* by default, a symbol has no address */ 332 return 0; 333 } 334 335 /* determine if this symbol can be used as an lvalue */ check_lvalue()336 virtual int check_lvalue() const { return FALSE; } 337 338 /* determine if this symbol can have its address taken */ has_addr()339 virtual int has_addr() const { return FALSE; } 340 341 /* determine if I have a return value when evaluated */ has_return_value_on_call()342 virtual int has_return_value_on_call() const { return TRUE; } 343 344 /* 345 * Write the symbol to a symbol export file. By default, we'll 346 * write the type and symbol name to the file. Some subclasses 347 * might wish to override this to write additional data, or to write 348 * something different or nothing at all (for example, built-in 349 * function symbols are not written to a symbol export file). 350 * 351 * When a subclass does override this, it must write the type as a 352 * UINT2 value as the first thing written to the file. The generic 353 * file reader switches on this type code to determine what to call 354 * to load the entry, then calls the subclass-specific loader to do 355 * the actual work. 356 * 357 * Returns true if we wrote the symbol to the file, false if not. 358 * (False doesn't indicate an error - it indicates that we chose not 359 * to store the symbol because the symbol is not of a type that we 360 * want to put in the export file.) 361 */ 362 virtual int write_to_sym_file(class CVmFile *fp); 363 364 /* write the symbol name (with a UINT2 length prefix) to a file */ 365 int write_name_to_file(class CVmFile *fp); 366 367 /* 368 * Write the symbol to an object file. By default, we'll write the 369 * type and symbol name to the file. Some subclasses might wish to 370 * override this to write additional data, or to write something 371 * different or nothing at all (for example, built-in function 372 * symbols are not written to an object file). 373 * 374 * When a subclass does override this, it must write the type as a 375 * UINT2 value as the first thing written to the file. The generic 376 * file reader switches on this type code to determine what to call 377 * to load the entry, then calls the subclass-specific loader to do 378 * the actual work. 379 * 380 * Returns true if we wrote the symbol to the file, false if not. 381 * (False doesn't indicate an error - it indicates that we chose not 382 * to store the symbol because the symbol is not of a type that we 383 * want to put in the export file.) 384 */ 385 virtual int write_to_obj_file(class CVmFile *fp); 386 387 /* 388 * Write the symbol's cross references to the object file. This can 389 * write references to other symbols by storing the other symbol's 390 * index in the object file. Most symbols don't have any cross 391 * references, so this does nothing by default. 392 * 393 * If this writes anything, the first thing written must be a UINT4 394 * giving the object file index of this symbol. On loading, we'll 395 * read this and look up the loaded symbol. 396 */ write_refs_to_obj_file(class CVmFile *)397 virtual int write_refs_to_obj_file(class CVmFile *) { return FALSE; } 398 399 /* 400 * perform basic writing to a file - this performs common work that 401 * can be used for object or symbol files 402 */ 403 int write_to_file_gen(CVmFile *fp); 404 405 /* 406 * Read a symbol from a symbol file, returning the new symbol 407 */ 408 static class CTcSymbol *read_from_sym_file(class CVmFile *fp); 409 410 /* 411 * Load a symbol from an object file. Stores the symbol in the 412 * global symbol table, and fills in the appropriate translation 413 * mapping table when necessary. Returns zero on success; logs 414 * error messages and return non-zero on failure. 415 */ 416 static int load_from_obj_file(class CVmFile *fp, 417 const textchar_t *fname, 418 tctarg_obj_id_t *obj_xlat, 419 tctarg_prop_id_t *prop_xlat, 420 ulong *enum_xlat); 421 422 /* 423 * Load references from the object file - reads the information that 424 * write_refs_to_obj_file() wrote, except that the caller will have 425 * read the first UINT4 giving the symbol's object file index before 426 * calling this routine. 427 */ load_refs_from_obj_file(class CVmFile *,const textchar_t *,tctarg_obj_id_t *,tctarg_prop_id_t *)428 virtual void load_refs_from_obj_file(class CVmFile *, 429 const textchar_t * /*obj_fname*/, 430 tctarg_obj_id_t * /*obj_xlat*/, 431 tctarg_prop_id_t * /*prop_xlat*/) 432 { 433 /* by default, do nothing */ 434 } 435 436 /* 437 * Log an object file loading conflict with this symbol. The given 438 * type is the new type found in the object file of the given name. 439 */ 440 void log_objfile_conflict(const textchar_t *fname, tc_symtype_t new_type) 441 const; 442 443 /* 444 * Get a pointer to the head of the fixup list for this symbol. 445 * Symbols such as functions that keep a list of fixups for 446 * references to the symbol must override this to provide a fixup 447 * list head; by default, symbols keep no fixup list, so we'll just 448 * return null. 449 */ get_fixup_list_anchor()450 virtual struct CTcAbsFixup **get_fixup_list_anchor() { return 0; } 451 452 /* 453 * Set my code stream anchor object. By default, symbols don't keep 454 * track of any stream anchors. Symbols that refer to code or data 455 * stream locations directly must keep an anchor, since they must 456 * keep track of their fixup list in order to fix up generated 457 * references to the symbol. This must be overridden by any 458 * subclasses that keep anchors. 459 */ set_anchor(struct CTcStreamAnchor *)460 virtual void set_anchor(struct CTcStreamAnchor *) { } 461 462 /* 463 * Determine if this symbol is external and unresolved. By default, 464 * a symbol cannot be external at all, so this will return false. 465 * Subclasses for symbol types that can be external should override 466 * this to return true if the symbol is an unresolved external 467 * reference. 468 */ is_unresolved_extern()469 virtual int is_unresolved_extern() const { return FALSE; } 470 471 /* 472 * Mark the symbol as referenced. Some symbol types keep track of 473 * whether they've been referenced or not; those types can override 474 * this to keep track. This method is called each time the symbol 475 * is found in the symbol table via the find() or find_or_def() 476 * methods. By default, we do nothing. 477 */ mark_referenced()478 virtual void mark_referenced() { } 479 480 /* 481 * Apply internal fixups. If the symbol keeps its own internal 482 * fixup information, it can translate the fixups here. By default, 483 * this does nothing. 484 */ apply_internal_fixups()485 virtual void apply_internal_fixups() { } 486 487 /* 488 * Build dictionary entries for this symbol. Most symbols do 489 * nothing here; objects which can have associated vocabulary words 490 * should insert their vocabulary into the dictionary. 491 */ build_dictionary()492 virtual void build_dictionary() { } 493 494 /* 495 * Create a new "context variable" version of this symbol for use in 496 * an anonymous function. This is only needed for symbols that can 497 * exist in a local scope. 498 */ new_ctx_var()499 virtual class CTcSymbol *new_ctx_var() const { return 0; } 500 501 /* 502 * Apply context variable conversion. If this symbol has not been 503 * referenced, this should simply remove the symbol from the symbol 504 * table. Otherwise, this should apply the necessary conversions to 505 * the original symbol from which this symbol was created to ensure 506 * that the original and this symbol share a context variable slot. 507 * 508 * Returns true if a conversion was performed (i.e., the symbol was 509 * referenced), false if not. 510 */ apply_ctx_var_conv(class CTcPrsSymtab *,class CTPNCodeBody *)511 virtual int apply_ctx_var_conv(class CTcPrsSymtab *, 512 class CTPNCodeBody *) 513 { return FALSE; } 514 515 /* 516 * Finalize context variable conversion. This should do nothing if 517 * the variable hasn't already been notified that it's a context 518 * variable (how this happens varies by symbol type - see locals in 519 * particular). This is called with the variable's own scope active 520 * in the parser, so the final variable assignments for the symbol 521 * can be made. 522 */ finish_ctx_var_conv()523 virtual void finish_ctx_var_conv() { } 524 525 /* 526 * Check for local references. For variables that can exist in 527 * local scope, such as locals, this will be called when all of the 528 * code for the scope has been parsed; this should check to see if 529 * the symbol has been referenced in the scope, and display an 530 * appropriate warning message if not. 531 */ check_local_references()532 virtual void check_local_references() { } 533 534 /* 535 * Add an entry for this symbol to a "runtime symbol table," which is 536 * a symbol table that we can pass to the interpreter. This must be 537 * overridden by each symbol type for each target architecture, 538 * because the nature of the runtime symbol table varies by target 539 * architecture. 540 * 541 * By default, this does nothing. Symbol types that don't need to 542 * generate runtime symbol table entries don't need to override this. 543 */ add_runtime_symbol(class CVmRuntimeSymbols *)544 virtual void add_runtime_symbol(class CVmRuntimeSymbols *) { } 545 546 protected: 547 /* 548 * Base routine to read from a symbol file - reads the symbol name. 549 * Returns a pointer to the symbol name (stored in tokenizer memory 550 * that will remain valid throughout the compilation) on success; on 551 * failure, logs an error and returns null. 552 */ 553 static const char *base_read_from_sym_file(class CVmFile *fp); 554 555 /* symbol type */ 556 tc_symtype_t typ_; 557 }; 558 559 #endif /* TCPN_H */ 560