1 #ifdef RCSID
2 static char RCSid[] =
3 "$Header: d:/cvsroot/tads/tads3/TCPRSIMG.CPP,v 1.1 1999/07/11 00:46:53 MJRoberts Exp $";
4 #endif
5
6 /*
7 * Copyright (c) 1999, 2002 Michael J. Roberts. All Rights Reserved.
8 *
9 * Please see the accompanying license file, LICENSE.TXT, for information
10 * on using and copying this software.
11 */
12 /*
13 Name
14 tcprsimg.cpp - TADS 3 Compiler Parser - image writing functions
15 Function
16
17 Notes
18
19 Modified
20 04/30/99 MJRoberts - Creation
21 */
22
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <assert.h>
27
28 #include "os.h"
29 #include "t3std.h"
30 #include "tcprs.h"
31 #include "tctarg.h"
32 #include "tcgen.h"
33 #include "vmhash.h"
34 #include "tcmain.h"
35 #include "vmfile.h"
36 #include "tctok.h"
37
38
39 /* ------------------------------------------------------------------------ */
40 /*
41 * Read an object file and load the global symbol table
42 */
load_object_file(class CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,tctarg_prop_id_t * prop_xlat,ulong * enum_xlat)43 int CTcParser::load_object_file(class CVmFile *fp, const textchar_t *fname,
44 tctarg_obj_id_t *obj_xlat,
45 tctarg_prop_id_t *prop_xlat,
46 ulong *enum_xlat)
47 {
48 ulong sym_cnt;
49 ulong dict_cnt;
50 ulong i;
51 ulong anon_cnt;
52 ulong nonsym_cnt;
53 ulong prod_cnt;
54 ulong exp_cnt;
55
56 /* read the number of symbol index entries */
57 sym_cnt = (long)fp->read_int4();
58 if (sym_cnt != 0)
59 {
60 /* allocate space for the symbol index list */
61 obj_sym_list_ = (CTcSymbol **)
62 t3malloc(sym_cnt * sizeof(obj_sym_list_[0]));
63
64 /* the list is empty so far */
65 obj_file_sym_idx_ = 0;
66 }
67
68 /* read the number of dictionary symbols */
69 dict_cnt = (ulong)fp->read_int4();
70
71 /* if there are any symbols, read them */
72 if (dict_cnt != 0)
73 {
74 /* allocate space for the dictionary index list */
75 obj_dict_list_ = (CTcDictEntry **)
76 t3malloc(dict_cnt * sizeof(obj_dict_list_[0]));
77
78 /* nothing in the list yet */
79 obj_file_dict_idx_ = 0;
80 }
81
82 /* read the number of symbols in the file */
83 sym_cnt = (ulong)fp->read_int4();
84
85 /* read the symbols */
86 for (i = 0 ; i < sym_cnt ; ++i)
87 {
88 /* load a symbol */
89 if (CTcSymbol::load_from_obj_file(fp, fname,
90 obj_xlat, prop_xlat, enum_xlat))
91 return 1;
92 }
93
94 /* read the number of anonymous object symbols */
95 anon_cnt = (ulong)fp->read_int4();
96
97 /* read the anonymous object symbols */
98 for (i = 0 ; i < anon_cnt ; ++i)
99 {
100 /* load the next anonymous object symbol */
101 if (CTcSymObj::load_from_obj_file(fp, fname, obj_xlat, TRUE))
102 return 1;
103 }
104
105 /* read the non-symbol object ID's */
106 nonsym_cnt = (ulong)fp->read_int4();
107 for (i = 0 ; i < nonsym_cnt ; ++i)
108 {
109 tctarg_obj_id_t id;
110
111 /* read the next non-symbol object ID */
112 id = (tctarg_obj_id_t)fp->read_int4();
113
114 /*
115 * allocate a new ID for the object, and set the translation
116 * table for the new ID - this will ensure that references to
117 * this non-symbol object are properly fixed up
118 */
119 obj_xlat[id] = G_cg->new_obj_id();
120 }
121
122 /* read the number of symbol cross-reference sections in the file */
123 sym_cnt = (ulong)fp->read_int4();
124
125 /* read the symbol cross-references */
126 for (i = 0 ; i < sym_cnt ; ++i)
127 {
128 ulong idx;
129 CTcSymbol *sym;
130
131 /* read the symbol index */
132 idx = (ulong)fp->read_int4();
133
134 /* get the symbol from the index list */
135 sym = get_objfile_sym(idx);
136
137 /* load the symbol's reference information */
138 sym->load_refs_from_obj_file(fp, fname, obj_xlat, prop_xlat);
139 }
140
141 /* read the number of anonymous object cross-references */
142 anon_cnt = (ulong)fp->read_int4();
143
144 /* read the anonymous object cross-references */
145 for (i = 0 ; i < anon_cnt ; ++i)
146 {
147 ulong idx;
148 CTcSymbol *sym;
149
150 /* read the symbol index */
151 idx = (ulong)fp->read_int4();
152
153 /* get the symbol from the index list */
154 sym = get_objfile_sym(idx);
155
156 /* load the symbol's reference information */
157 sym->load_refs_from_obj_file(fp, fname, obj_xlat, prop_xlat);
158 }
159
160 /* read the master grammar rule count */
161 prod_cnt = (ulong)fp->read_int4();
162
163 /* read the master grammar rule list */
164 for (i = 0 ; i < prod_cnt ; ++i)
165 {
166 /* read the next grammar production */
167 CTcGramProdEntry::load_from_obj_file(fp, prop_xlat, enum_xlat, 0);
168 }
169
170 /* read the number of named grammar rules */
171 prod_cnt = (ulong)fp->read_int4();
172
173 /* read the private grammar rules */
174 for (i = 0 ; i < prod_cnt ; ++i)
175 {
176 CTcSymObj *match_sym;
177
178 /* read the match object defining the rule */
179 match_sym = get_objfile_objsym(fp->read_int4());
180
181 /* read the private rule list */
182 CTcGramProdEntry::load_from_obj_file(
183 fp, prop_xlat, enum_xlat, match_sym);
184 }
185
186 /* read the export symbol list */
187 exp_cnt = (ulong)fp->read_int4();
188 for (i = 0 ; i < exp_cnt ; ++i)
189 {
190 CTcPrsExport *exp;
191
192 /* read the next entry */
193 exp = CTcPrsExport::read_from_obj_file(fp);
194
195 /* if that failed, the whole load fails */
196 if (exp == 0)
197 return 1;
198
199 /* add it to our list */
200 add_export_to_list(exp);
201 }
202
203 /* done with the symbol index list - free it */
204 if (obj_sym_list_ != 0)
205 {
206 /* free it and forget it */
207 t3free(obj_sym_list_);
208 obj_sym_list_ = 0;
209 }
210
211 /* done with the dictionary index list - free it */
212 if (obj_dict_list_ != 0)
213 {
214 /* free the memory and forget it */
215 t3free(obj_dict_list_);
216 obj_dict_list_ = 0;
217 }
218
219 /* success */
220 return 0;
221 }
222
223
224 /* ------------------------------------------------------------------------ */
225 /*
226 * Generate code and write the image file
227 */
build_image(class CVmFile * image_fp,uchar xor_mask,const char tool_data[4])228 void CTPNStmProg::build_image(class CVmFile *image_fp, uchar xor_mask,
229 const char tool_data[4])
230 {
231 /* generate code */
232 if (gen_code_for_build())
233 return;
234
235 /* scan the symbol table for unresolved external references */
236 if (G_prs->check_unresolved_externs())
237 return;
238
239 /*
240 * Finally, our task of constructing the program is complete. All
241 * that remains is to write the image file. Tell the code generator
242 * to begin the process.
243 */
244 G_cg->write_to_image(image_fp, xor_mask, tool_data);
245 }
246
247 /* ------------------------------------------------------------------------ */
248 /*
249 * Generate code and write the object file
250 */
build_object_file(class CVmFile * object_fp,class CTcMake * make_obj)251 void CTPNStmProg::build_object_file(class CVmFile *object_fp,
252 class CTcMake *make_obj)
253 {
254 /* generate code */
255 if (gen_code_for_build())
256 return;
257
258 /*
259 * Finally, our task of constructing the program is complete. All
260 * that remains is to write the image file. Tell the code generator
261 * to begin the process.
262 */
263 G_cg->write_to_object_file(object_fp, make_obj);
264 }
265
266 /* ------------------------------------------------------------------------ */
267 /*
268 * Generate code for a build, in preparation for writing an image file
269 * or an object file.
270 */
gen_code_for_build()271 int CTPNStmProg::gen_code_for_build()
272 {
273 /* notify the tokenizer that parsing is done */
274 G_tok->parsing_done();
275
276 /* notify the code generator that we're finished parsing */
277 G_cg->parsing_done();
278
279 /* set the global symbol table in the code streams */
280 G_cs_main->set_symtab(G_prs->get_global_symtab());
281 G_cs_static->set_symtab(G_prs->get_global_symtab());
282
283 /* generate code for the entire program */
284 gen_code(TRUE, TRUE);
285
286 /*
287 * if we encountered any errors generating code, don't bother
288 * writing the image
289 */
290 if (G_tcmain->get_error_count() != 0)
291 return 1;
292
293 /* return success */
294 return 0;
295 }
296
297 /* ------------------------------------------------------------------------ */
298 /*
299 * Check for unresolved external symbols. Logs an error for each
300 * unresolved external.
301 */
check_unresolved_externs()302 int CTcParser::check_unresolved_externs()
303 {
304 int errcnt;
305
306 /* note the previous error count */
307 errcnt = G_tcmain->get_error_count();
308
309 /* enumerate the entries with our unresolved check callback */
310 get_global_symtab()->enum_entries(&enum_sym_extref, this);
311
312 /*
313 * if the error count increased, we logged errors for unresolved
314 * symbols
315 */
316 return (G_tcmain->get_error_count() > errcnt);
317 }
318
319 /*
320 * Enumeration callback - check for unresolved external references. For
321 * each object or function still marked "external," we'll log an error.
322 */
enum_sym_extref(void *,CTcSymbol * sym)323 void CTcParser::enum_sym_extref(void *, CTcSymbol *sym)
324 {
325 /* if it's an external symbol, log an error */
326 if (sym->is_unresolved_extern())
327 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_UNRESOLVED_EXTERN,
328 (int)sym->get_sym_len(), sym->get_sym());
329 }
330
331
332 /* ------------------------------------------------------------------------ */
333 /*
334 * Build dictionaries. We go through all objects and insert their
335 * vocabulary words into their dictionaries.
336 */
build_dictionaries()337 void CTcParser::build_dictionaries()
338 {
339 CTcDictEntry *dict;
340 CTcSymObj *sym;
341
342 /*
343 * enumerate our symbols to insert dictionary words - this will
344 * populate each dictionary's hash table with a complete list of the
345 * words and object associations for the dictionary
346 */
347 get_global_symtab()->enum_entries(&enum_sym_dict, this);
348
349 /* do the same for the anonymous objects */
350 for (sym = anon_obj_head_ ; sym != 0 ; sym = (CTcSymObj *)sym->nxt_)
351 sym->build_dictionary();
352
353 /* generate the object stream for each dictionary */
354 for (dict = dict_head_ ; dict != 0 ; dict = dict->get_next())
355 {
356 /* generate the code (static data, actually) for this dictionary */
357 G_cg->gen_code_for_dict(dict);
358 }
359 }
360
361 /*
362 * enumeration callback - build dictionaries
363 */
enum_sym_dict(void *,CTcSymbol * sym)364 void CTcParser::enum_sym_dict(void *, CTcSymbol *sym)
365 {
366 /* tell this symbol to build its dictionary entries */
367 sym->build_dictionary();
368 }
369
370 /* ------------------------------------------------------------------------ */
371 /*
372 * Build grammar productions
373 */
build_grammar_productions()374 void CTcParser::build_grammar_productions()
375 {
376 CTcGramProdEntry *entry;
377
378 /*
379 * First, run through the symbol table and merge all of the private
380 * grammar rules into the master grammar rule list. Since we've
381 * finished linking, we've already applied all modify/replace
382 * overrides, hence each symbol table entry referring to an object
383 * will contain its final private grammar rule list. So, we can
384 * safely merge the private lists into the master lists at this point,
385 * since no more modifications to private lists are possible.
386 */
387 get_global_symtab()->enum_entries(&build_grammar_cb, this);
388
389 /*
390 * iterate over the master list of productions and generate the image
391 * data for each one
392 */
393 for (entry = gramprod_head_ ; entry != 0 ; entry = entry->get_next())
394 {
395 /* build this entry */
396 G_cg->gen_code_for_gramprod(entry);
397 }
398 }
399
400 /*
401 * Symbol table enumeration callback - merge match object private grammar
402 * rules into the master grammar rule list.
403 */
build_grammar_cb(void *,CTcSymbol * sym)404 void CTcParser::build_grammar_cb(void *, CTcSymbol *sym)
405 {
406 /* if this is an object, merge its private grammar list */
407 if (sym->get_type() == TC_SYM_OBJ)
408 ((CTcSymObj *)sym)->merge_grammar_entry();
409 }
410
411 /* ------------------------------------------------------------------------ */
412 /*
413 * Apply self-reference object ID fixups. This traverses the symbol
414 * table and applies each object's list of fixups. This can be called
415 * once after loading all object files.
416 */
apply_internal_fixups()417 void CTcParser::apply_internal_fixups()
418 {
419 CTcSymObj *anon_obj;
420
421 /* enumerate the entries with our callback */
422 get_global_symtab()->enum_entries(&enum_sym_internal_fixup, this);
423
424 /* apply internal fixups to our anonymous objects */
425 for (anon_obj = anon_obj_head_ ; anon_obj != 0 ;
426 anon_obj = (CTcSymObj *)anon_obj->nxt_)
427 {
428 /* apply internal fixups to this symbol */
429 anon_obj->apply_internal_fixups();
430 }
431 }
432
433 /*
434 * Enumeration callback - apply internal ID fixups
435 */
enum_sym_internal_fixup(void *,CTcSymbol * sym)436 void CTcParser::enum_sym_internal_fixup(void *, CTcSymbol *sym)
437 {
438 /* apply its self-reference fixups */
439 sym->apply_internal_fixups();
440 }
441
442 /* ------------------------------------------------------------------------ */
443 /*
444 * Basic symbol class - image/object file functions
445 */
446
447 /*
448 * Read a symbol from an object file
449 */
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,tctarg_prop_id_t * prop_xlat,ulong * enum_xlat)450 int CTcSymbolBase::load_from_obj_file(CVmFile *fp, const textchar_t *fname,
451 tctarg_obj_id_t *obj_xlat,
452 tctarg_prop_id_t *prop_xlat,
453 ulong *enum_xlat)
454 {
455 tc_symtype_t typ;
456
457 /*
458 * read the type - this is the one thing we know is always present
459 * for every symbol (the rest of the data might vary per subclass)
460 */
461 typ = (tc_symtype_t)fp->read_uint2();
462
463 /* create the object based on the type */
464 switch(typ)
465 {
466 case TC_SYM_FUNC:
467 return CTcSymFunc::load_from_obj_file(fp, fname);
468
469 case TC_SYM_OBJ:
470 return CTcSymObj::load_from_obj_file(fp, fname, obj_xlat, FALSE);
471
472 case TC_SYM_PROP:
473 return CTcSymProp::load_from_obj_file(fp, fname, prop_xlat);
474
475 case TC_SYM_ENUM:
476 return CTcSymEnum::load_from_obj_file(fp, fname, enum_xlat);
477
478 case TC_SYM_BIF:
479 return CTcSymBif::load_from_obj_file(fp, fname);
480
481 case TC_SYM_METACLASS:
482 return CTcSymMetaclass::load_from_obj_file(fp, fname, obj_xlat);
483
484 default:
485 /* other types should not be in an object file */
486 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_INV_TYPE);
487
488 /* return an error indication */
489 return 1;
490 }
491 }
492
493 /*
494 * Log a conflict with another symbol from an object file
495 */
log_objfile_conflict(const textchar_t * fname,tc_symtype_t new_type) const496 void CTcSymbolBase::log_objfile_conflict(const textchar_t *fname,
497 tc_symtype_t new_type) const
498 {
499 static const textchar_t *type_name[] =
500 {
501 "unknown", "function", "object", "property", "local",
502 "parameter", "intrinsic function", "native function", "code label",
503 "intrinsic class", "enum"
504 };
505
506 /*
507 * if the types differ, log an error indicating the different types;
508 * otherwise, simply log an error indicating the redefinition
509 */
510 if (new_type != get_type())
511 {
512 /* the types differ - show the two types */
513 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_REDEF_SYM_TYPE,
514 (int)get_sym_len(), get_sym(),
515 type_name[get_type()], type_name[new_type],
516 fname);
517 }
518 else
519 {
520 /* the types are the same */
521 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_REDEF_SYM,
522 (int)get_sym_len(), get_sym(),
523 type_name[new_type], fname);
524 }
525 }
526
527
528 /* ------------------------------------------------------------------------ */
529 /*
530 * Function Symbol subclass - image/object file functions
531 */
532
533 /*
534 * Load from an object file
535 */
load_from_obj_file(CVmFile * fp,const textchar_t * fname)536 int CTcSymFuncBase::load_from_obj_file(CVmFile *fp,
537 const textchar_t *fname)
538 {
539 const char *txt;
540 size_t len;
541 char buf[9];
542 int is_extern;
543 int ext_replace;
544 int ext_modify;
545 int has_retval;
546 int varargs;
547 int argc;
548 int mod_base_cnt;
549 CTcSymFunc *sym;
550
551 /* read the symbol name information */
552 txt = base_read_from_sym_file(fp);
553 len = strlen(txt);
554
555 /* read our extra data */
556 fp->read_bytes(buf, 9);
557 argc = osrp2(buf);
558 varargs = buf[2];
559 has_retval = buf[3];
560 is_extern = buf[4];
561 ext_replace = buf[5];
562 ext_modify = buf[6];
563 mod_base_cnt = osrp2(buf + 7);
564
565 /* look up any existing symbol */
566 sym = (CTcSymFunc *)G_prs->get_global_symtab()->find(txt, len);
567
568 /*
569 * If this symbol is already defined, make sure the original
570 * definition is a function, and make sure that it's only defined
571 * (not referenced as external) once. If it's not defined, define
572 * it anew.
573 */
574 if (sym == 0)
575 {
576 /*
577 * It's not defined yet - create the new definition and add it
578 * to the symbol table.
579 */
580 sym = new CTcSymFunc(txt, len, FALSE, argc, varargs, has_retval,
581 is_extern);
582 G_prs->get_global_symtab()->add_entry(sym);
583
584 /* it's an error if we're replacing a previously undefined function */
585 if (ext_replace || ext_modify)
586 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
587 TCERR_OBJFILE_REPFUNC_BEFORE_ORIG,
588 (int)len, txt, fname);
589 }
590 else if (sym->get_type() != TC_SYM_FUNC
591 || (!sym->is_extern()
592 && !is_extern && !ext_replace && !ext_modify))
593 {
594 /*
595 * It's already defined, but it's not a function, or this is a
596 * non-extern/replacd definition and the symbol is already
597 * defined non-extern - log a symbol type conflict error.
598 */
599 sym->log_objfile_conflict(fname, TC_SYM_FUNC);
600
601 /*
602 * proceed despite the error, since this is merely a symbol
603 * conflict and not a file corruption - create a fake symbol to
604 * hold the information, so that we can read the data and thus
605 * keep in sync with the file, but don't bother adding the fake
606 * symbol object to the symbol table
607 */
608 sym = new CTcSymFunc(txt, len, FALSE, argc, varargs, has_retval,
609 is_extern);
610 }
611 else if (sym->get_argc() != argc
612 || sym->is_varargs() != varargs
613 || sym->has_retval() != has_retval)
614 {
615 /* the symbol has an incompatible definition - log the error */
616 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_FUNC_INCOMPAT,
617 (int)len, txt, fname);
618 }
619
620 /*
621 * if this is a non-extern definition, we now have the object
622 * defined -- remove the 'extern' flag from the symbol table entry
623 * in this case
624 */
625 if (!is_extern)
626 {
627 /* mark the symbol as defined */
628 sym->set_extern(FALSE);
629
630 /*
631 * if we're replacing it, delete the original; if we're modifying
632 * it, chain the original into our modify list
633 */
634 if (ext_replace)
635 {
636 int i;
637
638 /*
639 * mark the previous code anchor as obsolete so that we
640 * don't write its code to the image file
641 */
642 if (sym->get_anchor() != 0)
643 sym->get_anchor()->set_replaced(TRUE);
644
645 /*
646 * Mark all of the modified base function code offsets as
647 * replaced as well.
648 */
649 for (i = 0 ; i < sym->get_mod_base_offset_count() ; ++i)
650 {
651 CTcStreamAnchor *anchor;
652
653 /* get the anchor for this offset */
654 anchor = G_cs->find_anchor(sym->get_mod_base_offset(i));
655
656 /* mark it as replaced */
657 if (anchor != 0)
658 anchor->set_replaced(TRUE);
659 }
660
661 /*
662 * We can now forget everything in the modify base list, as
663 * everything in the list is being replaced and is thus no
664 * longer relevant.
665 */
666 sym->clear_mod_base_offsets();
667 }
668 else if (ext_modify)
669 {
670 /*
671 * We're modifying an external symbol. The anchor to the code
672 * stream object that we previously loaded is actually the
673 * anchor to the modified base object, not to the new meaning
674 * of the symbol, so detach the anchor from our symbol.
675 */
676 sym->get_anchor()->detach_from_symbol();
677
678 /*
679 * The object file has a fixup list for references to the
680 * external base object that we're modifying. In other words,
681 * these are external references from the object file we're
682 * loading to the now-nameless code stream object that we're
683 * replacing, which is the code stream object at our anchor.
684 * So, load those fixups into the anchor's new internal fixup
685 * list. It's important to note that these aren't references
686 * to this symbol - they're specifically references to the
687 * modified base code stream object.
688 */
689 CTcAbsFixup::load_fixup_list_from_object_file(
690 fp, fname, sym->get_anchor()->fixup_list_head_);
691
692 /*
693 * Add the old code stream anchor to the list of modified base
694 * offsets for the function. The function we're reading from
695 * the object file modifies this as a base function, so we need
696 * to add this to the list of modified base functions.
697 */
698 sym->add_mod_base_offset(sym->get_anchor()->get_ofs());
699
700 /*
701 * Complete the dissociation from the anchor by forgetting the
702 * anchor in the symbol. This will allow the code stream
703 * object that's associated with this symbol in the current
704 * file to take over the anchor duty for this symbol, which
705 * will ensure that all fixups that reference this symbol will
706 * be resolved to the new code stream object.
707 */
708 sym->set_anchor(0);
709 }
710 }
711
712 /*
713 * Read the list of modified base function offsets. Each entry is a
714 * code stream offset, so adjust each using the base code stream offset
715 * for this object file.
716 */
717 for ( ; mod_base_cnt != 0 ; --mod_base_cnt)
718 {
719 int i;
720
721 /* read them */
722 for (i = 0 ; i < mod_base_cnt ; ++i)
723 {
724 /* read the offset, adjusting to the object file start position */
725 ulong ofs = fp->read_int4() + G_cs->get_object_file_start_ofs();
726
727 /* append this item */
728 sym->add_mod_base_offset(ofs);
729 }
730 }
731
732 /* if it's extern, load the fixup list */
733 if (is_extern)
734 {
735 /*
736 * This is an external reference, so we must load our fixup
737 * list, adding it to any fixup list that already exists with
738 * the symbol.
739 */
740 CTcAbsFixup::
741 load_fixup_list_from_object_file(fp, fname, &sym->fixups_);
742 }
743
744 /* success */
745 return 0;
746 }
747
748 /* ------------------------------------------------------------------------ */
749 /*
750 * object symbol entry base - image/object file functions
751 */
752
753 /*
754 * Load from an object file
755 */
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,int anon)756 int CTcSymObjBase::load_from_obj_file(CVmFile *fp,
757 const textchar_t *fname,
758 tctarg_obj_id_t *obj_xlat,
759 int anon)
760 {
761 /*
762 * do the main loading - if it fails to return a symbol, return
763 * failure (i.e., non-zero)
764 */
765 return (load_from_obj_file_main(fp, fname, obj_xlat, 0, 0, anon) == 0);
766 }
767
768 /*
769 * Load a modified base object from an object file
770 */
771 CTcSymObj *CTcSymObjBase::
load_from_obj_file_modbase(class CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,const textchar_t * mod_name,size_t mod_name_len,int anon)772 load_from_obj_file_modbase(class CVmFile *fp, const textchar_t *fname,
773 tctarg_obj_id_t *obj_xlat,
774 const textchar_t *mod_name,
775 size_t mod_name_len, int anon)
776 {
777 /* skip the type prefix - we know it's an object */
778 fp->read_uint2();
779
780 /* load the object and return the symbol */
781 return load_from_obj_file_main(fp, fname, obj_xlat,
782 mod_name, mod_name_len, anon);
783 }
784
785 /*
786 * Load from an object file. This main routine does most of the work,
787 * and returns the loaded symbol.
788 *
789 * 'mod_name' is the primary symbol name for a stack of 'modify'
790 * objects. Each of the objects in a 'modify' stack, except for the
791 * topmost (i.e., last defined) object, has a fake symbol name, since
792 * the program can't refer directly to the base object once modified.
793 * However, while loading, we must know the actual name for the entire
794 * stack, so that we can link the bottom of the stack in this object
795 * file to the top of the stack in another object file if the bottom of
796 * our stack is declared external (i.e., this object file's source code
797 * used 'modify' with an external object). If we're loading a top-level
798 * object, not a modified object, 'mod_name' should be null.
799 */
800 CTcSymObj *CTcSymObjBase::
load_from_obj_file_main(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,const textchar_t * mod_name,size_t mod_name_len,int anon)801 load_from_obj_file_main(CVmFile *fp, const textchar_t *fname,
802 tctarg_obj_id_t *obj_xlat,
803 const textchar_t *mod_name, size_t mod_name_len,
804 int anon)
805 {
806 const char *txt;
807 size_t len;
808 char buf[32];
809 ulong id;
810 int is_extern;
811 int stream_ofs_valid;
812 ulong stream_ofs;
813 CTcSymObj *sym;
814 CTcSymObj *mod_base_sym;
815 int modify_flag;
816 int ext_modify_flag;
817 int ext_replace_flag;
818 int modified_flag;
819 int class_flag;
820 CTcIdFixup *fixups;
821 CTcObjPropDel *del_prop_head;
822 tc_metaclass_t meta;
823 uint dict_idx;
824 int use_fake_sym;
825 uint obj_file_idx;
826 int trans_flag;
827
828 /* presume we won't have to use a fake symbol */
829 use_fake_sym = FALSE;
830
831 /* presume we won't be able to read a stream offset */
832 stream_ofs_valid = FALSE;
833
834 /* read the symbol name information if it's not anonymous */
835 if (!anon)
836 {
837 /* read the symbol name */
838 txt = base_read_from_sym_file(fp);
839 if (txt == 0)
840 return 0;
841 }
842 else
843 {
844 /* use ".anon" as our symbol name placeholder */
845 txt = ".anon";
846 }
847
848 /* get the symbol len */
849 len = strlen(txt);
850
851 /* read our extra data */
852 fp->read_bytes(buf, 17);
853 id = osrp4(buf);
854 is_extern = buf[4];
855 ext_replace_flag = buf[5];
856 modified_flag = buf[6];
857 modify_flag = buf[7];
858 ext_modify_flag = buf[8];
859 class_flag = buf[9];
860 trans_flag = buf[10];
861 meta = (tc_metaclass_t)osrp2(buf + 11);
862 dict_idx = osrp2(buf + 13);
863 obj_file_idx = osrp2(buf + 15);
864
865 /*
866 * if we're not external, read our stream offset, and adjust for the
867 * object stream base in the object file
868 */
869 if (!is_extern)
870 {
871 CTcDataStream *stream;
872
873 /* get the appropriate stream */
874 stream = get_stream_from_meta(meta);
875
876 /* read the relative stream offset */
877 stream_ofs = fp->read_int4();
878
879 /*
880 * Ensure the stream offset was actually valid. It must be valid
881 * unless the object has no stream (for example, dictionary and
882 * grammar production objects are not generated until link time,
883 * hence they don't have to have - indeed, can't have - valid
884 * stream offsets when we're loading an object file).
885 */
886 assert(stream_ofs != 0xffffffff || stream == 0);
887
888 /* determine if it's valid */
889 if (stream_ofs != 0xffffffff)
890 {
891 /* adjust it relative to this object file's stream base */
892 stream_ofs += stream->get_object_file_start_ofs();
893
894 /* note that it's valid */
895 stream_ofs_valid = TRUE;
896 }
897 else
898 {
899 /* the stream offset is not valid */
900 stream_ofs_valid = FALSE;
901 }
902 }
903
904 /* we have no deleted properties yet */
905 del_prop_head = 0;
906
907 /* if this is a 'modify' object, read some additional data */
908 if (modify_flag)
909 {
910 uint cnt;
911
912 /* read the deleted property list */
913 for (cnt = fp->read_uint2() ; cnt != 0 ; --cnt)
914 {
915 const char *prop_name;
916 CTcSymProp *prop_sym;
917
918 /* read the symbol name from the file */
919 prop_name = base_read_from_sym_file(fp);
920 if (prop_name == 0)
921 return 0;
922
923 /*
924 * find the property symbol, or define it if it's not
925 * already defined as a property
926 */
927 prop_sym = (CTcSymProp *)G_prs->get_global_symtab()
928 ->find_or_def_prop(prop_name, strlen(prop_name),
929 FALSE);
930
931 /* make sure it's a property */
932 if (prop_sym->get_type() != TC_SYM_PROP)
933 {
934 /* it's not a property - log the conflict */
935 prop_sym->log_objfile_conflict(fname, TC_SYM_PROP);
936 }
937 else
938 {
939 /* add the entry to my list */
940 add_del_prop_to_list(&del_prop_head, prop_sym);
941 }
942 }
943 }
944
945 /* read the self-reference fixup list */
946 fixups = 0;
947 CTcIdFixup::load_object_file(fp, 0, 0, TCGEN_XLAT_OBJ,
948 4, fname, &fixups);
949
950 /*
951 * if this is a 'modify' object, load the base object - this is the
952 * original version of the object, which this object modifies
953 */
954 if (modify_flag)
955 {
956 /*
957 * Load the base object - pass the top-level object's name
958 * (which is our own name if the caller didn't pass an enclosing
959 * top-level object to us). Note that we must read, and can
960 * immediately discard, the type data in the object file - we
961 * know that the base symbol is going to be an object, since we
962 * always write it out at this specific place in the file, but
963 * we will have written the type information anyway; thus, we
964 * don't need the type information, but we must at least skip it
965 * in the file.
966 */
967 mod_base_sym =
968 load_from_obj_file_modbase(fp, fname, obj_xlat,
969 mod_name != 0 ? mod_name : txt,
970 mod_name != 0 ? mod_name_len : len,
971 FALSE);
972
973 /* if that failed, return failure */
974 if (mod_base_sym == 0)
975 return 0;
976 }
977 else
978 {
979 /* we have no 'modify' base symbol */
980 mod_base_sym = 0;
981 }
982
983 /*
984 * If this is a 'modifed extern' symbol, it's just a placeholder to
985 * connect the bottom object in the stack of modified objects in
986 * this file with the top object in another object file.
987 */
988 if (is_extern && modified_flag)
989 {
990 CTcSymObj *mod_sym;
991
992 /*
993 * We're modifying an external object. This must be the bottom
994 * object in the stack for this object file, and serves as a
995 * placeholder for the top object in a stack in another object
996 * file. We must find the object with the name of our top-level
997 * object (not the fake name for this modified base object, but
998 * the real name for the top-level object, because the symbol
999 * we're modifying in the other file is the top object in its
1000 * stack, if any). So, look up the symbol in the other file,
1001 * which must already be loaded.
1002 */
1003 sym = (CTcSymObj *)
1004 G_prs->get_global_symtab()->find(mod_name, mod_name_len);
1005
1006 /*
1007 * If the original base symbol wasn't an object of metaclass
1008 * "TADS Object", we can't modify it.
1009 */
1010 if (sym != 0
1011 && (sym->get_type() != TC_SYM_OBJ
1012 || sym->get_metaclass() != TC_META_TADSOBJ))
1013 {
1014 /* log an error */
1015 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1016 TCERR_OBJFILE_CANNOT_MOD_OR_REP_TYPE,
1017 (int)sym->get_sym_len(), sym->get_sym(),
1018 fname);
1019
1020 /* forget the symbol */
1021 sym = 0;
1022 }
1023
1024 /* create a synthesized object to hold the original definition */
1025 mod_sym = synthesize_modified_obj_sym(FALSE);
1026
1027 /* transfer data to the new fake symbol */
1028 if (sym != 0)
1029 {
1030 /*
1031 * 'sym' has the original version of the object from the
1032 * other object file - the original object file must be
1033 * loaded before an object file that modifies a symbol it
1034 * exports, so 'sym' will definitely be present in this case
1035 * (it's an error - undefined external symbol - that we will
1036 * have already caught if it's not defined). We want to
1037 * hijack 'sym' for our own use, since 'modify' replaces the
1038 * symbol's meaning with the new object data.
1039 *
1040 * Transfer the self-reference fixup list from the original
1041 * version of the object to the new synthesized object --
1042 * all of the self-references must now refer to the
1043 * renumbered object.
1044 *
1045 * This is really all we need to do to renumber the object.
1046 * By moving the self-fixup list to the new fake object, we
1047 * ensure that the original object will use its new number,
1048 * which leaves the original number for our use in the new,
1049 * modifying object (i.e., the one we're loading now). Note
1050 * that we'll replace the self-fixup list for this symbol
1051 * with the fixup list of the modifying symbol, below.
1052 */
1053 mod_sym->set_fixups(sym->get_fixups());
1054
1055 /*
1056 * Give the modified fake symbol the original pre-modified
1057 * object data stream. The fake symbol owns the
1058 * pre-modified data stream because it's the pre-modified
1059 * object.
1060 */
1061 mod_sym->set_stream_ofs(sym->get_stream_ofs());
1062
1063 /*
1064 * transfer the 'modify' base symbol from the original
1065 * version of this symbol to the new fake version
1066 */
1067 mod_sym->set_mod_base_sym(sym->get_mod_base_sym());
1068
1069 /* transfer the property deletion list */
1070 mod_sym->set_del_prop_head(sym->get_first_del_prop());
1071 sym->set_del_prop_head(0);
1072
1073 /*
1074 * mark the original object as a 'class' object - it might
1075 * have been compiled as a normal instance in its own
1076 * translation unit, but it's now a class because it's the
1077 * base class for this link-time 'modify'
1078 */
1079 mod_sym->mark_compiled_as_class();
1080
1081 /* transfer the dictionary to the base symbol */
1082 mod_sym->set_dict(sym->get_dict());
1083
1084 /* copy the class flag to the base symbol */
1085 mod_sym->set_is_class(sym->is_class());
1086
1087 /* set our class flag to the one from the original symbol */
1088 class_flag = sym->is_class();
1089
1090 /*
1091 * transfer the superclass list from the original symbol to
1092 * the modified base symbol
1093 */
1094 mod_sym->set_sc_head(sym->get_sc_head());
1095 sym->set_sc_head(0);
1096
1097 /* transfer the vocabulary list to the modified base symbol */
1098 mod_sym->set_vocab_head(sym->get_vocab_head());
1099 sym->set_vocab_head(0);
1100 }
1101
1102 /* do the remaining loading into the synthesized placeholder */
1103 sym = mod_sym;
1104 }
1105 else if (modified_flag)
1106 {
1107 /*
1108 * The symbol was modified, so the name is fake. Because the
1109 * name is tied to the object ID, which can change between the
1110 * the time of writing the object file and now, when we're
1111 * loading the object file, we must synthesize a new fake name
1112 * in the context of the loaded object file. The name is based
1113 * on the object number, which is why it must be re-synthesized
1114 * - the object number in this scheme can be different than the
1115 * original object number in the object file.
1116 */
1117 sym = synthesize_modified_obj_sym(FALSE);
1118
1119 /* set the appropriate metaclass */
1120 sym->set_metaclass(meta);
1121 }
1122 else if (anon)
1123 {
1124 /*
1125 * we will definitely not find a previous entry for an anonymous
1126 * symbol, because there's no name to look up
1127 */
1128 sym = 0;
1129 }
1130 else
1131 {
1132 /*
1133 * normal object - look up a previous definition of the symbol
1134 * in the global symbol table
1135 */
1136 sym = (CTcSymObj *)G_prs->get_global_symtab()->find(txt, len);
1137 }
1138
1139 /*
1140 * If this symbol is already defined, make sure the original
1141 * definition is an object, and make sure that it's only defined
1142 * (not referenced as external) once. If it's not defined, define
1143 * it anew.
1144 */
1145 if (sym != 0 && sym->get_type() != TC_SYM_OBJ)
1146 {
1147 /*
1148 * It's already defined, but it's not an object - log a symbol
1149 * type conflict error
1150 */
1151 sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1152
1153 /*
1154 * proceed despite the error, since this is merely a symbol
1155 * conflict and not a file corruption - create a fake symbol to
1156 * hold the data of the original symbol so we can continue
1157 * loading
1158 */
1159 sym = 0;
1160 use_fake_sym = TRUE;
1161 }
1162 else if ((ext_replace_flag || ext_modify_flag)
1163 && sym != 0 && sym->get_metaclass() != TC_META_TADSOBJ)
1164 {
1165 /* cannot modify or replace anything but an ordinary object */
1166 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1167 TCERR_OBJFILE_CANNOT_MOD_OR_REP_TYPE,
1168 (int)sym->get_sym_len(), sym->get_sym(),
1169 fname);
1170
1171 /* forget that we're doing a replacement */
1172 ext_replace_flag = ext_modify_flag = FALSE;
1173 }
1174 else if (sym != 0
1175 && (sym->get_metaclass() == TC_META_DICT
1176 || sym->get_metaclass() == TC_META_GRAMPROD))
1177 {
1178 /*
1179 * If this is a dictionary or grammar production object, and the
1180 * original definition was of the same metaclass, allow the
1181 * multiple definitions without conflict - just treat the new
1182 * definition as external. These object types don't require a
1183 * primary definition - every time such an object is defined,
1184 * it's a definition, but the same definition can appear in
1185 * multiple object files without conflict. Simply act as though
1186 * this new declaration is extern after all in this case.
1187 */
1188 if (meta == sym->get_metaclass())
1189 {
1190 /*
1191 * it's another one of the same type - allow it without
1192 * conflict; act as though this new definition is external
1193 */
1194 if (!sym->is_extern())
1195 is_extern = FALSE;
1196 }
1197 else
1198 {
1199 /* the other one's of a different type - log a conflict */
1200 sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1201
1202 /* proceed with a fake symbol */
1203 sym = 0;
1204 use_fake_sym = TRUE;
1205 }
1206 }
1207 else if ((ext_replace_flag || ext_modify_flag)
1208 && (sym == 0 || sym->is_extern()))
1209 {
1210 /*
1211 * This symbol isn't defined yet, or is only defined as an
1212 * external, but the new symbol is marked as 'replace' or
1213 * 'modify' - it's an error, because the original version of an
1214 * object must always be loaded before the replaced or modified
1215 * version
1216 */
1217 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1218 TCERR_OBJFILE_MODREPOBJ_BEFORE_ORIG,
1219 (int)len, txt, fname);
1220
1221 /* forget the symbol */
1222 sym = 0;
1223 }
1224 else if (sym != 0
1225 && !sym->is_extern()
1226 && !(is_extern || ext_modify_flag || ext_replace_flag
1227 || modified_flag))
1228 {
1229 /*
1230 * the symbol was already defined, and this is a new actual
1231 * definition (not external, and not replace or modify) -- this
1232 * is an error because it means the same object is defined more
1233 * than once
1234 */
1235 sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1236
1237 /*
1238 * proceed despite the error, since this is merely a symbol
1239 * conflict and not a file corruption - create a fake symbol to
1240 * hold the data of the original symbol so we can continue
1241 * loading
1242 */
1243 sym = 0;
1244 use_fake_sym = TRUE;
1245 }
1246 else if (sym != 0 && meta != sym->get_metaclass())
1247 {
1248 /*
1249 * the new symbol and the old symbol have different metaclasses
1250 * - it's a conflict
1251 */
1252 sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1253
1254 /* proceed with a fake symbol */
1255 sym = 0;
1256 use_fake_sym = TRUE;
1257 }
1258
1259 /* create the object if necessary */
1260 if (sym == 0)
1261 {
1262 /*
1263 * The symbol isn't defined yet - create the new definition and
1264 * add it to the symbol table. Allocate a new object ID for the
1265 * symbol in the normal fashion.
1266 */
1267 sym = new CTcSymObj(txt, len, FALSE, G_cg->new_obj_id(),
1268 is_extern, meta, 0);
1269
1270 /*
1271 * if we're using a fake symbol, don't bother adding the symbol
1272 * to the symbol table, since its only function is to allow us
1273 * to finish reading the object file data (we won't actually try
1274 * to link when using a fake symbol, since this always means
1275 * that an error has made linking impossible; we'll proceed
1276 * anyway so that we catch any other errors that remain to be
1277 * found)
1278 *
1279 * similarly, don't add the symbol if it's anonymous
1280 */
1281 if (!use_fake_sym && !anon)
1282 G_prs->get_global_symtab()->add_entry(sym);
1283
1284 /* if it's anonymous, add it to the anonymous symbol list */
1285 if (anon)
1286 G_prs->add_anon_obj(sym);
1287 }
1288
1289 /*
1290 * If we're replacing the object, tell the code generator to get rid
1291 * of the old object definition in the object stream -- delete the
1292 * definition at the symbol's old stream offset.
1293 */
1294 if (ext_replace_flag)
1295 G_cg->notify_replace_object(sym->get_stream_ofs());
1296
1297 /*
1298 * If this is a non-extern definition, we now have the object
1299 * defined -- remove the 'extern' flag from the symbol table entry,
1300 * and set the symbol's data to the data we just read. Do not
1301 * transfer data to the symbol if this is an extern, since we want
1302 * to use the existing data from the originally loaded object.
1303 */
1304 if (!is_extern)
1305 {
1306 /* clear the external flag */
1307 sym->set_extern(FALSE);
1308
1309 /* set the object's stream offset, if we read one */
1310 if (stream_ofs_valid)
1311 sym->set_stream_ofs(stream_ofs);
1312
1313 /* set the base 'modify' symbol if this symbol modifies another */
1314 if (mod_base_sym != 0)
1315 sym->set_mod_base_sym(mod_base_sym);
1316
1317 /* set the new symbol's fixup list */
1318 sym->set_fixups(fixups);
1319
1320 /* set the new symbol's deleted property list */
1321 sym->set_del_prop_head(del_prop_head);
1322
1323 /*
1324 * set the symbol's class flag - only add the class flag,
1325 * because we might have already set the class flag for this
1326 * symbol based on the external definition
1327 */
1328 if (class_flag)
1329 sym->set_is_class(class_flag);
1330 }
1331
1332 /* add this symbol to the load file object index list */
1333 G_prs->add_sym_from_obj_file(obj_file_idx, sym);
1334
1335 /* set the dictionary, if one was specified */
1336 if (dict_idx != 0)
1337 sym->set_dict(G_prs->get_obj_dict(dict_idx));
1338
1339 /*
1340 * if this is a dictionary symbol, add it to the dictionary fixup
1341 * list
1342 */
1343 if (meta == TC_META_DICT)
1344 G_prs->add_dict_from_obj_file(sym);
1345
1346
1347 /*
1348 * Set the translation table entry for the symbol. We know the
1349 * original ID local to the object file, and we know the new global
1350 * object ID.
1351 */
1352 obj_xlat[id] = sym->get_obj_id();
1353
1354 /* success */
1355 return sym;
1356 }
1357
1358 /*
1359 * Apply our self-reference fixups
1360 */
apply_internal_fixups()1361 void CTcSymObjBase::apply_internal_fixups()
1362 {
1363 CTcIdFixup *fixup;
1364 CTcObjPropDel *entry;
1365 CTcSymObj *mod_base;
1366
1367 /* run through our list and apply each fixup */
1368 for (fixup = fixups_ ; fixup != 0 ; fixup = fixup->nxt_)
1369 fixup->apply_fixup(obj_id_, 4);
1370
1371 /*
1372 * If we're a 'modify' object, and we were based at compile-time on
1373 * an object external to the translation unit in which this modified
1374 * version of the object was defined, we'll have a property deletion
1375 * list to be applied at link time. Now is the time - go through
1376 * our list and delete each property in each of our 'modify' base
1377 * classes. Don't delete the properties in our own object,
1378 * obviously - just in our modified base classes.
1379 */
1380 for (mod_base = mod_base_sym_ ; mod_base != 0 ;
1381 mod_base = mod_base->get_mod_base_sym())
1382 {
1383 /* delete each property in our deletion list in this base class */
1384 for (entry = first_del_prop_ ; entry != 0 ; entry = entry->nxt_)
1385 {
1386 /* delete this property from the base object */
1387 mod_base->delete_prop_from_mod_base(entry->prop_sym_->get_prop());
1388
1389 /* remove it from the base object's vocabulary list */
1390 mod_base->delete_vocab_prop(entry->prop_sym_->get_prop());
1391 }
1392 }
1393 }
1394
1395 /*
1396 * Merge my private grammar rules into the master rule list for the
1397 * associated grammar production object.
1398 */
merge_grammar_entry()1399 void CTcSymObjBase::merge_grammar_entry()
1400 {
1401 CTcSymObj *prod_sym;
1402 CTcGramProdEntry *master_entry;
1403
1404 /* if I don't have a grammar list, there's nothing to do */
1405 if (grammar_entry_ == 0)
1406 return;
1407
1408 /* get the grammar production object my rules are associated with */
1409 prod_sym = grammar_entry_->get_prod_sym();
1410
1411 /* get the master list for the production */
1412 master_entry = G_prs->get_gramprod_entry(prod_sym);
1413
1414 /* move the alternatives from my private list to the master list */
1415 grammar_entry_->move_alts_to(master_entry);
1416 }
1417
1418
1419 /* ------------------------------------------------------------------------ */
1420 /*
1421 * metaclass symbol base - image/object file functions
1422 */
1423
1424 /*
1425 * load from an object file
1426 */
1427 int CTcSymMetaclassBase::
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat)1428 load_from_obj_file(CVmFile *fp, const textchar_t *fname,
1429 tctarg_obj_id_t *obj_xlat)
1430 {
1431 const char *txt;
1432 size_t len;
1433 int meta_idx;
1434 int prop_cnt;
1435 CTcSymMetaclass *sym;
1436 char buf[TOK_SYM_MAX_LEN + 1];
1437 CTcSymMetaProp *prop;
1438 int was_defined;
1439 tctarg_obj_id_t class_obj;
1440
1441 /* read the symbol name */
1442 txt = base_read_from_sym_file(fp);
1443 len = strlen(txt);
1444
1445 /* read the metaclass index, class object ID, and property count */
1446 fp->read_bytes(buf, 8);
1447 meta_idx = osrp2(buf);
1448 class_obj = osrp4(buf + 2);
1449 prop_cnt = osrp2(buf + 6);
1450
1451 /* check for a previous definition */
1452 sym = (CTcSymMetaclass *)G_prs->get_global_symtab()->find(txt, len);
1453 if (sym == 0)
1454 {
1455 /* it's not defined yet - create the new definition */
1456 sym = new CTcSymMetaclass(txt, len, FALSE, meta_idx,
1457 G_cg->new_obj_id());
1458 G_prs->get_global_symtab()->add_entry(sym);
1459
1460 /* note that it wasn't yet defined */
1461 was_defined = FALSE;
1462
1463 /* set the metaclass symbol pointer in the dependency table */
1464 G_cg->set_meta_sym(meta_idx, sym);
1465 }
1466 else if (sym->get_type() != TC_SYM_METACLASS)
1467 {
1468 /* log a conflict */
1469 sym->log_objfile_conflict(fname, TC_SYM_METACLASS);
1470
1471 /* forget the symbol */
1472 sym = 0;
1473 was_defined = FALSE;
1474 }
1475 else
1476 {
1477 /* if the metaclass index doesn't match, it's an error */
1478 if (sym->get_meta_idx() != meta_idx)
1479 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1480 TCERR_OBJFILE_METACLASS_IDX_CONFLICT,
1481 (int)len, txt, fname);
1482
1483 /* note that it was previously defined */
1484 was_defined = TRUE;
1485
1486 /* start with the first property */
1487 prop = sym->get_prop_head();
1488 }
1489
1490 /* set the ID translation for the class object */
1491 if (sym != 0)
1492 obj_xlat[class_obj] = sym->get_class_obj();
1493
1494 /* read the property names */
1495 for ( ; prop_cnt != 0 ; --prop_cnt)
1496 {
1497 int is_static;
1498
1499 /* read the property symbol name */
1500 txt = base_read_from_sym_file(fp);
1501 len = strlen(txt);
1502
1503 /* read the flags */
1504 fp->read_bytes(buf, 1);
1505 is_static = ((buf[0] & 1) != 0);
1506
1507 /* check what we're doing */
1508 if (sym == 0)
1509 {
1510 /*
1511 * we have a conflict, so we're just scanning the names to
1512 * keep in sync with the file - ignore it
1513 */
1514 }
1515 else if (was_defined)
1516 {
1517 /*
1518 * the metaclass was previously defined - simply check to
1519 * ensure that this property matches the corresponding
1520 * property (by list position) in the original definition
1521 */
1522 if (prop == 0)
1523 {
1524 /*
1525 * we're past the end of the original definition's
1526 * property list - this is okay, as we can simply add
1527 * the properties in the new list (which must be a more
1528 * recent definition than the original one)
1529 */
1530 sym->add_prop(txt, len, fname, is_static);
1531 }
1532 else if (prop->prop_->get_sym_len() != len
1533 || memcmp(prop->prop_->get_sym(), txt, len) != 0)
1534 {
1535 /* this one doesn't match - it's an error */
1536 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1537 TCERR_OBJFILE_METACLASS_PROP_CONFLICT,
1538 (int)len, txt,
1539 (int)prop->prop_->get_sym_len(),
1540 prop->prop_->get_sym(), fname);
1541 }
1542
1543 /* move on to the next property in the list */
1544 if (prop != 0)
1545 prop = prop->nxt_;
1546 }
1547 else
1548 {
1549 /*
1550 * we're defining the metaclass anew - add this property to
1551 * the metaclass's property list
1552 */
1553 sym->add_prop(txt, len, fname, is_static);
1554 }
1555 }
1556
1557 /* read our modifier object flag */
1558 fp->read_bytes(buf, 1);
1559 if (buf[0] != 0)
1560 {
1561 /* laod the new object */
1562 CTcSymObj *mod_obj;
1563
1564 /* we have a modification object - load it */
1565 mod_obj = CTcSymObj::load_from_obj_file_modbase(
1566 fp, fname, obj_xlat, 0, 0, FALSE);
1567
1568 /*
1569 * if the metaclass already has a modification object, then the
1570 * bottom of the chain we just loaded modifies the top of the
1571 * existing chain
1572 */
1573 if (sym->get_mod_obj() != 0)
1574 {
1575 CTcSymObj *obj;
1576 CTcSymObj *prv;
1577
1578 /*
1579 * Set the bottom of the new chain to point to the top of
1580 * the existing chain. The bottom object in each object
1581 * file's modification chain is always a dummy root object;
1582 * we'll thus find the second to last object in the new
1583 * chain, and replace the pointer to its dummy root
1584 * superclass with a pointer to the top of the
1585 * previously-loaded chain that we're modifying.
1586 */
1587
1588 /* find the second-to-last object in the new chain */
1589 for (prv = 0, obj = mod_obj ;
1590 obj != 0 && obj->get_mod_base_sym() != 0 ;
1591 prv = obj, obj = obj->get_mod_base_sym()) ;
1592
1593 /*
1594 * if we found the second-to-last object, set up the link
1595 * back into the old chain
1596 */
1597 if (prv != 0)
1598 prv->set_mod_base_sym(sym->get_mod_obj());
1599 }
1600
1601 /* point the metaclass to the modification object */
1602 sym->set_mod_obj(mod_obj);
1603 }
1604
1605 /* return success - the file appears well-formed */
1606 return 0;
1607 }
1608
1609
1610 /* ------------------------------------------------------------------------ */
1611 /*
1612 * property symbol entry base - image/object file functions
1613 */
1614
1615 /*
1616 * Load from an object file
1617 */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname,tctarg_prop_id_t * prop_xlat)1618 int CTcSymPropBase::load_from_obj_file(class CVmFile *fp,
1619 const textchar_t *fname,
1620 tctarg_prop_id_t *prop_xlat)
1621 {
1622 const char *txt;
1623 size_t len;
1624 ulong id;
1625 CTcSymProp *sym;
1626
1627 /* read the symbol name information */
1628 txt = base_read_from_sym_file(fp);
1629 len = strlen(txt);
1630
1631 /* read our property ID */
1632 id = (ulong)fp->read_int4();
1633
1634 /*
1635 * If this symbol is already defined, make sure the original
1636 * definition is a property. If it's not defined, define it anew.
1637 */
1638 sym = (CTcSymProp *)G_prs->get_global_symtab()->find(txt, len);
1639 if (sym == 0)
1640 {
1641 /*
1642 * It's not defined yet - create the new definition and add it
1643 * to the symbol table. Allocate a new property ID for the
1644 * symbol in the normal fashion.
1645 */
1646 sym = new CTcSymProp(txt, len, FALSE, G_cg->new_prop_id());
1647 G_prs->get_global_symtab()->add_entry(sym);
1648 }
1649 else if (sym->get_type() != TC_SYM_PROP)
1650 {
1651 /*
1652 * It's not already defined as a property - log a symbol type
1653 * conflict error
1654 */
1655 sym->log_objfile_conflict(fname, TC_SYM_PROP);
1656
1657 /*
1658 * proceed despite the error, since this is merely a symbol
1659 * conflict and not a file corruption
1660 */
1661 return 0;
1662 }
1663
1664 /*
1665 * Set the translation table entry for the symbol. We know the
1666 * original ID local to the object file, and we know the new global
1667 * property ID.
1668 */
1669 prop_xlat[id] = sym->get_prop();
1670
1671 /* success */
1672 return 0;
1673 }
1674
1675 /* ------------------------------------------------------------------------ */
1676 /*
1677 * enumerator symbol entry base - image/object file functions
1678 */
1679
1680 /*
1681 * Load from an object file
1682 */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname,ulong * enum_xlat)1683 int CTcSymEnumBase::load_from_obj_file(class CVmFile *fp,
1684 const textchar_t *fname,
1685 ulong *enum_xlat)
1686 {
1687 const char *txt;
1688 size_t len;
1689 ulong id;
1690 CTcSymEnum *sym;
1691 char buf[32];
1692 int is_token;
1693
1694 /* read the symbol name information */
1695 txt = base_read_from_sym_file(fp);
1696 len = strlen(txt);
1697
1698 /* read our enumerator ID */
1699 id = (ulong)fp->read_int4();
1700
1701 /* read our flags */
1702 fp->read_bytes(buf, 1);
1703
1704 /* get the 'token' flag */
1705 is_token = ((buf[0] & 1) != 0);
1706
1707 /*
1708 * If this symbol is already defined, make sure the original
1709 * definition is an enum. If it's not defined, define it anew.
1710 */
1711 sym = (CTcSymEnum *)G_prs->get_global_symtab()->find(txt, len);
1712 if (sym == 0)
1713 {
1714 /*
1715 * It's not defined yet - create the new definition and add it
1716 * to the symbol table. Allocate a new enumerator ID for the
1717 * symbol in the normal fashion.
1718 */
1719 sym = new CTcSymEnum(txt, len, FALSE, G_prs->new_enum_id(), is_token);
1720 G_prs->get_global_symtab()->add_entry(sym);
1721 }
1722 else if (sym->get_type() != TC_SYM_ENUM)
1723 {
1724 /*
1725 * It's not already defined as an enumerator - log a symbol type
1726 * conflict error
1727 */
1728 sym->log_objfile_conflict(fname, TC_SYM_ENUM);
1729
1730 /*
1731 * proceed despite the error, since this is merely a symbol
1732 * conflict and not a file corruption
1733 */
1734 return 0;
1735 }
1736
1737 /*
1738 * Set the translation table entry for the symbol. We know the
1739 * original ID local to the object file, and we know the new global
1740 * enum ID.
1741 */
1742 enum_xlat[id] = sym->get_enum_id();
1743
1744 /* success */
1745 return 0;
1746 }
1747
1748 /* ------------------------------------------------------------------------ */
1749 /*
1750 * Built-in function symbol base - image/object file functions
1751 */
1752
1753 /*
1754 * load from an object file
1755 */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname)1756 int CTcSymBifBase::load_from_obj_file(class CVmFile *fp,
1757 const textchar_t *fname)
1758 {
1759 const char *txt;
1760 size_t len;
1761 CTcSymBif *sym;
1762 char buf[10];
1763 int func_set_id;
1764 int func_idx;
1765 int has_retval;
1766 int min_argc;
1767 int max_argc;
1768 int varargs;
1769
1770 /* read the symbol name information */
1771 txt = base_read_from_sym_file(fp);
1772 len = strlen(txt);
1773
1774 /* read our additional information */
1775 fp->read_bytes(buf, 10);
1776 varargs = buf[0];
1777 has_retval = buf[1];
1778 min_argc = osrp2(buf+2);
1779 max_argc = osrp2(buf+4);
1780 func_set_id = osrp2(buf+6);
1781 func_idx = osrp2(buf+8);
1782
1783 /*
1784 * If this symbol is already defined, make sure the new definition
1785 * matches the original definition - built-in function sets must be
1786 * identical in all object files loaded. If it's not already
1787 * defined, add it now.
1788 */
1789 sym = (CTcSymBif *)G_prs->get_global_symtab()->find(txt, len);
1790 if (sym == 0)
1791 {
1792 /*
1793 * it's not defined yet - create the new definition and add it
1794 * to the symbol table
1795 */
1796 sym = new CTcSymBif(txt, len, FALSE, func_set_id, func_idx,
1797 has_retval, min_argc, max_argc, varargs);
1798 G_prs->get_global_symtab()->add_entry(sym);
1799 }
1800 else if (sym->get_type() != TC_SYM_BIF)
1801 {
1802 /* log the error */
1803 sym->log_objfile_conflict(fname, TC_SYM_BIF);
1804 }
1805 else if (sym->get_func_set_id() != func_set_id
1806 || sym->get_func_idx() != func_idx
1807 || sym->get_min_argc() != min_argc
1808 || sym->get_max_argc() != max_argc
1809 || sym->is_varargs() != varargs
1810 || sym->has_retval() != has_retval)
1811 {
1812 /*
1813 * this function is already defined but has different settings
1814 * -- we cannot reconcile the different usages of the function,
1815 * so this is an error
1816 */
1817 G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_BIF_INCOMPAT,
1818 (int)len, txt, fname);
1819 }
1820 else
1821 {
1822 /*
1823 * everything about the symbol matches - there's no need to
1824 * redefine the symbol, since it's already set up exactly as we
1825 * need it to be
1826 */
1827 }
1828
1829 /* continue reading the file */
1830 return 0;
1831 }
1832
1833 /* ------------------------------------------------------------------------ */
1834 /*
1835 * Grammar production list entry
1836 */
1837
1838 /*
1839 * load from an object file
1840 */
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat,CTcSymObj * private_owner)1841 void CTcGramProdEntry::load_from_obj_file(
1842 CVmFile *fp, const tctarg_prop_id_t *prop_xlat, const ulong *enum_xlat,
1843 CTcSymObj *private_owner)
1844 {
1845 uint idx;
1846 ulong cnt;
1847 CTcSymObj *obj;
1848 CTcGramProdEntry *prod;
1849 ulong flags;
1850
1851 /*
1852 * read the object file index of the production object, and get the
1853 * production object
1854 */
1855 idx = (uint)fp->read_int4();
1856 obj = G_prs->get_objfile_objsym(idx);
1857
1858 /* declare the production object */
1859 prod = G_prs->declare_gramprod(obj->get_sym(), obj->get_sym_len());
1860
1861 /* if we have a private owner, create a private rule list */
1862 if (private_owner != 0)
1863 prod = private_owner->create_grammar_entry(
1864 obj->get_sym(), obj->get_sym_len());
1865
1866 /* read the flags */
1867 flags = (ulong)fp->read_int4();
1868
1869 /* set the explicitly-declared flag if appropriate */
1870 if (flags & 1)
1871 prod->set_declared(TRUE);
1872
1873 /* read the alternative count */
1874 cnt = (uint)fp->read_int4();
1875
1876 /* read the alternatives */
1877 for ( ; cnt != 0 ; --cnt)
1878 {
1879 CTcGramProdAlt *alt;
1880
1881 /* read an alternative */
1882 alt = CTcGramProdAlt::load_from_obj_file(fp, prop_xlat, enum_xlat);
1883
1884 /* add it to the production's list */
1885 if (prod != 0)
1886 prod->add_alt(alt);
1887 }
1888 }
1889
1890
1891 /* ------------------------------------------------------------------------ */
1892 /*
1893 * Grammar production alternative
1894 */
1895
1896 /*
1897 * load from an object file
1898 */
1899 CTcGramProdAlt *CTcGramProdAlt::
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat)1900 load_from_obj_file(CVmFile *fp, const tctarg_prop_id_t *prop_xlat,
1901 const ulong *enum_xlat)
1902 {
1903 uint idx;
1904 ulong cnt;
1905 CTcSymObj *obj;
1906 CTcGramProdAlt *alt;
1907 CTcDictEntry *dict;
1908 int score;
1909 int badness;
1910
1911 /* read my score and badness */
1912 score = fp->read_int2();
1913 badness = fp->read_int2();
1914
1915 /* read my processor object index, and get the associated object */
1916 idx = (uint)fp->read_int4();
1917 obj = G_prs->get_objfile_objsym(idx);
1918
1919 /* read my dictionary object index, and get the associated entry */
1920 idx = (uint)fp->read_int4();
1921 dict = G_prs->get_obj_dict(idx);
1922
1923 /* create the alternative object */
1924 alt = new (G_prsmem) CTcGramProdAlt(obj, dict);
1925
1926 /* set the score badness */
1927 alt->set_score(score);
1928 alt->set_badness(badness);
1929
1930 /* read the number of tokens */
1931 cnt = (ulong)fp->read_int4();
1932
1933 /* read the tokens */
1934 for ( ; cnt != 0 ; --cnt)
1935 {
1936 CTcGramProdTok *tok;
1937
1938 /* read a token */
1939 tok = CTcGramProdTok::load_from_obj_file(fp, prop_xlat, enum_xlat);
1940
1941 /* add it to the alternative's list */
1942 alt->add_tok(tok);
1943 }
1944
1945 /* return the alternative */
1946 return alt;
1947 }
1948
1949
1950 /* ------------------------------------------------------------------------ */
1951 /*
1952 * Grammar production token
1953 */
1954
1955 /*
1956 * load from an object file
1957 */
1958 CTcGramProdTok *CTcGramProdTok::
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat)1959 load_from_obj_file(CVmFile *fp, const tctarg_prop_id_t *prop_xlat,
1960 const ulong *enum_xlat)
1961 {
1962 CTcGramProdTok *tok;
1963 CTcSymObj *obj;
1964 tcgram_tok_type typ;
1965 tctarg_prop_id_t prop;
1966 size_t len;
1967 char *txt;
1968 uint idx;
1969 ulong enum_id;
1970 size_t i;
1971
1972 /* create a new token */
1973 tok = new (G_prsmem) CTcGramProdTok();
1974
1975 /* read the type */
1976 typ = (tcgram_tok_type)fp->read_int2();
1977
1978 /* read the data, which depends on the type */
1979 switch(typ)
1980 {
1981 case TCGRAM_PROD:
1982 /* read the production object's object file index */
1983 idx = (uint)fp->read_int4();
1984
1985 /* translate it to an object */
1986 obj = G_prs->get_objfile_objsym(idx);
1987
1988 /* set the production object in the token */
1989 tok->set_match_prod(obj);
1990 break;
1991
1992 case TCGRAM_TOKEN_TYPE:
1993 /* read the token ID, translating to the new enum numbering */
1994 enum_id = enum_xlat[fp->read_int4()];
1995
1996 /* set the token-type match */
1997 tok->set_match_token_type(enum_id);
1998 break;
1999
2000 case TCGRAM_PART_OF_SPEECH:
2001 /* read the property ID, translating to the new numbering system */
2002 prop = prop_xlat[fp->read_int2()];
2003
2004 /* set the part of speech in the token */
2005 tok->set_match_part_of_speech(prop);
2006 break;
2007
2008 case TCGRAM_PART_OF_SPEECH_LIST:
2009 /* read the list length */
2010 len = (size_t)fp->read_int2();
2011
2012 /* set the type */
2013 tok->set_match_part_list();
2014
2015 /* read each element and add it to the list */
2016 for (i = 0 ; i < len ; ++i)
2017 tok->add_match_part_ele(prop_xlat[fp->read_int2()]);
2018
2019 /* done */
2020 break;
2021
2022 case TCGRAM_LITERAL:
2023 /* read the length of the string */
2024 len = (size_t)fp->read_int2();
2025
2026 /* allocate parser memory to hold the text */
2027 txt = (char *)G_prsmem->alloc(len);
2028
2029 /* read the text of the literal */
2030 fp->read_bytes(txt, len);
2031
2032 /* set the literal in the token */
2033 tok->set_match_literal(txt, len);
2034 break;
2035
2036 case TCGRAM_STAR:
2037 /* there's no additional data */
2038 tok->set_match_star();
2039 break;
2040
2041 case TCGRAM_UNKNOWN:
2042 /* no extra data to read */
2043 break;
2044 }
2045
2046 /* read and set the property association */
2047 tok->set_prop_assoc(prop_xlat[fp->read_int2()]);
2048
2049 /* return the token */
2050 return tok;
2051 }
2052