1 #ifdef RCSID
2 static char RCSid[] =
3 "$Header: d:/cvsroot/tads/tads3/TCPRSIMG.CPP,v 1.1 1999/07/11 00:46:53 MJRoberts Exp $";
4 #endif
5 
6 /*
7  *   Copyright (c) 1999, 2002 Michael J. Roberts.  All Rights Reserved.
8  *
9  *   Please see the accompanying license file, LICENSE.TXT, for information
10  *   on using and copying this software.
11  */
12 /*
13 Name
14   tcprsimg.cpp - TADS 3 Compiler Parser - image writing functions
15 Function
16 
17 Notes
18 
19 Modified
20   04/30/99 MJRoberts  - Creation
21 */
22 
23 #include <stdlib.h>
24 #include <string.h>
25 #include <stdio.h>
26 #include <assert.h>
27 
28 #include "os.h"
29 #include "t3std.h"
30 #include "tcprs.h"
31 #include "tctarg.h"
32 #include "tcgen.h"
33 #include "vmhash.h"
34 #include "tcmain.h"
35 #include "vmfile.h"
36 #include "tctok.h"
37 
38 
39 /* ------------------------------------------------------------------------ */
40 /*
41  *   Read an object file and load the global symbol table
42  */
load_object_file(class CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,tctarg_prop_id_t * prop_xlat,ulong * enum_xlat)43 int CTcParser::load_object_file(class CVmFile *fp, const textchar_t *fname,
44                                 tctarg_obj_id_t *obj_xlat,
45                                 tctarg_prop_id_t *prop_xlat,
46                                 ulong *enum_xlat)
47 {
48     ulong sym_cnt;
49     ulong dict_cnt;
50     ulong i;
51     ulong anon_cnt;
52     ulong nonsym_cnt;
53     ulong prod_cnt;
54     ulong exp_cnt;
55 
56     /* read the number of symbol index entries */
57     sym_cnt = (long)fp->read_int4();
58     if (sym_cnt != 0)
59     {
60         /* allocate space for the symbol index list */
61         obj_sym_list_ = (CTcSymbol **)
62                         t3malloc(sym_cnt * sizeof(obj_sym_list_[0]));
63 
64         /* the list is empty so far */
65         obj_file_sym_idx_ = 0;
66     }
67 
68     /* read the number of dictionary symbols */
69     dict_cnt = (ulong)fp->read_int4();
70 
71     /* if there are any symbols, read them */
72     if (dict_cnt != 0)
73     {
74         /* allocate space for the dictionary index list */
75         obj_dict_list_ = (CTcDictEntry **)
76                          t3malloc(dict_cnt * sizeof(obj_dict_list_[0]));
77 
78         /* nothing in the list yet */
79         obj_file_dict_idx_ = 0;
80     }
81 
82     /* read the number of symbols in the file */
83     sym_cnt = (ulong)fp->read_int4();
84 
85     /* read the symbols */
86     for (i = 0 ; i < sym_cnt ; ++i)
87     {
88         /* load a symbol */
89         if (CTcSymbol::load_from_obj_file(fp, fname,
90                                           obj_xlat, prop_xlat, enum_xlat))
91             return 1;
92     }
93 
94     /* read the number of anonymous object symbols */
95     anon_cnt = (ulong)fp->read_int4();
96 
97     /* read the anonymous object symbols */
98     for (i = 0 ; i < anon_cnt ; ++i)
99     {
100         /* load the next anonymous object symbol */
101         if (CTcSymObj::load_from_obj_file(fp, fname, obj_xlat, TRUE))
102             return 1;
103     }
104 
105     /* read the non-symbol object ID's */
106     nonsym_cnt = (ulong)fp->read_int4();
107     for (i = 0 ; i < nonsym_cnt ; ++i)
108     {
109         tctarg_obj_id_t id;
110 
111         /* read the next non-symbol object ID */
112         id = (tctarg_obj_id_t)fp->read_int4();
113 
114         /*
115          *   allocate a new ID for the object, and set the translation
116          *   table for the new ID - this will ensure that references to
117          *   this non-symbol object are properly fixed up
118          */
119         obj_xlat[id] = G_cg->new_obj_id();
120     }
121 
122     /* read the number of symbol cross-reference sections in the file */
123     sym_cnt = (ulong)fp->read_int4();
124 
125     /* read the symbol cross-references */
126     for (i = 0 ; i < sym_cnt ; ++i)
127     {
128         ulong idx;
129         CTcSymbol *sym;
130 
131         /* read the symbol index */
132         idx = (ulong)fp->read_int4();
133 
134         /* get the symbol from the index list */
135         sym = get_objfile_sym(idx);
136 
137         /* load the symbol's reference information */
138         sym->load_refs_from_obj_file(fp, fname, obj_xlat, prop_xlat);
139     }
140 
141     /* read the number of anonymous object cross-references */
142     anon_cnt = (ulong)fp->read_int4();
143 
144     /* read the anonymous object cross-references */
145     for (i = 0 ; i < anon_cnt ; ++i)
146     {
147         ulong idx;
148         CTcSymbol *sym;
149 
150         /* read the symbol index */
151         idx = (ulong)fp->read_int4();
152 
153         /* get the symbol from the index list */
154         sym = get_objfile_sym(idx);
155 
156         /* load the symbol's reference information */
157         sym->load_refs_from_obj_file(fp, fname, obj_xlat, prop_xlat);
158     }
159 
160     /* read the master grammar rule count */
161     prod_cnt = (ulong)fp->read_int4();
162 
163     /* read the master grammar rule list */
164     for (i = 0 ; i < prod_cnt ; ++i)
165     {
166         /* read the next grammar production */
167         CTcGramProdEntry::load_from_obj_file(fp, prop_xlat, enum_xlat, 0);
168     }
169 
170     /* read the number of named grammar rules */
171     prod_cnt = (ulong)fp->read_int4();
172 
173     /* read the private grammar rules */
174     for (i = 0 ; i < prod_cnt ; ++i)
175     {
176         CTcSymObj *match_sym;
177 
178         /* read the match object defining the rule */
179         match_sym = get_objfile_objsym(fp->read_int4());
180 
181         /* read the private rule list */
182         CTcGramProdEntry::load_from_obj_file(
183             fp, prop_xlat, enum_xlat, match_sym);
184     }
185 
186     /* read the export symbol list */
187     exp_cnt = (ulong)fp->read_int4();
188     for (i = 0 ; i < exp_cnt ; ++i)
189     {
190         CTcPrsExport *exp;
191 
192         /* read the next entry */
193         exp = CTcPrsExport::read_from_obj_file(fp);
194 
195         /* if that failed, the whole load fails */
196         if (exp == 0)
197             return 1;
198 
199         /* add it to our list */
200         add_export_to_list(exp);
201     }
202 
203     /* done with the symbol index list - free it */
204     if (obj_sym_list_ != 0)
205     {
206         /* free it and forget it */
207         t3free(obj_sym_list_);
208         obj_sym_list_ = 0;
209     }
210 
211     /* done with the dictionary index list - free it */
212     if (obj_dict_list_ != 0)
213     {
214         /* free the memory and forget it */
215         t3free(obj_dict_list_);
216         obj_dict_list_ = 0;
217     }
218 
219     /* success */
220     return 0;
221 }
222 
223 
224 /* ------------------------------------------------------------------------ */
225 /*
226  *   Generate code and write the image file
227  */
build_image(class CVmFile * image_fp,uchar xor_mask,const char tool_data[4])228 void CTPNStmProg::build_image(class CVmFile *image_fp, uchar xor_mask,
229                               const char tool_data[4])
230 {
231     /* generate code */
232     if (gen_code_for_build())
233         return;
234 
235     /* scan the symbol table for unresolved external references */
236     if (G_prs->check_unresolved_externs())
237         return;
238 
239     /*
240      *   Finally, our task of constructing the program is complete.  All
241      *   that remains is to write the image file.  Tell the code generator
242      *   to begin the process.
243      */
244     G_cg->write_to_image(image_fp, xor_mask, tool_data);
245 }
246 
247 /* ------------------------------------------------------------------------ */
248 /*
249  *   Generate code and write the object file
250  */
build_object_file(class CVmFile * object_fp,class CTcMake * make_obj)251 void CTPNStmProg::build_object_file(class CVmFile *object_fp,
252                                     class CTcMake *make_obj)
253 {
254     /* generate code */
255     if (gen_code_for_build())
256         return;
257 
258     /*
259      *   Finally, our task of constructing the program is complete.  All
260      *   that remains is to write the image file.  Tell the code generator
261      *   to begin the process.
262      */
263     G_cg->write_to_object_file(object_fp, make_obj);
264 }
265 
266 /* ------------------------------------------------------------------------ */
267 /*
268  *   Generate code for a build, in preparation for writing an image file
269  *   or an object file.
270  */
gen_code_for_build()271 int CTPNStmProg::gen_code_for_build()
272 {
273     /* notify the tokenizer that parsing is done */
274     G_tok->parsing_done();
275 
276     /* notify the code generator that we're finished parsing */
277     G_cg->parsing_done();
278 
279     /* set the global symbol table in the code streams */
280     G_cs_main->set_symtab(G_prs->get_global_symtab());
281     G_cs_static->set_symtab(G_prs->get_global_symtab());
282 
283     /* generate code for the entire program */
284     gen_code(TRUE, TRUE);
285 
286     /*
287      *   if we encountered any errors generating code, don't bother
288      *   writing the image
289      */
290     if (G_tcmain->get_error_count() != 0)
291         return 1;
292 
293     /* return success */
294     return 0;
295 }
296 
297 /* ------------------------------------------------------------------------ */
298 /*
299  *   Check for unresolved external symbols.  Logs an error for each
300  *   unresolved external.
301  */
check_unresolved_externs()302 int CTcParser::check_unresolved_externs()
303 {
304     int errcnt;
305 
306     /* note the previous error count */
307     errcnt = G_tcmain->get_error_count();
308 
309     /* enumerate the entries with our unresolved check callback */
310     get_global_symtab()->enum_entries(&enum_sym_extref, this);
311 
312     /*
313      *   if the error count increased, we logged errors for unresolved
314      *   symbols
315      */
316     return (G_tcmain->get_error_count() > errcnt);
317 }
318 
319 /*
320  *   Enumeration callback - check for unresolved external references.  For
321  *   each object or function still marked "external," we'll log an error.
322  */
enum_sym_extref(void *,CTcSymbol * sym)323 void CTcParser::enum_sym_extref(void *, CTcSymbol *sym)
324 {
325     /* if it's an external symbol, log an error */
326     if (sym->is_unresolved_extern())
327         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_UNRESOLVED_EXTERN,
328                             (int)sym->get_sym_len(), sym->get_sym());
329 }
330 
331 
332 /* ------------------------------------------------------------------------ */
333 /*
334  *   Build dictionaries.  We go through all objects and insert their
335  *   vocabulary words into their dictionaries.
336  */
build_dictionaries()337 void CTcParser::build_dictionaries()
338 {
339     CTcDictEntry *dict;
340     CTcSymObj *sym;
341 
342     /*
343      *   enumerate our symbols to insert dictionary words - this will
344      *   populate each dictionary's hash table with a complete list of the
345      *   words and object associations for the dictionary
346      */
347     get_global_symtab()->enum_entries(&enum_sym_dict, this);
348 
349     /* do the same for the anonymous objects */
350     for (sym = anon_obj_head_ ; sym != 0 ; sym = (CTcSymObj *)sym->nxt_)
351         sym->build_dictionary();
352 
353     /* generate the object stream for each dictionary */
354     for (dict = dict_head_ ; dict != 0 ; dict = dict->get_next())
355     {
356         /* generate the code (static data, actually) for this dictionary */
357         G_cg->gen_code_for_dict(dict);
358     }
359 }
360 
361 /*
362  *   enumeration callback - build dictionaries
363  */
enum_sym_dict(void *,CTcSymbol * sym)364 void CTcParser::enum_sym_dict(void *, CTcSymbol *sym)
365 {
366     /* tell this symbol to build its dictionary entries */
367     sym->build_dictionary();
368 }
369 
370 /* ------------------------------------------------------------------------ */
371 /*
372  *   Build grammar productions
373  */
build_grammar_productions()374 void CTcParser::build_grammar_productions()
375 {
376     CTcGramProdEntry *entry;
377 
378     /*
379      *   First, run through the symbol table and merge all of the private
380      *   grammar rules into the master grammar rule list.  Since we've
381      *   finished linking, we've already applied all modify/replace
382      *   overrides, hence each symbol table entry referring to an object
383      *   will contain its final private grammar rule list.  So, we can
384      *   safely merge the private lists into the master lists at this point,
385      *   since no more modifications to private lists are possible.
386      */
387     get_global_symtab()->enum_entries(&build_grammar_cb, this);
388 
389     /*
390      *   iterate over the master list of productions and generate the image
391      *   data for each one
392      */
393     for (entry = gramprod_head_ ; entry != 0 ; entry = entry->get_next())
394     {
395         /* build this entry */
396         G_cg->gen_code_for_gramprod(entry);
397     }
398 }
399 
400 /*
401  *   Symbol table enumeration callback - merge match object private grammar
402  *   rules into the master grammar rule list.
403  */
build_grammar_cb(void *,CTcSymbol * sym)404 void CTcParser::build_grammar_cb(void *, CTcSymbol *sym)
405 {
406     /* if this is an object, merge its private grammar list */
407     if (sym->get_type() == TC_SYM_OBJ)
408         ((CTcSymObj *)sym)->merge_grammar_entry();
409 }
410 
411 /* ------------------------------------------------------------------------ */
412 /*
413  *   Apply self-reference object ID fixups.  This traverses the symbol
414  *   table and applies each object's list of fixups.  This can be called
415  *   once after loading all object files.
416  */
apply_internal_fixups()417 void CTcParser::apply_internal_fixups()
418 {
419     CTcSymObj *anon_obj;
420 
421     /* enumerate the entries with our callback */
422     get_global_symtab()->enum_entries(&enum_sym_internal_fixup, this);
423 
424     /* apply internal fixups to our anonymous objects */
425     for (anon_obj = anon_obj_head_ ; anon_obj != 0 ;
426          anon_obj = (CTcSymObj *)anon_obj->nxt_)
427     {
428         /* apply internal fixups to this symbol */
429         anon_obj->apply_internal_fixups();
430     }
431 }
432 
433 /*
434  *   Enumeration callback - apply internal ID fixups
435  */
enum_sym_internal_fixup(void *,CTcSymbol * sym)436 void CTcParser::enum_sym_internal_fixup(void *, CTcSymbol *sym)
437 {
438     /* apply its self-reference fixups */
439     sym->apply_internal_fixups();
440 }
441 
442 /* ------------------------------------------------------------------------ */
443 /*
444  *   Basic symbol class - image/object file functions
445  */
446 
447 /*
448  *   Read a symbol from an object file
449  */
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,tctarg_prop_id_t * prop_xlat,ulong * enum_xlat)450 int CTcSymbolBase::load_from_obj_file(CVmFile *fp, const textchar_t *fname,
451                                       tctarg_obj_id_t *obj_xlat,
452                                       tctarg_prop_id_t *prop_xlat,
453                                       ulong *enum_xlat)
454 {
455     tc_symtype_t typ;
456 
457     /*
458      *   read the type - this is the one thing we know is always present
459      *   for every symbol (the rest of the data might vary per subclass)
460      */
461     typ = (tc_symtype_t)fp->read_uint2();
462 
463     /* create the object based on the type */
464     switch(typ)
465     {
466     case TC_SYM_FUNC:
467         return CTcSymFunc::load_from_obj_file(fp, fname);
468 
469     case TC_SYM_OBJ:
470         return CTcSymObj::load_from_obj_file(fp, fname, obj_xlat, FALSE);
471 
472     case TC_SYM_PROP:
473         return CTcSymProp::load_from_obj_file(fp, fname, prop_xlat);
474 
475     case TC_SYM_ENUM:
476         return CTcSymEnum::load_from_obj_file(fp, fname, enum_xlat);
477 
478     case TC_SYM_BIF:
479         return CTcSymBif::load_from_obj_file(fp, fname);
480 
481     case TC_SYM_METACLASS:
482         return CTcSymMetaclass::load_from_obj_file(fp, fname, obj_xlat);
483 
484     default:
485         /* other types should not be in an object file */
486         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_INV_TYPE);
487 
488         /* return an error indication */
489         return 1;
490     }
491 }
492 
493 /*
494  *   Log a conflict with another symbol from an object file
495  */
log_objfile_conflict(const textchar_t * fname,tc_symtype_t new_type) const496 void CTcSymbolBase::log_objfile_conflict(const textchar_t *fname,
497                                          tc_symtype_t new_type) const
498 {
499     static const textchar_t *type_name[] =
500     {
501         "unknown", "function", "object", "property", "local",
502         "parameter", "intrinsic function", "native function", "code label",
503         "intrinsic class", "enum"
504     };
505 
506     /*
507      *   if the types differ, log an error indicating the different types;
508      *   otherwise, simply log an error indicating the redefinition
509      */
510     if (new_type != get_type())
511     {
512         /* the types differ - show the two types */
513         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_REDEF_SYM_TYPE,
514                             (int)get_sym_len(), get_sym(),
515                             type_name[get_type()], type_name[new_type],
516                             fname);
517     }
518     else
519     {
520         /* the types are the same */
521         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_REDEF_SYM,
522                             (int)get_sym_len(), get_sym(),
523                             type_name[new_type], fname);
524     }
525 }
526 
527 
528 /* ------------------------------------------------------------------------ */
529 /*
530  *   Function Symbol subclass - image/object file functions
531  */
532 
533 /*
534  *   Load from an object file
535  */
load_from_obj_file(CVmFile * fp,const textchar_t * fname)536 int CTcSymFuncBase::load_from_obj_file(CVmFile *fp,
537                                        const textchar_t *fname)
538 {
539     const char *txt;
540     size_t len;
541     char buf[9];
542     int is_extern;
543     int ext_replace;
544     int ext_modify;
545     int has_retval;
546     int varargs;
547     int argc;
548     int mod_base_cnt;
549     CTcSymFunc *sym;
550 
551     /* read the symbol name information */
552     txt = base_read_from_sym_file(fp);
553     len = strlen(txt);
554 
555     /* read our extra data */
556     fp->read_bytes(buf, 9);
557     argc = osrp2(buf);
558     varargs = buf[2];
559     has_retval = buf[3];
560     is_extern = buf[4];
561     ext_replace = buf[5];
562     ext_modify = buf[6];
563     mod_base_cnt = osrp2(buf + 7);
564 
565     /* look up any existing symbol */
566     sym = (CTcSymFunc *)G_prs->get_global_symtab()->find(txt, len);
567 
568     /*
569      *   If this symbol is already defined, make sure the original
570      *   definition is a function, and make sure that it's only defined
571      *   (not referenced as external) once.  If it's not defined, define
572      *   it anew.
573      */
574     if (sym == 0)
575     {
576         /*
577          *   It's not defined yet - create the new definition and add it
578          *   to the symbol table.
579          */
580         sym = new CTcSymFunc(txt, len, FALSE, argc, varargs, has_retval,
581                              is_extern);
582         G_prs->get_global_symtab()->add_entry(sym);
583 
584         /* it's an error if we're replacing a previously undefined function */
585         if (ext_replace || ext_modify)
586             G_tcmain->log_error(0, 0, TC_SEV_ERROR,
587                                 TCERR_OBJFILE_REPFUNC_BEFORE_ORIG,
588                                 (int)len, txt, fname);
589     }
590     else if (sym->get_type() != TC_SYM_FUNC
591              || (!sym->is_extern()
592                  && !is_extern && !ext_replace && !ext_modify))
593     {
594         /*
595          *   It's already defined, but it's not a function, or this is a
596          *   non-extern/replacd definition and the symbol is already
597          *   defined non-extern - log a symbol type conflict error.
598          */
599         sym->log_objfile_conflict(fname, TC_SYM_FUNC);
600 
601         /*
602          *   proceed despite the error, since this is merely a symbol
603          *   conflict and not a file corruption - create a fake symbol to
604          *   hold the information, so that we can read the data and thus
605          *   keep in sync with the file, but don't bother adding the fake
606          *   symbol object to the symbol table
607          */
608         sym = new CTcSymFunc(txt, len, FALSE, argc, varargs, has_retval,
609                              is_extern);
610     }
611     else if (sym->get_argc() != argc
612              || sym->is_varargs() != varargs
613              || sym->has_retval() != has_retval)
614     {
615         /* the symbol has an incompatible definition - log the error */
616         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_FUNC_INCOMPAT,
617                             (int)len, txt, fname);
618     }
619 
620     /*
621      *   if this is a non-extern definition, we now have the object
622      *   defined -- remove the 'extern' flag from the symbol table entry
623      *   in this case
624      */
625     if (!is_extern)
626     {
627         /* mark the symbol as defined */
628         sym->set_extern(FALSE);
629 
630         /*
631          *   if we're replacing it, delete the original; if we're modifying
632          *   it, chain the original into our modify list
633          */
634         if (ext_replace)
635         {
636             int i;
637 
638             /*
639              *   mark the previous code anchor as obsolete so that we
640              *   don't write its code to the image file
641              */
642             if (sym->get_anchor() != 0)
643                 sym->get_anchor()->set_replaced(TRUE);
644 
645             /*
646              *   Mark all of the modified base function code offsets as
647              *   replaced as well.
648              */
649             for (i = 0 ; i < sym->get_mod_base_offset_count() ; ++i)
650             {
651                 CTcStreamAnchor *anchor;
652 
653                 /* get the anchor for this offset */
654                 anchor = G_cs->find_anchor(sym->get_mod_base_offset(i));
655 
656                 /* mark it as replaced */
657                 if (anchor != 0)
658                     anchor->set_replaced(TRUE);
659             }
660 
661             /*
662              *   We can now forget everything in the modify base list, as
663              *   everything in the list is being replaced and is thus no
664              *   longer relevant.
665              */
666             sym->clear_mod_base_offsets();
667         }
668         else if (ext_modify)
669         {
670             /*
671              *   We're modifying an external symbol.  The anchor to the code
672              *   stream object that we previously loaded is actually the
673              *   anchor to the modified base object, not to the new meaning
674              *   of the symbol, so detach the anchor from our symbol.
675              */
676             sym->get_anchor()->detach_from_symbol();
677 
678             /*
679              *   The object file has a fixup list for references to the
680              *   external base object that we're modifying.  In other words,
681              *   these are external references from the object file we're
682              *   loading to the now-nameless code stream object that we're
683              *   replacing, which is the code stream object at our anchor.
684              *   So, load those fixups into the anchor's new internal fixup
685              *   list.  It's important to note that these aren't references
686              *   to this symbol - they're specifically references to the
687              *   modified base code stream object.
688              */
689             CTcAbsFixup::load_fixup_list_from_object_file(
690                 fp, fname, sym->get_anchor()->fixup_list_head_);
691 
692             /*
693              *   Add the old code stream anchor to the list of modified base
694              *   offsets for the function.  The function we're reading from
695              *   the object file modifies this as a base function, so we need
696              *   to add this to the list of modified base functions.
697              */
698             sym->add_mod_base_offset(sym->get_anchor()->get_ofs());
699 
700             /*
701              *   Complete the dissociation from the anchor by forgetting the
702              *   anchor in the symbol.  This will allow the code stream
703              *   object that's associated with this symbol in the current
704              *   file to take over the anchor duty for this symbol, which
705              *   will ensure that all fixups that reference this symbol will
706              *   be resolved to the new code stream object.
707              */
708             sym->set_anchor(0);
709         }
710     }
711 
712     /*
713      *   Read the list of modified base function offsets.  Each entry is a
714      *   code stream offset, so adjust each using the base code stream offset
715      *   for this object file.
716      */
717     for ( ; mod_base_cnt != 0 ; --mod_base_cnt)
718     {
719         int i;
720 
721         /* read them */
722         for (i = 0 ; i < mod_base_cnt ; ++i)
723         {
724             /* read the offset, adjusting to the object file start position */
725             ulong ofs = fp->read_int4() + G_cs->get_object_file_start_ofs();
726 
727             /* append this item */
728             sym->add_mod_base_offset(ofs);
729         }
730     }
731 
732     /* if it's extern, load the fixup list */
733     if (is_extern)
734     {
735         /*
736          *   This is an external reference, so we must load our fixup
737          *   list, adding it to any fixup list that already exists with
738          *   the symbol.
739          */
740         CTcAbsFixup::
741             load_fixup_list_from_object_file(fp, fname, &sym->fixups_);
742     }
743 
744     /* success */
745     return 0;
746 }
747 
748 /* ------------------------------------------------------------------------ */
749 /*
750  *   object symbol entry base - image/object file functions
751  */
752 
753 /*
754  *   Load from an object file
755  */
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,int anon)756 int CTcSymObjBase::load_from_obj_file(CVmFile *fp,
757                                       const textchar_t *fname,
758                                       tctarg_obj_id_t *obj_xlat,
759                                       int anon)
760 {
761     /*
762      *   do the main loading - if it fails to return a symbol, return
763      *   failure (i.e., non-zero)
764      */
765     return (load_from_obj_file_main(fp, fname, obj_xlat, 0, 0, anon) == 0);
766 }
767 
768 /*
769  *   Load a modified base object from an object file
770  */
771 CTcSymObj *CTcSymObjBase::
load_from_obj_file_modbase(class CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,const textchar_t * mod_name,size_t mod_name_len,int anon)772    load_from_obj_file_modbase(class CVmFile *fp, const textchar_t *fname,
773                               tctarg_obj_id_t *obj_xlat,
774                               const textchar_t *mod_name,
775                               size_t mod_name_len, int anon)
776 {
777     /* skip the type prefix - we know it's an object */
778     fp->read_uint2();
779 
780     /* load the object and return the symbol */
781     return load_from_obj_file_main(fp, fname, obj_xlat,
782                                    mod_name, mod_name_len, anon);
783 }
784 
785 /*
786  *   Load from an object file.  This main routine does most of the work,
787  *   and returns the loaded symbol.
788  *
789  *   'mod_name' is the primary symbol name for a stack of 'modify'
790  *   objects.  Each of the objects in a 'modify' stack, except for the
791  *   topmost (i.e., last defined) object, has a fake symbol name, since
792  *   the program can't refer directly to the base object once modified.
793  *   However, while loading, we must know the actual name for the entire
794  *   stack, so that we can link the bottom of the stack in this object
795  *   file to the top of the stack in another object file if the bottom of
796  *   our stack is declared external (i.e., this object file's source code
797  *   used 'modify' with an external object).  If we're loading a top-level
798  *   object, not a modified object, 'mod_name' should be null.
799  */
800 CTcSymObj *CTcSymObjBase::
load_from_obj_file_main(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat,const textchar_t * mod_name,size_t mod_name_len,int anon)801    load_from_obj_file_main(CVmFile *fp, const textchar_t *fname,
802                            tctarg_obj_id_t *obj_xlat,
803                            const textchar_t *mod_name, size_t mod_name_len,
804                            int anon)
805 {
806     const char *txt;
807     size_t len;
808     char buf[32];
809     ulong id;
810     int is_extern;
811     int stream_ofs_valid;
812     ulong stream_ofs;
813     CTcSymObj *sym;
814     CTcSymObj *mod_base_sym;
815     int modify_flag;
816     int ext_modify_flag;
817     int ext_replace_flag;
818     int modified_flag;
819     int class_flag;
820     CTcIdFixup *fixups;
821     CTcObjPropDel *del_prop_head;
822     tc_metaclass_t meta;
823     uint dict_idx;
824     int use_fake_sym;
825     uint obj_file_idx;
826     int trans_flag;
827 
828     /* presume we won't have to use a fake symbol */
829     use_fake_sym = FALSE;
830 
831     /* presume we won't be able to read a stream offset */
832     stream_ofs_valid = FALSE;
833 
834     /* read the symbol name information if it's not anonymous */
835     if (!anon)
836     {
837         /* read the symbol name */
838         txt = base_read_from_sym_file(fp);
839         if (txt == 0)
840             return 0;
841     }
842     else
843     {
844         /* use ".anon" as our symbol name placeholder */
845         txt = ".anon";
846     }
847 
848     /* get the symbol len */
849     len = strlen(txt);
850 
851     /* read our extra data */
852     fp->read_bytes(buf, 17);
853     id = osrp4(buf);
854     is_extern = buf[4];
855     ext_replace_flag = buf[5];
856     modified_flag = buf[6];
857     modify_flag = buf[7];
858     ext_modify_flag = buf[8];
859     class_flag = buf[9];
860     trans_flag = buf[10];
861     meta = (tc_metaclass_t)osrp2(buf + 11);
862     dict_idx = osrp2(buf + 13);
863     obj_file_idx = osrp2(buf + 15);
864 
865     /*
866      *   if we're not external, read our stream offset, and adjust for the
867      *   object stream base in the object file
868      */
869     if (!is_extern)
870     {
871         CTcDataStream *stream;
872 
873         /* get the appropriate stream */
874         stream = get_stream_from_meta(meta);
875 
876         /* read the relative stream offset */
877         stream_ofs = fp->read_int4();
878 
879         /*
880          *   Ensure the stream offset was actually valid.  It must be valid
881          *   unless the object has no stream (for example, dictionary and
882          *   grammar production objects are not generated until link time,
883          *   hence they don't have to have - indeed, can't have - valid
884          *   stream offsets when we're loading an object file).
885          */
886         assert(stream_ofs != 0xffffffff || stream == 0);
887 
888         /* determine if it's valid */
889         if (stream_ofs != 0xffffffff)
890         {
891             /* adjust it relative to this object file's stream base */
892             stream_ofs += stream->get_object_file_start_ofs();
893 
894             /* note that it's valid */
895             stream_ofs_valid = TRUE;
896         }
897         else
898         {
899             /* the stream offset is not valid */
900             stream_ofs_valid = FALSE;
901         }
902     }
903 
904     /* we have no deleted properties yet */
905     del_prop_head = 0;
906 
907     /* if this is a 'modify' object, read some additional data */
908     if (modify_flag)
909     {
910         uint cnt;
911 
912         /* read the deleted property list */
913         for (cnt = fp->read_uint2() ; cnt != 0 ; --cnt)
914         {
915             const char *prop_name;
916             CTcSymProp *prop_sym;
917 
918             /* read the symbol name from the file */
919             prop_name = base_read_from_sym_file(fp);
920             if (prop_name == 0)
921                 return 0;
922 
923             /*
924              *   find the property symbol, or define it if it's not
925              *   already defined as a property
926              */
927             prop_sym = (CTcSymProp *)G_prs->get_global_symtab()
928                        ->find_or_def_prop(prop_name, strlen(prop_name),
929                                           FALSE);
930 
931             /* make sure it's a property */
932             if (prop_sym->get_type() != TC_SYM_PROP)
933             {
934                 /* it's not a property - log the conflict */
935                 prop_sym->log_objfile_conflict(fname, TC_SYM_PROP);
936             }
937             else
938             {
939                 /* add the entry to my list */
940                 add_del_prop_to_list(&del_prop_head, prop_sym);
941             }
942         }
943     }
944 
945     /* read the self-reference fixup list */
946     fixups = 0;
947     CTcIdFixup::load_object_file(fp, 0, 0, TCGEN_XLAT_OBJ,
948                                  4, fname, &fixups);
949 
950     /*
951      *   if this is a 'modify' object, load the base object - this is the
952      *   original version of the object, which this object modifies
953      */
954     if (modify_flag)
955     {
956         /*
957          *   Load the base object - pass the top-level object's name
958          *   (which is our own name if the caller didn't pass an enclosing
959          *   top-level object to us).  Note that we must read, and can
960          *   immediately discard, the type data in the object file - we
961          *   know that the base symbol is going to be an object, since we
962          *   always write it out at this specific place in the file, but
963          *   we will have written the type information anyway; thus, we
964          *   don't need the type information, but we must at least skip it
965          *   in the file.
966          */
967         mod_base_sym =
968             load_from_obj_file_modbase(fp, fname, obj_xlat,
969                                        mod_name != 0 ? mod_name : txt,
970                                        mod_name != 0 ? mod_name_len : len,
971                                        FALSE);
972 
973         /* if that failed, return failure */
974         if (mod_base_sym == 0)
975             return 0;
976     }
977     else
978     {
979         /* we have no 'modify' base symbol */
980         mod_base_sym = 0;
981     }
982 
983     /*
984      *   If this is a 'modifed extern' symbol, it's just a placeholder to
985      *   connect the bottom object in the stack of modified objects in
986      *   this file with the top object in another object file.
987      */
988     if (is_extern && modified_flag)
989     {
990         CTcSymObj *mod_sym;
991 
992         /*
993          *   We're modifying an external object.  This must be the bottom
994          *   object in the stack for this object file, and serves as a
995          *   placeholder for the top object in a stack in another object
996          *   file.  We must find the object with the name of our top-level
997          *   object (not the fake name for this modified base object, but
998          *   the real name for the top-level object, because the symbol
999          *   we're modifying in the other file is the top object in its
1000          *   stack, if any).  So, look up the symbol in the other file,
1001          *   which must already be loaded.
1002          */
1003         sym = (CTcSymObj *)
1004               G_prs->get_global_symtab()->find(mod_name, mod_name_len);
1005 
1006         /*
1007          *   If the original base symbol wasn't an object of metaclass
1008          *   "TADS Object", we can't modify it.
1009          */
1010         if (sym != 0
1011             && (sym->get_type() != TC_SYM_OBJ
1012                 || sym->get_metaclass() != TC_META_TADSOBJ))
1013         {
1014             /* log an error */
1015             G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1016                                 TCERR_OBJFILE_CANNOT_MOD_OR_REP_TYPE,
1017                                 (int)sym->get_sym_len(), sym->get_sym(),
1018                                 fname);
1019 
1020             /* forget the symbol */
1021             sym = 0;
1022         }
1023 
1024         /* create a synthesized object to hold the original definition */
1025         mod_sym = synthesize_modified_obj_sym(FALSE);
1026 
1027         /* transfer data to the new fake symbol */
1028         if (sym != 0)
1029         {
1030             /*
1031              *   'sym' has the original version of the object from the
1032              *   other object file - the original object file must be
1033              *   loaded before an object file that modifies a symbol it
1034              *   exports, so 'sym' will definitely be present in this case
1035              *   (it's an error - undefined external symbol - that we will
1036              *   have already caught if it's not defined).  We want to
1037              *   hijack 'sym' for our own use, since 'modify' replaces the
1038              *   symbol's meaning with the new object data.
1039              *
1040              *   Transfer the self-reference fixup list from the original
1041              *   version of the object to the new synthesized object --
1042              *   all of the self-references must now refer to the
1043              *   renumbered object.
1044              *
1045              *   This is really all we need to do to renumber the object.
1046              *   By moving the self-fixup list to the new fake object, we
1047              *   ensure that the original object will use its new number,
1048              *   which leaves the original number for our use in the new,
1049              *   modifying object (i.e., the one we're loading now).  Note
1050              *   that we'll replace the self-fixup list for this symbol
1051              *   with the fixup list of the modifying symbol, below.
1052              */
1053             mod_sym->set_fixups(sym->get_fixups());
1054 
1055             /*
1056              *   Give the modified fake symbol the original pre-modified
1057              *   object data stream.  The fake symbol owns the
1058              *   pre-modified data stream because it's the pre-modified
1059              *   object.
1060              */
1061             mod_sym->set_stream_ofs(sym->get_stream_ofs());
1062 
1063             /*
1064              *   transfer the 'modify' base symbol from the original
1065              *   version of this symbol to the new fake version
1066              */
1067             mod_sym->set_mod_base_sym(sym->get_mod_base_sym());
1068 
1069             /* transfer the property deletion list */
1070             mod_sym->set_del_prop_head(sym->get_first_del_prop());
1071             sym->set_del_prop_head(0);
1072 
1073             /*
1074              *   mark the original object as a 'class' object - it might
1075              *   have been compiled as a normal instance in its own
1076              *   translation unit, but it's now a class because it's the
1077              *   base class for this link-time 'modify'
1078              */
1079             mod_sym->mark_compiled_as_class();
1080 
1081             /* transfer the dictionary to the base symbol */
1082             mod_sym->set_dict(sym->get_dict());
1083 
1084             /* copy the class flag to the base symbol */
1085             mod_sym->set_is_class(sym->is_class());
1086 
1087             /* set our class flag to the one from the original symbol */
1088             class_flag = sym->is_class();
1089 
1090             /*
1091              *   transfer the superclass list from the original symbol to
1092              *   the modified base symbol
1093              */
1094             mod_sym->set_sc_head(sym->get_sc_head());
1095             sym->set_sc_head(0);
1096 
1097             /* transfer the vocabulary list to the modified base symbol */
1098             mod_sym->set_vocab_head(sym->get_vocab_head());
1099             sym->set_vocab_head(0);
1100         }
1101 
1102         /* do the remaining loading into the synthesized placeholder */
1103         sym = mod_sym;
1104     }
1105     else if (modified_flag)
1106     {
1107         /*
1108          *   The symbol was modified, so the name is fake.  Because the
1109          *   name is tied to the object ID, which can change between the
1110          *   the time of writing the object file and now, when we're
1111          *   loading the object file, we must synthesize a new fake name
1112          *   in the context of the loaded object file.  The name is based
1113          *   on the object number, which is why it must be re-synthesized
1114          *   - the object number in this scheme can be different than the
1115          *   original object number in the object file.
1116          */
1117         sym = synthesize_modified_obj_sym(FALSE);
1118 
1119         /* set the appropriate metaclass */
1120         sym->set_metaclass(meta);
1121     }
1122     else if (anon)
1123     {
1124         /*
1125          *   we will definitely not find a previous entry for an anonymous
1126          *   symbol, because there's no name to look up
1127          */
1128         sym = 0;
1129     }
1130     else
1131     {
1132         /*
1133          *   normal object - look up a previous definition of the symbol
1134          *   in the global symbol table
1135          */
1136         sym = (CTcSymObj *)G_prs->get_global_symtab()->find(txt, len);
1137     }
1138 
1139     /*
1140      *   If this symbol is already defined, make sure the original
1141      *   definition is an object, and make sure that it's only defined
1142      *   (not referenced as external) once.  If it's not defined, define
1143      *   it anew.
1144      */
1145     if (sym != 0 && sym->get_type() != TC_SYM_OBJ)
1146     {
1147         /*
1148          *   It's already defined, but it's not an object - log a symbol
1149          *   type conflict error
1150          */
1151         sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1152 
1153         /*
1154          *   proceed despite the error, since this is merely a symbol
1155          *   conflict and not a file corruption - create a fake symbol to
1156          *   hold the data of the original symbol so we can continue
1157          *   loading
1158          */
1159         sym = 0;
1160         use_fake_sym = TRUE;
1161     }
1162     else if ((ext_replace_flag || ext_modify_flag)
1163              && sym != 0 && sym->get_metaclass() != TC_META_TADSOBJ)
1164     {
1165         /* cannot modify or replace anything but an ordinary object */
1166         G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1167                             TCERR_OBJFILE_CANNOT_MOD_OR_REP_TYPE,
1168                             (int)sym->get_sym_len(), sym->get_sym(),
1169                             fname);
1170 
1171         /* forget that we're doing a replacement */
1172         ext_replace_flag = ext_modify_flag = FALSE;
1173     }
1174     else if (sym != 0
1175              && (sym->get_metaclass() == TC_META_DICT
1176                  || sym->get_metaclass() == TC_META_GRAMPROD))
1177     {
1178         /*
1179          *   If this is a dictionary or grammar production object, and the
1180          *   original definition was of the same metaclass, allow the
1181          *   multiple definitions without conflict - just treat the new
1182          *   definition as external.  These object types don't require a
1183          *   primary definition - every time such an object is defined,
1184          *   it's a definition, but the same definition can appear in
1185          *   multiple object files without conflict.  Simply act as though
1186          *   this new declaration is extern after all in this case.
1187          */
1188         if (meta == sym->get_metaclass())
1189         {
1190             /*
1191              *   it's another one of the same type - allow it without
1192              *   conflict; act as though this new definition is external
1193              */
1194             if (!sym->is_extern())
1195                 is_extern = FALSE;
1196         }
1197         else
1198         {
1199             /* the other one's of a different type - log a conflict */
1200             sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1201 
1202             /* proceed with a fake symbol */
1203             sym = 0;
1204             use_fake_sym = TRUE;
1205         }
1206     }
1207     else if ((ext_replace_flag || ext_modify_flag)
1208              && (sym == 0 || sym->is_extern()))
1209     {
1210         /*
1211          *   This symbol isn't defined yet, or is only defined as an
1212          *   external, but the new symbol is marked as 'replace' or
1213          *   'modify' - it's an error, because the original version of an
1214          *   object must always be loaded before the replaced or modified
1215          *   version
1216          */
1217         G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1218                             TCERR_OBJFILE_MODREPOBJ_BEFORE_ORIG,
1219                             (int)len, txt, fname);
1220 
1221         /* forget the symbol */
1222         sym = 0;
1223     }
1224     else if (sym != 0
1225              && !sym->is_extern()
1226              && !(is_extern || ext_modify_flag || ext_replace_flag
1227                   || modified_flag))
1228     {
1229         /*
1230          *   the symbol was already defined, and this is a new actual
1231          *   definition (not external, and not replace or modify) -- this
1232          *   is an error because it means the same object is defined more
1233          *   than once
1234          */
1235         sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1236 
1237         /*
1238          *   proceed despite the error, since this is merely a symbol
1239          *   conflict and not a file corruption - create a fake symbol to
1240          *   hold the data of the original symbol so we can continue
1241          *   loading
1242          */
1243         sym = 0;
1244         use_fake_sym = TRUE;
1245     }
1246     else if (sym != 0 && meta != sym->get_metaclass())
1247     {
1248         /*
1249          *   the new symbol and the old symbol have different metaclasses
1250          *   - it's a conflict
1251          */
1252         sym->log_objfile_conflict(fname, TC_SYM_OBJ);
1253 
1254         /* proceed with a fake symbol */
1255         sym = 0;
1256         use_fake_sym = TRUE;
1257     }
1258 
1259     /* create the object if necessary */
1260     if (sym == 0)
1261     {
1262         /*
1263          *   The symbol isn't defined yet - create the new definition and
1264          *   add it to the symbol table.  Allocate a new object ID for the
1265          *   symbol in the normal fashion.
1266          */
1267         sym = new CTcSymObj(txt, len, FALSE, G_cg->new_obj_id(),
1268                             is_extern, meta, 0);
1269 
1270         /*
1271          *   if we're using a fake symbol, don't bother adding the symbol
1272          *   to the symbol table, since its only function is to allow us
1273          *   to finish reading the object file data (we won't actually try
1274          *   to link when using a fake symbol, since this always means
1275          *   that an error has made linking impossible; we'll proceed
1276          *   anyway so that we catch any other errors that remain to be
1277          *   found)
1278          *
1279          *   similarly, don't add the symbol if it's anonymous
1280          */
1281         if (!use_fake_sym && !anon)
1282             G_prs->get_global_symtab()->add_entry(sym);
1283 
1284         /* if it's anonymous, add it to the anonymous symbol list */
1285         if (anon)
1286             G_prs->add_anon_obj(sym);
1287     }
1288 
1289     /*
1290      *   If we're replacing the object, tell the code generator to get rid
1291      *   of the old object definition in the object stream -- delete the
1292      *   definition at the symbol's old stream offset.
1293      */
1294     if (ext_replace_flag)
1295         G_cg->notify_replace_object(sym->get_stream_ofs());
1296 
1297     /*
1298      *   If this is a non-extern definition, we now have the object
1299      *   defined -- remove the 'extern' flag from the symbol table entry,
1300      *   and set the symbol's data to the data we just read.  Do not
1301      *   transfer data to the symbol if this is an extern, since we want
1302      *   to use the existing data from the originally loaded object.
1303      */
1304     if (!is_extern)
1305     {
1306         /* clear the external flag */
1307         sym->set_extern(FALSE);
1308 
1309         /* set the object's stream offset, if we read one */
1310         if (stream_ofs_valid)
1311             sym->set_stream_ofs(stream_ofs);
1312 
1313         /* set the base 'modify' symbol if this symbol modifies another */
1314         if (mod_base_sym != 0)
1315             sym->set_mod_base_sym(mod_base_sym);
1316 
1317         /* set the new symbol's fixup list */
1318         sym->set_fixups(fixups);
1319 
1320         /* set the new symbol's deleted property list */
1321         sym->set_del_prop_head(del_prop_head);
1322 
1323         /*
1324          *   set the symbol's class flag - only add the class flag,
1325          *   because we might have already set the class flag for this
1326          *   symbol based on the external definition
1327          */
1328         if (class_flag)
1329             sym->set_is_class(class_flag);
1330     }
1331 
1332     /* add this symbol to the load file object index list */
1333     G_prs->add_sym_from_obj_file(obj_file_idx, sym);
1334 
1335     /* set the dictionary, if one was specified */
1336     if (dict_idx != 0)
1337         sym->set_dict(G_prs->get_obj_dict(dict_idx));
1338 
1339     /*
1340      *   if this is a dictionary symbol, add it to the dictionary fixup
1341      *   list
1342      */
1343     if (meta == TC_META_DICT)
1344         G_prs->add_dict_from_obj_file(sym);
1345 
1346 
1347     /*
1348      *   Set the translation table entry for the symbol.  We know the
1349      *   original ID local to the object file, and we know the new global
1350      *   object ID.
1351      */
1352     obj_xlat[id] = sym->get_obj_id();
1353 
1354     /* success */
1355     return sym;
1356 }
1357 
1358 /*
1359  *   Apply our self-reference fixups
1360  */
apply_internal_fixups()1361 void CTcSymObjBase::apply_internal_fixups()
1362 {
1363     CTcIdFixup *fixup;
1364     CTcObjPropDel *entry;
1365     CTcSymObj *mod_base;
1366 
1367     /* run through our list and apply each fixup */
1368     for (fixup = fixups_ ; fixup != 0 ; fixup = fixup->nxt_)
1369         fixup->apply_fixup(obj_id_, 4);
1370 
1371     /*
1372      *   If we're a 'modify' object, and we were based at compile-time on
1373      *   an object external to the translation unit in which this modified
1374      *   version of the object was defined, we'll have a property deletion
1375      *   list to be applied at link time.  Now is the time - go through
1376      *   our list and delete each property in each of our 'modify' base
1377      *   classes.  Don't delete the properties in our own object,
1378      *   obviously - just in our modified base classes.
1379      */
1380     for (mod_base = mod_base_sym_ ; mod_base != 0 ;
1381          mod_base = mod_base->get_mod_base_sym())
1382     {
1383         /* delete each property in our deletion list in this base class */
1384         for (entry = first_del_prop_ ; entry != 0 ; entry = entry->nxt_)
1385         {
1386             /* delete this property from the base object */
1387             mod_base->delete_prop_from_mod_base(entry->prop_sym_->get_prop());
1388 
1389             /* remove it from the base object's vocabulary list */
1390             mod_base->delete_vocab_prop(entry->prop_sym_->get_prop());
1391         }
1392     }
1393 }
1394 
1395 /*
1396  *   Merge my private grammar rules into the master rule list for the
1397  *   associated grammar production object.
1398  */
merge_grammar_entry()1399 void CTcSymObjBase::merge_grammar_entry()
1400 {
1401     CTcSymObj *prod_sym;
1402     CTcGramProdEntry *master_entry;
1403 
1404     /* if I don't have a grammar list, there's nothing to do */
1405     if (grammar_entry_ == 0)
1406         return;
1407 
1408     /* get the grammar production object my rules are associated with */
1409     prod_sym = grammar_entry_->get_prod_sym();
1410 
1411     /* get the master list for the production */
1412     master_entry = G_prs->get_gramprod_entry(prod_sym);
1413 
1414     /* move the alternatives from my private list to the master list */
1415     grammar_entry_->move_alts_to(master_entry);
1416 }
1417 
1418 
1419 /* ------------------------------------------------------------------------ */
1420 /*
1421  *   metaclass symbol base - image/object file functions
1422  */
1423 
1424 /*
1425  *   load from an object file
1426  */
1427 int CTcSymMetaclassBase::
load_from_obj_file(CVmFile * fp,const textchar_t * fname,tctarg_obj_id_t * obj_xlat)1428    load_from_obj_file(CVmFile *fp, const textchar_t *fname,
1429                       tctarg_obj_id_t *obj_xlat)
1430 {
1431     const char *txt;
1432     size_t len;
1433     int meta_idx;
1434     int prop_cnt;
1435     CTcSymMetaclass *sym;
1436     char buf[TOK_SYM_MAX_LEN + 1];
1437     CTcSymMetaProp *prop;
1438     int was_defined;
1439     tctarg_obj_id_t class_obj;
1440 
1441     /* read the symbol name */
1442     txt = base_read_from_sym_file(fp);
1443     len = strlen(txt);
1444 
1445     /* read the metaclass index, class object ID, and property count */
1446     fp->read_bytes(buf, 8);
1447     meta_idx = osrp2(buf);
1448     class_obj = osrp4(buf + 2);
1449     prop_cnt = osrp2(buf + 6);
1450 
1451     /* check for a previous definition */
1452     sym = (CTcSymMetaclass *)G_prs->get_global_symtab()->find(txt, len);
1453     if (sym == 0)
1454     {
1455         /* it's not defined yet - create the new definition */
1456         sym = new CTcSymMetaclass(txt, len, FALSE, meta_idx,
1457                                   G_cg->new_obj_id());
1458         G_prs->get_global_symtab()->add_entry(sym);
1459 
1460         /* note that it wasn't yet defined */
1461         was_defined = FALSE;
1462 
1463         /* set the metaclass symbol pointer in the dependency table */
1464         G_cg->set_meta_sym(meta_idx, sym);
1465     }
1466     else if (sym->get_type() != TC_SYM_METACLASS)
1467     {
1468         /* log a conflict */
1469         sym->log_objfile_conflict(fname, TC_SYM_METACLASS);
1470 
1471         /* forget the symbol */
1472         sym = 0;
1473         was_defined = FALSE;
1474     }
1475     else
1476     {
1477         /* if the metaclass index doesn't match, it's an error */
1478         if (sym->get_meta_idx() != meta_idx)
1479             G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1480                                 TCERR_OBJFILE_METACLASS_IDX_CONFLICT,
1481                                 (int)len, txt, fname);
1482 
1483         /* note that it was previously defined */
1484         was_defined = TRUE;
1485 
1486         /* start with the first property */
1487         prop = sym->get_prop_head();
1488     }
1489 
1490     /* set the ID translation for the class object */
1491     if (sym != 0)
1492         obj_xlat[class_obj] = sym->get_class_obj();
1493 
1494     /* read the property names */
1495     for ( ; prop_cnt != 0 ; --prop_cnt)
1496     {
1497         int is_static;
1498 
1499         /* read the property symbol name */
1500         txt = base_read_from_sym_file(fp);
1501         len = strlen(txt);
1502 
1503         /* read the flags */
1504         fp->read_bytes(buf, 1);
1505         is_static = ((buf[0] & 1) != 0);
1506 
1507         /* check what we're doing */
1508         if (sym == 0)
1509         {
1510             /*
1511              *   we have a conflict, so we're just scanning the names to
1512              *   keep in sync with the file - ignore it
1513              */
1514         }
1515         else if (was_defined)
1516         {
1517             /*
1518              *   the metaclass was previously defined - simply check to
1519              *   ensure that this property matches the corresponding
1520              *   property (by list position) in the original definition
1521              */
1522             if (prop == 0)
1523             {
1524                 /*
1525                  *   we're past the end of the original definition's
1526                  *   property list - this is okay, as we can simply add
1527                  *   the properties in the new list (which must be a more
1528                  *   recent definition than the original one)
1529                  */
1530                 sym->add_prop(txt, len, fname, is_static);
1531             }
1532             else if (prop->prop_->get_sym_len() != len
1533                      || memcmp(prop->prop_->get_sym(), txt, len) != 0)
1534             {
1535                 /* this one doesn't match - it's an error */
1536                 G_tcmain->log_error(0, 0, TC_SEV_ERROR,
1537                                     TCERR_OBJFILE_METACLASS_PROP_CONFLICT,
1538                                     (int)len, txt,
1539                                     (int)prop->prop_->get_sym_len(),
1540                                     prop->prop_->get_sym(), fname);
1541             }
1542 
1543             /* move on to the next property in the list */
1544             if (prop != 0)
1545                 prop = prop->nxt_;
1546         }
1547         else
1548         {
1549             /*
1550              *   we're defining the metaclass anew - add this property to
1551              *   the metaclass's property list
1552              */
1553             sym->add_prop(txt, len, fname, is_static);
1554         }
1555     }
1556 
1557     /* read our modifier object flag */
1558     fp->read_bytes(buf, 1);
1559     if (buf[0] != 0)
1560     {
1561         /* laod the new object */
1562         CTcSymObj *mod_obj;
1563 
1564         /* we have a modification object - load it */
1565         mod_obj = CTcSymObj::load_from_obj_file_modbase(
1566             fp, fname, obj_xlat, 0, 0, FALSE);
1567 
1568         /*
1569          *   if the metaclass already has a modification object, then the
1570          *   bottom of the chain we just loaded modifies the top of the
1571          *   existing chain
1572          */
1573         if (sym->get_mod_obj() != 0)
1574         {
1575             CTcSymObj *obj;
1576             CTcSymObj *prv;
1577 
1578             /*
1579              *   Set the bottom of the new chain to point to the top of
1580              *   the existing chain.  The bottom object in each object
1581              *   file's modification chain is always a dummy root object;
1582              *   we'll thus find the second to last object in the new
1583              *   chain, and replace the pointer to its dummy root
1584              *   superclass with a pointer to the top of the
1585              *   previously-loaded chain that we're modifying.
1586              */
1587 
1588             /* find the second-to-last object in the new chain */
1589             for (prv = 0, obj = mod_obj ;
1590                  obj != 0 && obj->get_mod_base_sym() != 0 ;
1591                  prv = obj, obj = obj->get_mod_base_sym()) ;
1592 
1593             /*
1594              *   if we found the second-to-last object, set up the link
1595              *   back into the old chain
1596              */
1597             if (prv != 0)
1598                 prv->set_mod_base_sym(sym->get_mod_obj());
1599         }
1600 
1601         /* point the metaclass to the modification object */
1602         sym->set_mod_obj(mod_obj);
1603     }
1604 
1605     /* return success - the file appears well-formed */
1606     return 0;
1607 }
1608 
1609 
1610 /* ------------------------------------------------------------------------ */
1611 /*
1612  *   property symbol entry base - image/object file functions
1613  */
1614 
1615 /*
1616  *   Load from an object file
1617  */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname,tctarg_prop_id_t * prop_xlat)1618 int CTcSymPropBase::load_from_obj_file(class CVmFile *fp,
1619                                        const textchar_t *fname,
1620                                        tctarg_prop_id_t *prop_xlat)
1621 {
1622     const char *txt;
1623     size_t len;
1624     ulong id;
1625     CTcSymProp *sym;
1626 
1627     /* read the symbol name information */
1628     txt = base_read_from_sym_file(fp);
1629     len = strlen(txt);
1630 
1631     /* read our property ID */
1632     id = (ulong)fp->read_int4();
1633 
1634     /*
1635      *   If this symbol is already defined, make sure the original
1636      *   definition is a property.  If it's not defined, define it anew.
1637      */
1638     sym = (CTcSymProp *)G_prs->get_global_symtab()->find(txt, len);
1639     if (sym == 0)
1640     {
1641         /*
1642          *   It's not defined yet - create the new definition and add it
1643          *   to the symbol table.  Allocate a new property ID for the
1644          *   symbol in the normal fashion.
1645          */
1646         sym = new CTcSymProp(txt, len, FALSE, G_cg->new_prop_id());
1647         G_prs->get_global_symtab()->add_entry(sym);
1648     }
1649     else if (sym->get_type() != TC_SYM_PROP)
1650     {
1651         /*
1652          *   It's not already defined as a property - log a symbol type
1653          *   conflict error
1654          */
1655         sym->log_objfile_conflict(fname, TC_SYM_PROP);
1656 
1657         /*
1658          *   proceed despite the error, since this is merely a symbol
1659          *   conflict and not a file corruption
1660          */
1661         return 0;
1662     }
1663 
1664     /*
1665      *   Set the translation table entry for the symbol.  We know the
1666      *   original ID local to the object file, and we know the new global
1667      *   property ID.
1668      */
1669     prop_xlat[id] = sym->get_prop();
1670 
1671     /* success */
1672     return 0;
1673 }
1674 
1675 /* ------------------------------------------------------------------------ */
1676 /*
1677  *   enumerator symbol entry base - image/object file functions
1678  */
1679 
1680 /*
1681  *   Load from an object file
1682  */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname,ulong * enum_xlat)1683 int CTcSymEnumBase::load_from_obj_file(class CVmFile *fp,
1684                                        const textchar_t *fname,
1685                                        ulong *enum_xlat)
1686 {
1687     const char *txt;
1688     size_t len;
1689     ulong id;
1690     CTcSymEnum *sym;
1691     char buf[32];
1692     int is_token;
1693 
1694     /* read the symbol name information */
1695     txt = base_read_from_sym_file(fp);
1696     len = strlen(txt);
1697 
1698     /* read our enumerator ID */
1699     id = (ulong)fp->read_int4();
1700 
1701     /* read our flags */
1702     fp->read_bytes(buf, 1);
1703 
1704     /* get the 'token' flag */
1705     is_token = ((buf[0] & 1) != 0);
1706 
1707     /*
1708      *   If this symbol is already defined, make sure the original
1709      *   definition is an enum.  If it's not defined, define it anew.
1710      */
1711     sym = (CTcSymEnum *)G_prs->get_global_symtab()->find(txt, len);
1712     if (sym == 0)
1713     {
1714         /*
1715          *   It's not defined yet - create the new definition and add it
1716          *   to the symbol table.  Allocate a new enumerator ID for the
1717          *   symbol in the normal fashion.
1718          */
1719         sym = new CTcSymEnum(txt, len, FALSE, G_prs->new_enum_id(), is_token);
1720         G_prs->get_global_symtab()->add_entry(sym);
1721     }
1722     else if (sym->get_type() != TC_SYM_ENUM)
1723     {
1724         /*
1725          *   It's not already defined as an enumerator - log a symbol type
1726          *   conflict error
1727          */
1728         sym->log_objfile_conflict(fname, TC_SYM_ENUM);
1729 
1730         /*
1731          *   proceed despite the error, since this is merely a symbol
1732          *   conflict and not a file corruption
1733          */
1734         return 0;
1735     }
1736 
1737     /*
1738      *   Set the translation table entry for the symbol.  We know the
1739      *   original ID local to the object file, and we know the new global
1740      *   enum ID.
1741      */
1742     enum_xlat[id] = sym->get_enum_id();
1743 
1744     /* success */
1745     return 0;
1746 }
1747 
1748 /* ------------------------------------------------------------------------ */
1749 /*
1750  *   Built-in function symbol base - image/object file functions
1751  */
1752 
1753 /*
1754  *   load from an object file
1755  */
load_from_obj_file(class CVmFile * fp,const textchar_t * fname)1756 int CTcSymBifBase::load_from_obj_file(class CVmFile *fp,
1757                                       const textchar_t *fname)
1758 {
1759     const char *txt;
1760     size_t len;
1761     CTcSymBif *sym;
1762     char buf[10];
1763     int func_set_id;
1764     int func_idx;
1765     int has_retval;
1766     int min_argc;
1767     int max_argc;
1768     int varargs;
1769 
1770     /* read the symbol name information */
1771     txt = base_read_from_sym_file(fp);
1772     len = strlen(txt);
1773 
1774     /* read our additional information */
1775     fp->read_bytes(buf, 10);
1776     varargs = buf[0];
1777     has_retval = buf[1];
1778     min_argc = osrp2(buf+2);
1779     max_argc = osrp2(buf+4);
1780     func_set_id = osrp2(buf+6);
1781     func_idx = osrp2(buf+8);
1782 
1783     /*
1784      *   If this symbol is already defined, make sure the new definition
1785      *   matches the original definition - built-in function sets must be
1786      *   identical in all object files loaded.  If it's not already
1787      *   defined, add it now.
1788      */
1789     sym = (CTcSymBif *)G_prs->get_global_symtab()->find(txt, len);
1790     if (sym == 0)
1791     {
1792         /*
1793          *   it's not defined yet - create the new definition and add it
1794          *   to the symbol table
1795          */
1796         sym = new CTcSymBif(txt, len, FALSE, func_set_id, func_idx,
1797                             has_retval, min_argc, max_argc, varargs);
1798         G_prs->get_global_symtab()->add_entry(sym);
1799     }
1800     else if (sym->get_type() != TC_SYM_BIF)
1801     {
1802         /* log the error */
1803         sym->log_objfile_conflict(fname, TC_SYM_BIF);
1804     }
1805     else if (sym->get_func_set_id() != func_set_id
1806              || sym->get_func_idx() != func_idx
1807              || sym->get_min_argc() != min_argc
1808              || sym->get_max_argc() != max_argc
1809              || sym->is_varargs() != varargs
1810              || sym->has_retval() != has_retval)
1811     {
1812         /*
1813          *   this function is already defined but has different settings
1814          *   -- we cannot reconcile the different usages of the function,
1815          *   so this is an error
1816          */
1817         G_tcmain->log_error(0, 0, TC_SEV_ERROR, TCERR_OBJFILE_BIF_INCOMPAT,
1818                             (int)len, txt, fname);
1819     }
1820     else
1821     {
1822         /*
1823          *   everything about the symbol matches - there's no need to
1824          *   redefine the symbol, since it's already set up exactly as we
1825          *   need it to be
1826          */
1827     }
1828 
1829     /* continue reading the file */
1830     return 0;
1831 }
1832 
1833 /* ------------------------------------------------------------------------ */
1834 /*
1835  *   Grammar production list entry
1836  */
1837 
1838 /*
1839  *   load from an object file
1840  */
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat,CTcSymObj * private_owner)1841 void CTcGramProdEntry::load_from_obj_file(
1842     CVmFile *fp, const tctarg_prop_id_t *prop_xlat, const ulong *enum_xlat,
1843     CTcSymObj *private_owner)
1844 {
1845     uint idx;
1846     ulong cnt;
1847     CTcSymObj *obj;
1848     CTcGramProdEntry *prod;
1849     ulong flags;
1850 
1851     /*
1852      *   read the object file index of the production object, and get the
1853      *   production object
1854      */
1855     idx = (uint)fp->read_int4();
1856     obj = G_prs->get_objfile_objsym(idx);
1857 
1858     /* declare the production object */
1859     prod = G_prs->declare_gramprod(obj->get_sym(), obj->get_sym_len());
1860 
1861     /* if we have a private owner, create a private rule list */
1862     if (private_owner != 0)
1863         prod = private_owner->create_grammar_entry(
1864             obj->get_sym(), obj->get_sym_len());
1865 
1866     /* read the flags */
1867     flags = (ulong)fp->read_int4();
1868 
1869     /* set the explicitly-declared flag if appropriate */
1870     if (flags & 1)
1871         prod->set_declared(TRUE);
1872 
1873     /* read the alternative count */
1874     cnt = (uint)fp->read_int4();
1875 
1876     /* read the alternatives */
1877     for ( ; cnt != 0 ; --cnt)
1878     {
1879         CTcGramProdAlt *alt;
1880 
1881         /* read an alternative */
1882         alt = CTcGramProdAlt::load_from_obj_file(fp, prop_xlat, enum_xlat);
1883 
1884         /* add it to the production's list */
1885         if (prod != 0)
1886             prod->add_alt(alt);
1887     }
1888 }
1889 
1890 
1891 /* ------------------------------------------------------------------------ */
1892 /*
1893  *   Grammar production alternative
1894  */
1895 
1896 /*
1897  *   load from an object file
1898  */
1899 CTcGramProdAlt *CTcGramProdAlt::
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat)1900    load_from_obj_file(CVmFile *fp, const tctarg_prop_id_t *prop_xlat,
1901                       const ulong *enum_xlat)
1902 {
1903     uint idx;
1904     ulong cnt;
1905     CTcSymObj *obj;
1906     CTcGramProdAlt *alt;
1907     CTcDictEntry *dict;
1908     int score;
1909     int badness;
1910 
1911     /* read my score and badness */
1912     score = fp->read_int2();
1913     badness = fp->read_int2();
1914 
1915     /* read my processor object index, and get the associated object */
1916     idx = (uint)fp->read_int4();
1917     obj = G_prs->get_objfile_objsym(idx);
1918 
1919     /* read my dictionary object index, and get the associated entry */
1920     idx = (uint)fp->read_int4();
1921     dict = G_prs->get_obj_dict(idx);
1922 
1923     /* create the alternative object */
1924     alt = new (G_prsmem) CTcGramProdAlt(obj, dict);
1925 
1926     /* set the score badness */
1927     alt->set_score(score);
1928     alt->set_badness(badness);
1929 
1930     /* read the number of tokens */
1931     cnt = (ulong)fp->read_int4();
1932 
1933     /* read the tokens */
1934     for ( ; cnt != 0 ; --cnt)
1935     {
1936         CTcGramProdTok *tok;
1937 
1938         /* read a token */
1939         tok = CTcGramProdTok::load_from_obj_file(fp, prop_xlat, enum_xlat);
1940 
1941         /* add it to the alternative's list */
1942         alt->add_tok(tok);
1943     }
1944 
1945     /* return the alternative */
1946     return alt;
1947 }
1948 
1949 
1950 /* ------------------------------------------------------------------------ */
1951 /*
1952  *   Grammar production token
1953  */
1954 
1955 /*
1956  *   load from an object file
1957  */
1958 CTcGramProdTok *CTcGramProdTok::
load_from_obj_file(CVmFile * fp,const tctarg_prop_id_t * prop_xlat,const ulong * enum_xlat)1959    load_from_obj_file(CVmFile *fp, const tctarg_prop_id_t *prop_xlat,
1960                       const ulong *enum_xlat)
1961 {
1962     CTcGramProdTok *tok;
1963     CTcSymObj *obj;
1964     tcgram_tok_type typ;
1965     tctarg_prop_id_t prop;
1966     size_t len;
1967     char *txt;
1968     uint idx;
1969     ulong enum_id;
1970     size_t i;
1971 
1972     /* create a new token */
1973     tok = new (G_prsmem) CTcGramProdTok();
1974 
1975     /* read the type */
1976     typ = (tcgram_tok_type)fp->read_int2();
1977 
1978     /* read the data, which depends on the type */
1979     switch(typ)
1980     {
1981     case TCGRAM_PROD:
1982         /* read the production object's object file index */
1983         idx = (uint)fp->read_int4();
1984 
1985         /* translate it to an object */
1986         obj = G_prs->get_objfile_objsym(idx);
1987 
1988         /* set the production object in the token */
1989         tok->set_match_prod(obj);
1990         break;
1991 
1992     case TCGRAM_TOKEN_TYPE:
1993         /* read the token ID, translating to the new enum numbering */
1994         enum_id = enum_xlat[fp->read_int4()];
1995 
1996         /* set the token-type match */
1997         tok->set_match_token_type(enum_id);
1998         break;
1999 
2000     case TCGRAM_PART_OF_SPEECH:
2001         /* read the property ID, translating to the new numbering system */
2002         prop = prop_xlat[fp->read_int2()];
2003 
2004         /* set the part of speech in the token */
2005         tok->set_match_part_of_speech(prop);
2006         break;
2007 
2008     case TCGRAM_PART_OF_SPEECH_LIST:
2009         /* read the list length */
2010         len = (size_t)fp->read_int2();
2011 
2012         /* set the type */
2013         tok->set_match_part_list();
2014 
2015         /* read each element and add it to the list */
2016         for (i = 0 ; i < len ; ++i)
2017             tok->add_match_part_ele(prop_xlat[fp->read_int2()]);
2018 
2019         /* done */
2020         break;
2021 
2022     case TCGRAM_LITERAL:
2023         /* read the length of the string */
2024         len = (size_t)fp->read_int2();
2025 
2026         /* allocate parser memory to hold the text */
2027         txt = (char *)G_prsmem->alloc(len);
2028 
2029         /* read the text of the literal */
2030         fp->read_bytes(txt, len);
2031 
2032         /* set the literal in the token */
2033         tok->set_match_literal(txt, len);
2034         break;
2035 
2036     case TCGRAM_STAR:
2037         /* there's no additional data */
2038         tok->set_match_star();
2039         break;
2040 
2041     case TCGRAM_UNKNOWN:
2042         /* no extra data to read */
2043         break;
2044     }
2045 
2046     /* read and set the property association */
2047     tok->set_prop_assoc(prop_xlat[fp->read_int2()]);
2048 
2049     /* return the token */
2050     return tok;
2051 }
2052