xref: /original-bsd/old/as.tahoe/asmain.c (revision 1897046e)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
/*
 *	SCCS identification string for this file.  It starts with the
 *	"@(#)" marker that selfwhat() (below) searches the data space for.
 *	Left out when running lint so it is not reported as unused.
 */
#ifndef lint
static char sccsid[] = "@(#)asmain.c 4.13 6/30/83";
#endif /* not lint */
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 #include <paths.h>
17 
/* identification banner; not referenced in the visible part of this file */
#define	unix_lang_name "VAX/UNIX Assembler V6/30/83 4.13"
/*
 *	Variables to manage reading the assembly source files.
 *
 *	source, layout, layoutpos, ind and endofsource serve the listing;
 *	pass2() sets them up when a listing was requested.
 */
char	*dotsname;	/* the current file name; managed by the parser */
int	lineno;		/* current line number; managed by the parser */
char	**innames;	/* names of the files being assembled (filled by argprocess) */
int	ninfiles;	/* how many interesting files there are (entries in innames) */
FILE	*source;	/* current source file (for listing) */
char	layout[400];	/* layout bytes; pass2() formats the line-number prefix here */
char	*layoutpos = layout;	/* layout position in listfile */
int	ind = 0;	/* innames in-index: 0..ninfiles; next file to open for the listing */
int	endofsource = 0;	/* set to 1 by pass2() when there is no (more) listing source */
long	sourcepos;	/* NOTE(review): not used in this file; presumably an offset in source */
/*
 *	Flags settable from the argv process argument list
 *	(the option letters are decoded in argprocess()),
 *	plus the error/pass status shared by the whole assembler.
 */
int	silent = 0;	/* -W: don't complain about any errors */
int	savelabels = 0;	/* -L: write the labels to the a.out file */
int 	d124 = 4;	/* -d[124]: default allocate 4 bytes for unknown pointers */
int	anyerrs = 0;	/* no errors yet; main() stops after a pass if nonzero */
int	anywarnings=0;	/* no warnings yet */
int	orgwarn = 0;	/* Bad origins; main() prints a caution if set */
int	passno = 1;	/* current pass; set by pass1() and pass2() */
int	jxxxJUMP = 0;	/* -J: in jxxxes that branch too far, use jmp instead of brw */
int	readonlydata = 0;	/* -R: initialized data -> text space */
int	liston = 0;	/* -P sets this to 1 to produce a listing; default is none */
45 
46 
#ifdef DEBUG
int 	debug = 0;	/* -D: dump the symbol table in pass1_5(), banner in pass2() */
int	toktrace = 0;	/* -T: set by argprocess(); not otherwise used in this file */
#endif

/*
 *	-V: when set, i_pass1() and i_pass2() do not open the interpass
 *	token temporary file.
 */
int	useVM =	0;

char	*endcore;	/* where to get more symbol space; main() sets it to sbrk(0) */
55 
/*
 *	Managers of the a.out file.
 *
 *	hdr is written once by build_hdr() with zero relocation sizes,
 *	and rewritten by fix_a_out() after reloc_syms() has filled them in.
 */
struct	exec	hdr;
#define	MAGIC	0407	/* value stored in hdr.a_magic by build_hdr() */
u_long	tsize;		/* total text size; accumulated in roundsegments() */
u_long	dsize;		/* total data size; accumulated in roundsegments() */
u_long	datbase;	/* base of the data segment: tsize rounded up to FW */
u_long	trsize;		/* total text relocation size; copied to hdr by reloc_syms() */
u_long	drsize;		/* total data relocation size; copied to hdr by reloc_syms() */
66 
/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments.
 *
 *	The first NLOC entries describe the text segments and the
 *	next NLOC entries the data segments (see zeroorigins()).
 *
 *	dotp points to the correct usedot expression for the current segment.
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio, but
 *	is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 *
 *	It is only opened when useVM is zero (see i_pass1()).
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];		/* its name; empty until tempopen() fills it in */
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file.
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];		/* its name; empty until tempopen() fills it in */
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created during the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library.
 */
char	*outfile = "a.out";		/* output file name; replaced by the -o argument */
FILE	*a_out_file;			/* stdio handle from open_a_out() */
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	roundsegments() stores (BFILE *)-1 in usefile[] for a segment
 *	that gets no logical file.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the "absoluteness" of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This precludes the use of the block I/O
 *	library, which requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relfil.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relfil to output the symbol table information.
 *	NOTE(review): reloc_syms() in this file passes relocfile, not
 *	relfil, to symwrite() -- confirm which one the comment means.
 */
/* directory for the temporary files; replaced by the -t argument */
char	*tmpdirprefix = _PATH_TMP;
/* declared ahead of use so initialize() can install it as the SIGINT handler */
int delexit();
168 
169 main(argc, argv)
170 	int	argc;
171 	char 	**argv;
172 {
173 	char	*sbrk();
174 
175 	tokfilename[0] = 0;
176 	strfilename[0] = 0;
177 	endcore = sbrk(0);
178 
179 	argprocess(argc, argv);		/* process argument lists */
180 	if (anyerrs) exit(1);
181 
182 	initialize();
183 	zeroorigins();			/* set origins to zero */
184 	zerolocals();			/* fix local label counters */
185 
186 	i_pass1();			/* open temp files, etc */
187 	pass1();			/* first pass through .s files */
188 	testlocals();			/* check for undefined locals */
189 	if (anyerrs) delexit();
190 
191 	pass1_5();			/* resolve jxxx */
192 	if (anyerrs) delexit();
193 
194 	open_a_out();			/* open a.out */
195 	roundsegments();		/* round segments to FW */
196 	build_hdr();			/* build initial header, and output */
197 
198 	i_pass2();			/* reopen temporary file, etc */
199 	pass2();			/* second pass through the virtual .s */
200 	if (anyerrs) delexit();
201 
202 	fillsegments();			/* fill segments with 0 to FW */
203 	reloc_syms();			/* dump relocation and symbol table */
204 
205 	delete();			/* remove tmp file */
206 	bflush();			/* close off block I/O view of a.out */
207 	fix_a_out();			/* add in text and data reloc counts */
208 
209 	if (anyerrs == 0 && orgwarn)
210 		yyerror("Caution: absolute origins.\n");
211 
212 	exit(anyerrs != 0);
213 }
214 
215 argprocess(argc, argv)
216 	int	argc;
217 	char	*argv[];
218 {
219 	register	char	*cp;
220 
221 	ninfiles = 0;
222 	silent = 0;
223 #ifdef DEBUG
224 	debug = 0;
225 #endif
226 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
227 	dotsname = "<argv error>";
228 	while (argc > 1) {
229 		if (argv[1][0] != '-')
230 			innames[ninfiles++] = argv[1];
231 		else {
232 			cp = argv[1] + 1;
233 			/*
234 			 *	We can throw away single minus signs, so
235 			 *	that make scripts for the PDP 11 assembler work
236 			 *	on this assembler too
237 			 */
238 			while (*cp){
239 				switch(*cp++){
240 				 default:
241 					yyerror("Unknown flag: %c", *--cp);
242 					cp++;
243 					break;
244 				 case 'v':
245 					selfwhat(stdout);
246 					exit(1);
247 				 case 'd':
248 					d124 = *cp++ - '0';
249 					if ( (d124 != 1) && (d124 != 2) &&
250 					     (d124 != 4)){
251 						yyerror("-d[124] only");
252 						exit(1);
253 					}
254 					break;
255 				 case 'P':
256 					liston = 1;
257 					listfile = stdout;
258 					break;
259 				 case 'o':
260 					if (argc < 3){
261 						yyerror("-o what???");
262 						exit(1);
263 					}
264 					outfile = argv[2];
265 				   bumpone:
266 					argc -= 2;
267 					argv += 2;
268 					goto nextarg;
269 
270 				 case 't':
271 					if (argc < 3){
272 						yyerror("-t what???");
273 						exit(1);
274 					}
275 					tmpdirprefix = argv[2];
276 					goto bumpone;
277 
278 				 case 'V':
279 					useVM = 1;
280 					break;
281 				 case 'W':
282 					silent = 1;
283 					break;
284 				 case 'L':
285 					savelabels = 1;
286 					break;
287 				 case 'J':
288 					jxxxJUMP = 1;
289 					break;
290 #ifdef DEBUG
291 				 case 'D':
292 					debug = 1;
293 					break;
294 				 case 'T':
295 					toktrace = 1;
296 					break;
297 #endif
298 				 case 'R':
299 					readonlydata = 1;
300 					break;
301 				}	/*end of the switch*/
302 			}	/*end of pulling out all arguments*/
303 		}	/*end of a flag argument*/
304 		--argc; ++argv;
305 	   nextarg:;
306 	}
307 	/* innames[ninfiles] = 0; */
308 }
/*
 *	Poke through the data space and print every sccs identifier
 *	found there, one per line and preceded by a tab, on place.
 *
 *	An identifier is whatever follows the four characters "@(#)"
 *	up to the next NUL, '>' or newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 *
 *	The scan runs from &environ up to the current break (sbrk(0)).
 *	The marker test looks at four bytes, so the scan stops three
 *	bytes short of the break; no byte at or past the break is read.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	ub = sbrk(0);
	for (cp = (char *)&environ; cp + 3 < ub; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		/* copy out the identifier; never run past the break */
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
339 
340 initialize()
341 {
342 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
343 		signal(SIGINT, delexit);
344 	/*
345 	 *	Install symbols in the table
346 	 */
347 	symtabinit();
348 	syminstall();
349 	/*
350 	 *	Build the expression parser accelerator token sets
351 	 */
352 	buildtokensets();
353 }
354 
355 zeroorigins()
356 {
357 	register	int	locindex;
358 	/*
359 	 *	Mark usedot: the first NLOC slots are for named text segments,
360 	 *	the next for named data segments.
361 	 */
362 	for (locindex = 0; locindex < NLOC; locindex++){
363 		usedot[locindex].e_xtype = XTEXT;
364 		usedot[NLOC + locindex].e_xtype = XDATA;
365 		usedot[locindex].e_xvalue = 0;
366 		usedot[NLOC + locindex].e_xvalue = 0;
367 	}
368 }
369 
370 zerolocals()
371 {
372 	register	int	i;
373 
374 	for (i = 0; i <= 9; i++) {
375 		lgensym[i] = 1;
376 		genref[i] = 0;
377 	}
378 }
379 
380 i_pass1()
381 {
382 	FILE	*tempopen();
383 	if (useVM == 0)
384 		tokfile = tempopen(tokfilename, "T");
385 	strfile = tempopen(strfilename, "S");
386 	/*
387 	 *	write out the string length.
388 	 *	This will be overwritten when the
389 	 *	strings are tacked onto the growing a.out file
390 	 */
391 	strfilepos = sizeof(int);
392 	fwrite(&strfilepos, sizeof(int), 1, strfile);
393 
394 	inittokfile();
395 	initijxxx();
396 }
397 
398 FILE *tempopen(tname, part)
399 	char	*tname;
400 	char	*part;
401 {
402 	FILE	*file;
403 	(void)sprintf(tname, "%s%sas%s%05d",
404 		tmpdirprefix,
405 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : 0,
406 		part,
407 		getpid());
408 	file = fopen(tname, "w");
409 	if (file == NULL) {
410 		yyerror("Bad pass 1 temporary file for writing %s", tname);
411 		delexit();
412 	}
413 	return(file);
414 }
415 
416 pass1()
417 {
418 	register	int	i;
419 
420 	passno = 1;
421 	dotp = &usedot[0];
422 	txtfil = (BFILE *)0;
423 	relfil = (struct relbufdesc *)0;
424 
425 	if (ninfiles == 0){		/*take the input from stdin directly*/
426 		lineno = 1;
427 		dotsname = "<stdin>";
428 
429 		yyparse();
430 	} else {		/*we have the names tanked*/
431 		for (i = 0; i < ninfiles; i++){
432 			new_dot_s(innames[i]);
433 			if (freopen(innames[i], "r", stdin) == NULL) {
434 				yyerror( "Can't open source file %s\n",
435 					innames[i]);
436 				exit(2);
437 			}
438 			/* stdio is NOT used to read the input characters */
439 			/* we use read directly, into our own buffers */
440 			yyparse();
441 		}
442 	}
443 
444 	closetokfile();		/*kick out the last buffered intermediate text*/
445 }
446 
447 testlocals()
448 {
449 	register	int	i;
450 	for (i = 0; i <= 9; i++) {
451 		if (genref[i])
452 			yyerror("Reference to undefined local label %df", i);
453 		lgensym[i] = 1;
454 		genref[i] = 0;
455 	}
456 }
457 
/*
 *	Between the passes: sort the symbol table, then fix up the
 *	jxxx instructions.  With DEBUG compiled in and debug set,
 *	the symbol table is dumped before and after the fix up.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();		/* as sorted */
	}
#endif

	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();		/* after jxxxfix */
	}
#endif
}
469 
470 open_a_out()
471 {
472 	/*
473 	 *	Open up the a.out file now, and get set to build
474 	 *	up offsets into it for all of the various text,data
475 	 *	text relocation and data relocation segments.
476 	 */
477 	a_out_file = fopen(outfile, "w");
478 	if (a_out_file == NULL) {
479 		yyerror("Cannot create %s", outfile);
480 		delexit();
481 	}
482 	biofd = a_out_file->_file;
483 	a_out_off = 0;
484 }
485 
486 roundsegments()
487 {
488 	register	int	locindex;
489 	register	long	v;
490 	/*
491 	 *	round and assign text segment origins
492 	 *	the exec header always goes in usefile[0]
493 	 */
494 	tsize = 0;
495 	for (locindex=0; locindex<NLOC; locindex++) {
496 		v = round(usedot[locindex].e_xvalue, FW);
497 		usedot[locindex].e_xvalue = tsize;
498 		if ((locindex == 0) || (v != 0) ){
499 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
500 			bopen(usefile[locindex], a_out_off);
501 			if (locindex == 0)
502 				a_out_off = sizeof (struct exec);
503 		} else {
504 			usefile[locindex] = (BFILE *)-1;
505 		}
506 		tsize += v;
507 		a_out_off += v;
508 	}
509 	/*
510 	 *		Round and assign data segment origins.
511 	 */
512 	datbase = round(tsize, FW);
513 	for (locindex=0; locindex<NLOC; locindex++) {
514 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
515 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
516 		if (v != 0){
517 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
518 			bopen(usefile[NLOC + locindex], a_out_off);
519 		} else {
520 			usefile[NLOC + locindex] = (BFILE *)-1;
521 		}
522 		dsize += v;
523 		a_out_off += v;
524 	}
525 	/*
526 	 *	Assign final values to symbols
527 	 */
528 	hdr.a_bss = dsize;
529 	freezesymtab();		/* this touches hdr.a_bss */
530 	stabfix();
531 	/*
532 	 *	Set up the relocation information "files" to
533 	 *	be zero; outrel takes care of the rest
534 	 */
535 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
536 		rusefile[locindex] = (struct relbufdesc *)0;
537 	}
538 }
539 
540 build_hdr()
541 {
542 	/*
543 	 *	Except for the text and data relocation sizes,
544 	 *	calculate the final values for the header
545 	 *
546 	 *	Write out the initial copy; we to come
547 	 *	back later and patch up a_trsize and a_drsize,
548 	 *	and overwrite this first version of the header.
549 	 */
550 	hdr.a_magic = MAGIC;
551 	hdr.a_text = tsize;
552 	hdr.a_data = dsize;
553 	hdr.a_bss -= dsize;
554 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
555 	hdr.a_entry = 0;
556 	hdr.a_trsize = 0;
557 	hdr.a_drsize = 0;
558 
559 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
560 }
561 
562 i_pass2()
563 {
564 	if (useVM == 0) {
565 		fclose(tokfile);
566 		tokfile = fopen(tokfilename, "r");
567 		if (tokfile==NULL) {
568 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
569 		   delexit();
570 		}
571 	}
572 	fclose(strfile);
573 	strfile = fopen(strfilename, "r");
574 }
575 
576 pass2()
577 {
578 #ifdef DEBUG
579 	if (debug)
580 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
581 #endif DEBUG
582 	passno = 2;
583 	lineno = 1;
584 	if (liston && ninfiles != 0)
585 	{
586 		char ch;
587 		source = fopen (innames[ind++], "r");
588 		(void)sprintf (layoutpos, "%4ld  00000000    ", lineno);
589 		layoutpos += LHEAD;
590 		ch = getc (source);
591 		if (ch == EOF)
592 		{
593 			if (ind == ninfiles)
594 				endofsource = 1;
595 			else
596 				source = fopen (innames[ind++], "r");
597 		}
598 		else
599 			ungetc (ch, source);
600 	}
601 	else
602 		endofsource = 1;
603 	dotp = &usedot[0];
604 	txtfil = usefile[0];	/* already opened (always!) */
605 	relfil = 0;		/* outrel takes care of the rest */
606 	initoutrel();
607 
608 	inittokfile();
609 
610 	yyparse();
611 
612 	closetokfile();
613 }
614 
615 fillsegments()
616 {
617 	int	locindex;
618 	/*
619 	 *	Round text and data segments to FW by appending zeros
620 	 */
621 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
622 		if (usefile[locindex]) {
623 			txtfil = usefile[locindex];
624 			dotp = &usedot[locindex];
625 			while (usedot[locindex].e_xvalue & FW)
626 				outb(0);
627 		}
628 	}
629 }
630 
631 reloc_syms()
632 {
633 	u_long	closerelfil();
634 	/*
635 	 *	Move the relocation information to a.out
636 	 *	a_out_off is the offset so far:
637 	 *	exec + text segments + data segments
638 	 */
639 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
640 	bopen(relocfile, a_out_off);
641 	a_out_off += closeoutrel(relocfile);
642 
643 	hdr.a_trsize = trsize;
644 	hdr.a_drsize = drsize;
645 	if (readonlydata) {
646 		hdr.a_text += hdr.a_data;
647 		hdr.a_data = 0;
648 		hdr.a_trsize += hdr.a_drsize;
649 		hdr.a_drsize = 0;
650 	}
651 	/*
652 	 *	Output the symbol table and the string pool
653 	 *
654 	 *	We must first rewind the string pool file to its beginning,
655 	 *	in case it was seek'ed into for fetching ascii and asciz
656 	 *	strings.
657 	 */
658 	fseek(strfile, 0, 0);
659 	symwrite(relocfile);
660 }
661 
662 fix_a_out()
663 {
664 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
665 		yyerror("Reposition for header rewrite fails");
666 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
667 		yyerror("Rewrite of header fails");
668 }
669 
670 delexit()
671 {
672 	delete();
673 	if (passno == 2){
674 		unlink(outfile);
675 	}
676 	exit(1);
677 }
678 
679 delete()
680 {
681 	if (useVM == 0 || tokfilename[0])
682 		unlink(tokfilename);
683 	if (strfilename[0])
684 		unlink(strfilename);
685 }
686 
/*
 *	Keep calling fillinbuffer until it returns a null pointer,
 *	throwing the results away, then remove the temporary files
 *	and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	/* although the previous pass will also exit non zero */
	exit(1);
}
695 
/*
 *	Internal consistency failure: report it, report the caller's
 *	own message, remove the temporary files and abort.
 *
 *	fmt	format for the caller's message, handed on to yyerror
 *	a1..a4	up to four arguments for fmt, passed through untouched
 *		as ints
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);
	delete();
	abort();
}
705