xref: /original-bsd/old/as.vax/asmain.c (revision 10020db5)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
#ifndef lint
/*
 *	Copyright notice kept as data in the program; it starts with
 *	the "@(#)" marker that selfwhat() searches for.  Left out when
 *	lint is defined.
 */
char copyright[] =
"@(#) Copyright (c) 1982 Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */
12 
#ifndef lint
/*
 *	SCCS identification for this file; it starts with the "@(#)"
 *	marker that selfwhat() searches for.  Left out when lint is defined.
 */
static char sccsid[] = "@(#)asmain.c	5.5 (Berkeley) 03/12/90";
#endif /* not lint */
16 
17 #include <stdio.h>
18 #include <ctype.h>
19 #include <signal.h>
20 
21 #include "as.h"
22 #include "assyms.h"
23 #include "asscan.h"
24 #include "asexpr.h"
25 #include <paths.h>
26 
27 #include <sys/stat.h>
28 
/*
 *	Identification string for this assembler;
 *	it is not referenced anywhere else in this file.
 */
#define	unix_lang_name "VAX/UNIX Assembler V03/12/90 5.5"
/*
 *	Variables that manage reading the assembly source files.
 *
 *	argprocess() fills innames/ninfiles from the command line and
 *	parks dotsname at "<argv error>"; pass1() sets dotsname to
 *	"<stdin>" and lineno to 1 when there are no input files.
 */
char	*dotsname;	/* the current file name; managed by the parser */
int	lineno;		/* current line number; managed by the parser */
char	**innames;	/* names of the files being assembled */
int	ninfiles;	/* how many interesting files there are */
/*
 *	Flags settable from the argv process argument list
 *	(the option letters are the ones handled in argprocess()),
 *	followed by the running error, warning and pass state.
 */
int	silent = 0;	/* -W: don't complain about any errors */
int	savelabels = 0;	/* -L: write the labels to the a.out file */
int 	d124 = 4;	/* -d[124]: default allocate 4 bytes for unknown pointers */
int 	maxalign = 2;	/* -a: default .align maximum; argprocess accepts 0..16 */
int	anyerrs = 0;	/* no errors yet */
int	anywarnings=0;	/* no warnings yet */
int	orgwarn = 0;	/* bad origins */
int	passno = 1;	/* current pass */
int	jxxxJUMP = 0;	/* -J: in jxxxes that branch too far, use jmp instead of brw */
int	readonlydata = 0;	/* -R: initialized data -> text space */

/*
 *	Counters that main() tests after assembly to decide which
 *	"Caution:" warnings to print.
 */
int	nGHnumbers = 0;		/* GH numbers used */
int	nGHopcodes = 0;		/* GH opcodes used */
int	nnewopcodes = 0;	/* new opcodes used */
54 
#ifdef DEBUG
int 	debug = 0;	/* -D: pass1_5 dumps the symbol table, pass2 prints a banner */
int	toktrace = 0;	/* -T: set by argprocess; not read in this file */
#endif

int	useVM =	0;	/* -V: when set, i_pass1 opens no token temporary file */

char	*endcore;	/* where to get more symbol space; main sets it to sbrk(0) */
63 
/*
 *	Managers of the a.out file.
 *
 *	hdr is filled in and written by build_hdr(), has its relocation
 *	sizes set by reloc_syms(), and is written again by fix_a_out().
 */
struct	exec	hdr;
#define	MAGIC	0407		/* stored in hdr.a_magic by build_hdr */
u_long	tsize;		/* total text size */
u_long	dsize;		/* total data size */
u_long	datbase;	/* base of the data segment */
u_long	trsize;		/* total text relocation size */
u_long	drsize;		/* total data relocation size */
74 
/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments.
 *
 *	The first NLOC slots of usedot are the text segments and the
 *	next NLOC slots are the data segments (see zeroorigins).
 *
 *	dotp points to the correct usedot expression for the current segment.
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio,
 *	but is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 *
 *	i_pass1() creates it only when useVM is 0, and i_pass2() reopens
 *	it for reading.  main() clears the first byte of the name, so an
 *	empty name means that the file has not been created.
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file.
 *
 *	i_pass1() writes a placeholder length at the front of the file
 *	and starts strfilepos at sizeof(int).
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created by open_a_out(), which main() calls just
 *	before the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library.
 */
char	*outfile;			/* output name; "a.out" unless -o is given */
FILE	*a_out_file;
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	usefile is indexed like usedot; roundsegments() stores
 *	(BFILE *)-1 in the slot of every segment that gets no file.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the "absoluteness" of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This rules out using the block I/O library for it,
 *	as that library requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relfil.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relfil to output the symbol table information.
 *	NOTE(review): reloc_syms() hands relocfile, not relfil, to
 *	symwrite() -- confirm which name this sentence should use.
 */
/*
 *	Directory prefix for the temporary files built by tempopen();
 *	replaced by the argument of the -t flag.
 */
char	*tmpdirprefix = "/tmp/";
/*
 *	Declared ahead of its definition because initialize()
 *	installs it as the SIGINT handler.
 */
int delexit();
176 
177 main(argc, argv)
178 	int	argc;
179 	char 	**argv;
180 {
181 	char	*sbrk();
182 
183 	tokfilename[0] = 0;
184 	strfilename[0] = 0;
185 	endcore = sbrk(0);
186 
187 	argprocess(argc, argv);		/* process argument lists */
188 	if (anyerrs) exit(1);
189 
190 	initialize();
191 	zeroorigins();			/* set origins to zero */
192 	zerolocals();			/* fix local label counters */
193 
194 	i_pass1();			/* open temp files, etc */
195 	pass1();			/* first pass through .s files */
196 	testlocals();			/* check for undefined locals */
197 	if (anyerrs) delexit();
198 
199 	pass1_5();			/* resolve jxxx */
200 	if (anyerrs) delexit();
201 
202 	open_a_out();			/* open a.out */
203 	roundsegments();		/* round segments to FW */
204 	build_hdr();			/* build initial header, and output */
205 
206 	i_pass2();			/* reopen temporary file, etc */
207 	pass2();			/* second pass through the virtual .s */
208 	if (anyerrs) delexit();
209 
210 	fillsegments();			/* fill segments with 0 to FW */
211 	reloc_syms();			/* dump relocation and symbol table */
212 
213 	delete();			/* remove tmp file */
214 	bflush();			/* close off block I/O view of a.out */
215 	fix_a_out();			/* add in text and data reloc counts */
216 
217 	if (anyerrs == 0 && orgwarn)
218 		yyerror("Caution: absolute origins.\n");
219 
220 	if (nGHnumbers)
221 		yywarning("Caution: G or H format floating point numbers");
222 	if (nGHopcodes)
223 		yywarning("Caution: G or H format floating point operators");
224 	if (nnewopcodes)
225 		yywarning("Caution: New Opcodes");
226 	if (nGHnumbers || nGHopcodes || nnewopcodes)
227 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
228 
229 	exit(anyerrs != 0);
230 }
231 
232 argprocess(argc, argv)
233 	int	argc;
234 	char	*argv[];
235 {
236 	register	char	*cp;
237 
238 	ninfiles = 0;
239 	silent = 0;
240 #ifdef DEBUG
241 	debug = 0;
242 #endif
243 	outfile = (char *)genbuildname("a.out");
244 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
245 	dotsname = "<argv error>";
246 	while (argc > 1) {
247 		if (argv[1][0] != '-')
248 			innames[ninfiles++] = argv[1];
249 		else {
250 			cp = argv[1] + 1;
251 			/*
252 			 *	We can throw away single minus signs, so
253 			 *	that make scripts for the PDP 11 assembler work
254 			 *	on this assembler too
255 			 */
256 			while (*cp){
257 				switch(*cp++){
258 				 default:
259 					yyerror("Unknown flag: %c", *--cp);
260 					cp++;
261 					break;
262 				 case 'v':
263 					selfwhat(stdout);
264 					exit(1);
265 				 case 'd':
266 					d124 = *cp++ - '0';
267 					if ( (d124 != 1) && (d124 != 2) &&
268 					     (d124 != 4)){
269 						yyerror("-d[124] only");
270 						exit(1);
271 					}
272 					break;
273 				 case 'a':
274 					maxalign = atoi(cp+1);
275 					for (cp++; isdigit(*cp); cp++)
276 						/*VOID*/;
277 					if ( (maxalign > 16) || (maxalign < 0)){
278 						yyerror("-a: 0<=align<=16");
279 						exit(1);
280 					}
281 					break;
282 				 case 'o':
283 					if (argc < 3){
284 						yyerror("-o what???");
285 						exit(1);
286 					}
287 					outfile = (char *)genbuildname(argv[2]);
288 				   bumpone:
289 					argc -= 2;
290 					argv += 2;
291 					goto nextarg;
292 
293 				 case 't':
294 					if (argc < 3){
295 						yyerror("-t what???");
296 						exit(1);
297 					}
298 					tmpdirprefix = argv[2];
299 					goto bumpone;
300 
301 				 case 'V':
302 					useVM = 1;
303 					break;
304 				 case 'W':
305 					silent = 1;
306 					break;
307 				 case 'L':
308 					savelabels = 1;
309 					break;
310 				 case 'J':
311 					jxxxJUMP = 1;
312 					break;
313 #ifdef DEBUG
314 				 case 'D':
315 					debug = 1;
316 					break;
317 				 case 'T':
318 					toktrace = 1;
319 					break;
320 #endif
321 				 case 'R':
322 					readonlydata = 1;
323 					break;
324 				}	/*end of the switch*/
325 			}	/*end of pulling out all arguments*/
326 		}	/*end of a flag argument*/
327 		--argc; ++argv;
328 	   nextarg:;
329 	}
330 	/* innames[ninfiles] = 0; */
331 }
/*
 *	Poke through the data space and find all sccs identifiers.
 *	Every string introduced by "@(#)" that lies between the address
 *	of environ and the current break is written to place as a tab,
 *	the text up to (not including) the first NUL, '>' or newline,
 *	and a newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	/*
	 *	The marker test looks at cp[0] through cp[3], so the scan
	 *	stops three bytes short of the break; a complete marker
	 *	can not start any later than that.
	 */
	for (cp = (char *)&environ, ub = sbrk(0); cp + 3 < ub; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
363 
364 initialize()
365 {
366 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
367 		signal(SIGINT, delexit);
368 	/*
369 	 *	Install symbols in the table
370 	 */
371 	symtabinit();
372 	syminstall();
373 	/*
374 	 *	Build the expression parser accelerator token sets
375 	 */
376 	buildtokensets();
377 }
378 
379 zeroorigins()
380 {
381 	register	int	locindex;
382 	/*
383 	 *	Mark usedot: the first NLOC slots are for named text segments,
384 	 *	the next for named data segments.
385 	 */
386 	for (locindex = 0; locindex < NLOC; locindex++){
387 		usedot[locindex].e_xtype = XTEXT;
388 		usedot[NLOC + locindex].e_xtype = XDATA;
389 		usedot[locindex].e_xvalue = 0;
390 		usedot[NLOC + locindex].e_xvalue = 0;
391 	}
392 }
393 
394 zerolocals()
395 {
396 	register	int	i;
397 
398 	for (i = 0; i <= 9; i++) {
399 		lgensym[i] = 1;
400 		genref[i] = 0;
401 	}
402 }
403 
404 i_pass1()
405 {
406 	FILE	*tempopen();
407 	if (useVM == 0)
408 		tokfile = tempopen(tokfilename, "T");
409 	strfile = tempopen(strfilename, "S");
410 	/*
411 	 *	write out the string length.
412 	 *	This will be overwritten when the
413 	 *	strings are tacked onto the growing a.out file
414 	 */
415 	strfilepos = sizeof(int);
416 	fwrite(&strfilepos, sizeof(int), 1, strfile);
417 
418 	inittokfile();
419 	initijxxx();
420 }
421 
422 FILE *tempopen(tname, part)
423 	char	*tname;
424 	char	*part;
425 {
426 	FILE	*file;
427 	(void)sprintf(tname, "%s%sas%s%05d",
428 		tmpdirprefix,
429 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : "",
430 		part,
431 		getpid());
432 	file = fopen(tname, "w");
433 	if (file == NULL) {
434 		yyerror("Bad pass 1 temporary file for writing %s", tname);
435 		delexit();
436 	}
437 	return(file);
438 }
439 
440 pass1()
441 {
442 	register	int	i;
443 
444 	passno = 1;
445 	dotp = &usedot[0];
446 	txtfil = (BFILE *)0;
447 	relfil = (struct relbufdesc *)0;
448 
449 	if (ninfiles == 0){		/*take the input from stdin directly*/
450 		lineno = 1;
451 		dotsname = "<stdin>";
452 
453 		yyparse();
454 	} else {		/*we have the names tanked*/
455 		for (i = 0; i < ninfiles; i++){
456 			new_dot_s(innames[i]);
457 			if (freopen(innames[i], "r", stdin) == NULL) {
458 				yyerror( "Can't open source file %s\n",
459 					innames[i]);
460 				exit(2);
461 			}
462 			/* stdio is NOT used to read the input characters */
463 			/* we use read directly, into our own buffers */
464 			yyparse();
465 		}
466 	}
467 
468 	closetokfile();		/*kick out the last buffered intermediate text*/
469 }
470 
471 testlocals()
472 {
473 	register	int	i;
474 	for (i = 0; i <= 9; i++) {
475 		if (genref[i])
476 			yyerror("Reference to undefined local label %df", i);
477 		lgensym[i] = 1;
478 		genref[i] = 0;
479 	}
480 }
481 
/*
 *	The step between the passes: sort the symbol table and then
 *	resolve the jxxx instructions.  When built with DEBUG and run
 *	with debug set, the symbol table is dumped before and after
 *	the jxxx resolution.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug != 0) {
		dumpsymtab();
	}
#endif
	jxxxfix();
#ifdef DEBUG
	if (debug != 0) {
		dumpsymtab();
	}
#endif
}
493 
494 open_a_out()
495 {
496 	struct stat stb;
497 
498 	/*
499 	 *	Open up the a.out file now, and get set to build
500 	 *	up offsets into it for all of the various text,data
501 	 *	text relocation and data relocation segments.
502 	 */
503 	a_out_file = fopen(outfile, "w");
504 	if (a_out_file == NULL) {
505 		yyerror("Cannot create %s", outfile);
506 		delexit();
507 	}
508 	biofd = a_out_file->_file;
509 	fstat(biofd, &stb);
510 	biobufsize = stb.st_blksize;
511 	a_out_off = 0;
512 }
513 
514 roundsegments()
515 {
516 	register	int	locindex;
517 	register	long	v;
518 	/*
519 	 *	round and assign text segment origins
520 	 *	the exec header always goes in usefile[0]
521 	 */
522 	tsize = 0;
523 	for (locindex=0; locindex<NLOC; locindex++) {
524 		v = round(usedot[locindex].e_xvalue, FW);
525 		usedot[locindex].e_xvalue = tsize;
526 		if ((locindex == 0) || (v != 0) ){
527 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
528 			bopen(usefile[locindex], a_out_off);
529 			if (locindex == 0)
530 				a_out_off = sizeof (struct exec);
531 		} else {
532 			usefile[locindex] = (BFILE *)-1;
533 		}
534 		tsize += v;
535 		a_out_off += v;
536 	}
537 	/*
538 	 *		Round and assign data segment origins.
539 	 */
540 	datbase = round(tsize, FW);
541 	for (locindex=0; locindex<NLOC; locindex++) {
542 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
543 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
544 		if (v != 0){
545 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
546 			bopen(usefile[NLOC + locindex], a_out_off);
547 		} else {
548 			usefile[NLOC + locindex] = (BFILE *)-1;
549 		}
550 		dsize += v;
551 		a_out_off += v;
552 	}
553 	/*
554 	 *	Assign final values to symbols
555 	 */
556 	hdr.a_bss = dsize;
557 	freezesymtab();		/* this touches hdr.a_bss */
558 	stabfix();
559 	/*
560 	 *	Set up the relocation information "files" to
561 	 *	be zero; outrel takes care of the rest
562 	 */
563 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
564 		rusefile[locindex] = (struct relbufdesc *)0;
565 	}
566 }
567 
568 build_hdr()
569 {
570 	/*
571 	 *	Except for the text and data relocation sizes,
572 	 *	calculate the final values for the header
573 	 *
574 	 *	Write out the initial copy; we to come
575 	 *	back later and patch up a_trsize and a_drsize,
576 	 *	and overwrite this first version of the header.
577 	 */
578 	hdr.a_magic = MAGIC;
579 	hdr.a_text = tsize;
580 	hdr.a_data = dsize;
581 	hdr.a_bss -= dsize;
582 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
583 	hdr.a_entry = 0;
584 	hdr.a_trsize = 0;
585 	hdr.a_drsize = 0;
586 
587 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
588 }
589 
590 i_pass2()
591 {
592 	if (useVM == 0) {
593 		fclose(tokfile);
594 		tokfile = fopen(tokfilename, "r");
595 		if (tokfile==NULL) {
596 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
597 		   delexit();
598 		}
599 	}
600 	fclose(strfile);
601 	strfile = fopen(strfilename, "r");
602 }
603 
604 pass2()
605 {
606 #ifdef DEBUG
607 	if (debug)
608 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
609 #endif DEBUG
610 	passno = 2;
611 	lineno = 1;
612 	dotp = &usedot[0];
613 	txtfil = usefile[0];	/* already opened (always!) */
614 	relfil = 0;		/* outrel takes care of the rest */
615 	initoutrel();
616 
617 	inittokfile();
618 
619 	yyparse();
620 
621 	closetokfile();
622 }
623 
624 fillsegments()
625 {
626 	int	locindex;
627 	/*
628 	 *	Round text and data segments to FW by appending zeros
629 	 */
630 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
631 		if (usefile[locindex]) {
632 			txtfil = usefile[locindex];
633 			dotp = &usedot[locindex];
634 			while (usedot[locindex].e_xvalue & FW)
635 				outb(0);
636 		}
637 	}
638 }
639 
640 reloc_syms()
641 {
642 	u_long	closerelfil();
643 	/*
644 	 *	Move the relocation information to a.out
645 	 *	a_out_off is the offset so far:
646 	 *	exec + text segments + data segments
647 	 */
648 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
649 	bopen(relocfile, a_out_off);
650 	a_out_off += closeoutrel(relocfile);
651 
652 	hdr.a_trsize = trsize;
653 	hdr.a_drsize = drsize;
654 	if (readonlydata) {
655 		hdr.a_text += hdr.a_data;
656 		hdr.a_data = 0;
657 		hdr.a_trsize += hdr.a_drsize;
658 		hdr.a_drsize = 0;
659 	}
660 	/*
661 	 *	Output the symbol table and the string pool
662 	 *
663 	 *	We must first rewind the string pool file to its beginning,
664 	 *	in case it was seek'ed into for fetching ascii and asciz
665 	 *	strings.
666 	 */
667 	fseek(strfile, 0, 0);
668 	symwrite(relocfile);
669 }
670 
671 fix_a_out()
672 {
673 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
674 		yyerror("Reposition for header rewrite fails");
675 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
676 		yyerror("Rewrite of header fails");
677 }
678 
679 delexit()
680 {
681 	delete();
682 	if (passno == 2){
683 		unlink(outfile);
684 	}
685 	exit(1);
686 }
687 
688 delete()
689 {
690 	if (useVM == 0 || tokfilename[0])
691 		unlink(tokfilename);
692 	if (strfilename[0])
693 		unlink(strfilename);
694 }
695 
/*
 *	Keep calling fillinbuffer() until it returns a null pointer,
 *	then remove the temporary files and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	exit(1);	/* although the previous pass will also exit non zero */
}
704 
/*
 *	Report an internal inconsistency and give up: print a fixed
 *	panic message, then the caller's own message, remove the
 *	temporary files and abort().
 *
 *	fmt		yyerror format for the caller's message
 *	a1 .. a4	up to four arguments for fmt, passed on untouched
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);
	delete();
	abort();
}
714