xref: /original-bsd/old/as.vax/asmain.c (revision fbb2a877)
1 /*
2  * Copyright (c) 1982 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  */
6 
/*
 *	Identification strings compiled into the assembler.
 *	Each starts with the "@(#)" marker that selfwhat() searches for.
 *	Both are omitted when lint is defined.
 *
 *	The text after each #endif is wrapped in a comment: a bare
 *	identifier following #endif is not valid in standard C.
 */
#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1982 Regents of the University of California.\n\
 All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static char sccsid[] = "@(#)asmain.c	5.3 (Berkeley) 10/22/87";
#endif /* not lint */
16 
17 #include <stdio.h>
18 #include <ctype.h>
19 #include <signal.h>
20 
21 #include "as.h"
22 #include "assyms.h"
23 #include "asscan.h"
24 #include "asexpr.h"
25 
26 #include <sys/stat.h>
27 
/* identification string for this assembler version (not referenced in this file) */
#define	unix_lang_name "VAX/UNIX Assembler V10/22/87 5.3"
/*
 *	Variables to manage reading the assembly source files
 */
char	*dotsname;	/*the current file name; managed by the parser*/
int	lineno;		/*current line number; managed by the parser*/
char	**innames;	/*names of the files being assembled (filled in by argprocess)*/
int	ninfiles;	/*how many input files were named; 0 means read stdin*/
/*
 *	Flags settable from the argv process argument list,
 *	followed by status that is accumulated while assembling.
 */
int	silent = 0;	/*don't complain about any errors (-W)*/
int	savelabels = 0;	/*write the labels to the a.out file (-L)*/
int 	d124 = 4;	/*default allocate 4 bytes for unknown pointers (-d1, -d2, -d4)*/
int 	maxalign = 2;	/*default .align maximum (-a, limited to 0..16)*/
int	anyerrs = 0;	/*no errors yet*/
int	anywarnings=0;	/*no warnings yet*/
int	orgwarn = 0;	/*bad (absolute) origins were seen; main() issues a caution*/
int	passno = 1;	/*current pass; set to 1 by pass1() and 2 by pass2()*/
int	jxxxJUMP = 0;	/* in jxxxes that branch too far, use jmp instead of brw (-J) */
int	readonlydata = 0;	/* initialized data -> text space (-R) */

/*
 *	Non zero values make main() issue a closing caution
 */
int	nGHnumbers = 0;		/* GH numbers used */
int	nGHopcodes = 0;		/* GH opcodes used */
int	nnewopcodes = 0;	/* new opcodes used */

#ifdef DEBUG
int 	debug = 0;	/* -D: dump the symbol table around jxxxfix, announce pass 2 */
int	toktrace = 0;	/* -T */
#endif

/*
 *	Set by -V.  When non zero, no inter pass token file is
 *	opened (see i_pass1 and i_pass2).
 */
int	useVM =	0;

char	*endcore;	/*where to get more symbol space; main() sets it to sbrk(0)*/
62 
/*
 *	Managers of the a.out file.
 *
 *	hdr is written once by build_hdr() with zero relocation sizes,
 *	completed by reloc_syms(), and written again by fix_a_out().
 */
struct	exec	hdr;
#define	MAGIC	0407	/* value build_hdr() stores in hdr.a_magic */
u_long	tsize;		/* total text size */
u_long	dsize;		/* total data size */
u_long	datbase;	/* base of the data segment */
u_long	trsize;		/* total text relocation size */
u_long	drsize;		/* total data relocation size */
/*
 *	Information about the current segment is accumulated in
 *	usedot; the most important information stored is the
 *	accumulated size of each of the text and data segments.
 *
 *	The first NLOC slots describe the text segments and the
 *	next NLOC slots describe the data segments (see zeroorigins).
 *
 *	dotp points to the correct usedot expression for the current segment
 */
struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
struct	exp	*dotp;			/* data/text location pointer */
/*
 *	The inter pass temporary token file is opened and closed by stdio, but
 *	is written to using direct read/write, as the temporary file
 *	is composed of buffers exactly BUFSIZ long.
 *
 *	It is not opened at all when useVM is set.
 */
FILE	*tokfile;			/* interpass communication file */
char	tokfilename[TNAMESIZE];		/* its name, built by tempopen; empty until then */
/*
 *	The string file is the string table
 *	cat'ed to the end of the built up a.out file
 */
FILE	*strfile;			/* interpass string file */
char	strfilename[TNAMESIZE];		/* its name, built by tempopen; empty until then */
int	strfilepos = 0;			/* position within the string file */
/*
 *	a.out is created during the second pass.
 *	It is opened by stdio, but is filled with the parallel
 *	block I/O library
 */
char	*outfile = "a.out";		/* output file name; replaced by the -o argument */
FILE	*a_out_file;			/* stdio handle, opened by open_a_out */
off_t	a_out_off;			/* cumulative offsets for segments */
/*
 *	The logical files containing the assembled data for each of
 *	the text and data segments are
 *	managed by the parallel block I/O library.
 *	a.out is logically opened in many places at once to
 *	receive the assembled data from the various segments as
 *	it all trickles in, but is physically opened only once
 *	to minimize file overhead.
 *
 *	roundsegments stores (BFILE *)-1 in the slot of a segment
 *	that is empty and therefore has no logical file.
 */
BFILE	*usefile[NLOC+NLOC];		/* text/data files */
BFILE	*txtfil;			/* current text/data file */
/*
 *	Relocation information is accumulated separately for each
 *	segment.  This is required by the old loader (from BTL),
 *	but not by the new loader (Bill Joy).
 *
 *	However, the size of the relocation information can not be computed
 *	during or after the 1st pass because the "absoluteness" of values
 *	is unknown until all locally declared symbols have been seen.
 *	Thus, the size of the relocation information is only
 *	known after the second pass is finished.
 *	This precludes the use of the block I/O
 *	library, which requires knowing the exact offsets in a.out.
 *
 *	So, we save the relocation information internally (we don't
 *	go to internal files to minimize overhead).
 *
 *	Empirically, we studied 259 files composing the system,
 *	two compilers and a compiler generator: (all of which have
 *	fairly large source files)
 *
 *	Number of files = 259
 *		Number of non zero text reloc files: 233
 *		Number of non zero data reloc files: 53
 *	Average text relocation = 889
 *	Average data relocation = 346
 *	Number of files > BUFSIZ text relocation = 71
 *	Number of files > BUFSIZ data relocation = 6
 *
 *	For compiled C code, there is usually one text segment and two
 *	data segments; we see that allocating our own buffers and
 *	doing our internal handling of relocation information will,
 *	on the average, not use more memory than taken up by the buffers
 *	allocated for doing file I/O in parallel to a number of files.
 *
 *	If we are assembling with the -V option, we
 *	use the left over token buffers from the 2nd pass,
 *	otherwise, we create our own.
 *
 *	When the 2nd pass is complete, closeoutrel flushes the token
 *	buffers out to a BFILE.
 *
 *	The internals to relbufdesc are known only in assyms.c
 *
 *	outrel constructs the relocation information.
 *	closeoutrel flushes the relocation information to relfil.
 */
struct	relbufdesc	*rusefile[NLOC+NLOC];
struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
BFILE	*relocfile;			/* concatenated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relfil to output the symbol table information.
 *	NOTE(review): reloc_syms() in this file hands relocfile, not
 *	relfil, to symwrite(); this sentence may be out of date.
 */
char	*tmpdirprefix = "/tmp/";	/* directory for temporary files; replaced by the -t argument */
int delexit();				/* clean up and exit(1); also installed as the SIGINT handler */
175 
176 main(argc, argv)
177 	int	argc;
178 	char 	**argv;
179 {
180 	char	*sbrk();
181 
182 	tokfilename[0] = 0;
183 	strfilename[0] = 0;
184 	endcore = sbrk(0);
185 
186 	argprocess(argc, argv);		/* process argument lists */
187 	if (anyerrs) exit(1);
188 
189 	initialize();
190 	zeroorigins();			/* set origins to zero */
191 	zerolocals();			/* fix local label counters */
192 
193 	i_pass1();			/* open temp files, etc */
194 	pass1();			/* first pass through .s files */
195 	testlocals();			/* check for undefined locals */
196 	if (anyerrs) delexit();
197 
198 	pass1_5();			/* resolve jxxx */
199 	if (anyerrs) delexit();
200 
201 	open_a_out();			/* open a.out */
202 	roundsegments();		/* round segments to FW */
203 	build_hdr();			/* build initial header, and output */
204 
205 	i_pass2();			/* reopen temporary file, etc */
206 	pass2();			/* second pass through the virtual .s */
207 	if (anyerrs) delexit();
208 
209 	fillsegments();			/* fill segments with 0 to FW */
210 	reloc_syms();			/* dump relocation and symbol table */
211 
212 	delete();			/* remove tmp file */
213 	bflush();			/* close off block I/O view of a.out */
214 	fix_a_out();			/* add in text and data reloc counts */
215 
216 	if (anyerrs == 0 && orgwarn)
217 		yyerror("Caution: absolute origins.\n");
218 
219 	if (nGHnumbers)
220 		yywarning("Caution: G or H format floating point numbers");
221 	if (nGHopcodes)
222 		yywarning("Caution: G or H format floating point operators");
223 	if (nnewopcodes)
224 		yywarning("Caution: New Opcodes");
225 	if (nGHnumbers || nGHopcodes || nnewopcodes)
226 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
227 
228 	exit(anyerrs != 0);
229 }
230 
231 argprocess(argc, argv)
232 	int	argc;
233 	char	*argv[];
234 {
235 	register	char	*cp;
236 
237 	ninfiles = 0;
238 	silent = 0;
239 #ifdef DEBUG
240 	debug = 0;
241 #endif
242 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
243 	dotsname = "<argv error>";
244 	while (argc > 1) {
245 		if (argv[1][0] != '-')
246 			innames[ninfiles++] = argv[1];
247 		else {
248 			cp = argv[1] + 1;
249 			/*
250 			 *	We can throw away single minus signs, so
251 			 *	that make scripts for the PDP 11 assembler work
252 			 *	on this assembler too
253 			 */
254 			while (*cp){
255 				switch(*cp++){
256 				 default:
257 					yyerror("Unknown flag: %c", *--cp);
258 					cp++;
259 					break;
260 				 case 'v':
261 					selfwhat(stdout);
262 					exit(1);
263 				 case 'd':
264 					d124 = *cp++ - '0';
265 					if ( (d124 != 1) && (d124 != 2) &&
266 					     (d124 != 4)){
267 						yyerror("-d[124] only");
268 						exit(1);
269 					}
270 					break;
271 				 case 'a':
272 					maxalign = atoi(cp+1);
273 					for (cp++; isdigit(*cp); cp++)
274 						/*VOID*/;
275 					if ( (maxalign > 16) || (maxalign < 0)){
276 						yyerror("-a: 0<=align<=16");
277 						exit(1);
278 					}
279 					break;
280 				 case 'o':
281 					if (argc < 3){
282 						yyerror("-o what???");
283 						exit(1);
284 					}
285 					outfile = argv[2];
286 				   bumpone:
287 					argc -= 2;
288 					argv += 2;
289 					goto nextarg;
290 
291 				 case 't':
292 					if (argc < 3){
293 						yyerror("-t what???");
294 						exit(1);
295 					}
296 					tmpdirprefix = argv[2];
297 					goto bumpone;
298 
299 				 case 'V':
300 					useVM = 1;
301 					break;
302 				 case 'W':
303 					silent = 1;
304 					break;
305 				 case 'L':
306 					savelabels = 1;
307 					break;
308 				 case 'J':
309 					jxxxJUMP = 1;
310 					break;
311 #ifdef DEBUG
312 				 case 'D':
313 					debug = 1;
314 					break;
315 				 case 'T':
316 					toktrace = 1;
317 					break;
318 #endif
319 				 case 'R':
320 					readonlydata = 1;
321 					break;
322 				}	/*end of the switch*/
323 			}	/*end of pulling out all arguments*/
324 		}	/*end of a flag argument*/
325 		--argc; ++argv;
326 	   nextarg:;
327 	}
328 	/* innames[ninfiles] = 0; */
329 }
/*
 *	Poke through the data space and print all sccs identifiers
 *	on place, one per line, each preceded by a tab.
 *
 *	An identifier starts after the four characters "@(#)" and
 *	runs up to the next null, '>' or newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 *
 *	The region searched runs from &environ up to the current
 *	break, as returned by sbrk(0).
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	/*
	 *	The marker is four characters long, so give up as soon as
	 *	fewer than four characters remain below the break; testing
	 *	only cp < ub would let cp[1] through cp[3] be fetched from
	 *	beyond it.
	 */
	for (cp = (char *)&environ, ub = sbrk(0); ub - cp >= 4; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
361 
362 initialize()
363 {
364 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
365 		signal(SIGINT, delexit);
366 	/*
367 	 *	Install symbols in the table
368 	 */
369 	symtabinit();
370 	syminstall();
371 	/*
372 	 *	Build the expression parser accelerator token sets
373 	 */
374 	buildtokensets();
375 }
376 
377 zeroorigins()
378 {
379 	register	int	locindex;
380 	/*
381 	 *	Mark usedot: the first NLOC slots are for named text segments,
382 	 *	the next for named data segments.
383 	 */
384 	for (locindex = 0; locindex < NLOC; locindex++){
385 		usedot[locindex].e_xtype = XTEXT;
386 		usedot[NLOC + locindex].e_xtype = XDATA;
387 		usedot[locindex].e_xvalue = 0;
388 		usedot[NLOC + locindex].e_xvalue = 0;
389 	}
390 }
391 
392 zerolocals()
393 {
394 	register	int	i;
395 
396 	for (i = 0; i <= 9; i++) {
397 		lgensym[i] = 1;
398 		genref[i] = 0;
399 	}
400 }
401 
402 i_pass1()
403 {
404 	FILE	*tempopen();
405 	if (useVM == 0)
406 		tokfile = tempopen(tokfilename, "T");
407 	strfile = tempopen(strfilename, "S");
408 	/*
409 	 *	write out the string length.
410 	 *	This will be overwritten when the
411 	 *	strings are tacked onto the growing a.out file
412 	 */
413 	strfilepos = sizeof(int);
414 	fwrite(&strfilepos, sizeof(int), 1, strfile);
415 
416 	inittokfile();
417 	initijxxx();
418 }
419 
420 FILE *tempopen(tname, part)
421 	char	*tname;
422 	char	*part;
423 {
424 	FILE	*file;
425 	(void)sprintf(tname, "%s%sas%s%05d",
426 		tmpdirprefix,
427 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : "",
428 		part,
429 		getpid());
430 	file = fopen(tname, "w");
431 	if (file == NULL) {
432 		yyerror("Bad pass 1 temporary file for writing %s", tname);
433 		delexit();
434 	}
435 	return(file);
436 }
437 
438 pass1()
439 {
440 	register	int	i;
441 
442 	passno = 1;
443 	dotp = &usedot[0];
444 	txtfil = (BFILE *)0;
445 	relfil = (struct relbufdesc *)0;
446 
447 	if (ninfiles == 0){		/*take the input from stdin directly*/
448 		lineno = 1;
449 		dotsname = "<stdin>";
450 
451 		yyparse();
452 	} else {		/*we have the names tanked*/
453 		for (i = 0; i < ninfiles; i++){
454 			new_dot_s(innames[i]);
455 			if (freopen(innames[i], "r", stdin) == NULL) {
456 				yyerror( "Can't open source file %s\n",
457 					innames[i]);
458 				exit(2);
459 			}
460 			/* stdio is NOT used to read the input characters */
461 			/* we use read directly, into our own buffers */
462 			yyparse();
463 		}
464 	}
465 
466 	closetokfile();		/*kick out the last buffered intermediate text*/
467 }
468 
469 testlocals()
470 {
471 	register	int	i;
472 	for (i = 0; i <= 9; i++) {
473 		if (genref[i])
474 			yyerror("Reference to undefined local label %df", i);
475 		lgensym[i] = 1;
476 		genref[i] = 0;
477 	}
478 }
479 
/*
 *	Between the passes: call sortsymtab() and then jxxxfix().
 *	When compiled with DEBUG and run with debug set, the symbol
 *	table is dumped both before and after jxxxfix().
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug != 0) {
		dumpsymtab();		/* as it was before jxxxfix */
	}
#endif
	jxxxfix();
#ifdef DEBUG
	if (debug != 0) {
		dumpsymtab();		/* as jxxxfix left it */
	}
#endif
}
491 
492 open_a_out()
493 {
494 	struct stat stb;
495 
496 	/*
497 	 *	Open up the a.out file now, and get set to build
498 	 *	up offsets into it for all of the various text,data
499 	 *	text relocation and data relocation segments.
500 	 */
501 	a_out_file = fopen(outfile, "w");
502 	if (a_out_file == NULL) {
503 		yyerror("Cannot create %s", outfile);
504 		delexit();
505 	}
506 	biofd = a_out_file->_file;
507 	fstat(biofd, &stb);
508 	biobufsize = stb.st_blksize;
509 	a_out_off = 0;
510 }
511 
512 roundsegments()
513 {
514 	register	int	locindex;
515 	register	long	v;
516 	/*
517 	 *	round and assign text segment origins
518 	 *	the exec header always goes in usefile[0]
519 	 */
520 	tsize = 0;
521 	for (locindex=0; locindex<NLOC; locindex++) {
522 		v = round(usedot[locindex].e_xvalue, FW);
523 		usedot[locindex].e_xvalue = tsize;
524 		if ((locindex == 0) || (v != 0) ){
525 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
526 			bopen(usefile[locindex], a_out_off);
527 			if (locindex == 0)
528 				a_out_off = sizeof (struct exec);
529 		} else {
530 			usefile[locindex] = (BFILE *)-1;
531 		}
532 		tsize += v;
533 		a_out_off += v;
534 	}
535 	/*
536 	 *		Round and assign data segment origins.
537 	 */
538 	datbase = round(tsize, FW);
539 	for (locindex=0; locindex<NLOC; locindex++) {
540 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
541 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
542 		if (v != 0){
543 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
544 			bopen(usefile[NLOC + locindex], a_out_off);
545 		} else {
546 			usefile[NLOC + locindex] = (BFILE *)-1;
547 		}
548 		dsize += v;
549 		a_out_off += v;
550 	}
551 	/*
552 	 *	Assign final values to symbols
553 	 */
554 	hdr.a_bss = dsize;
555 	freezesymtab();		/* this touches hdr.a_bss */
556 	stabfix();
557 	/*
558 	 *	Set up the relocation information "files" to
559 	 *	be zero; outrel takes care of the rest
560 	 */
561 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
562 		rusefile[locindex] = (struct relbufdesc *)0;
563 	}
564 }
565 
566 build_hdr()
567 {
568 	/*
569 	 *	Except for the text and data relocation sizes,
570 	 *	calculate the final values for the header
571 	 *
572 	 *	Write out the initial copy; we to come
573 	 *	back later and patch up a_trsize and a_drsize,
574 	 *	and overwrite this first version of the header.
575 	 */
576 	hdr.a_magic = MAGIC;
577 	hdr.a_text = tsize;
578 	hdr.a_data = dsize;
579 	hdr.a_bss -= dsize;
580 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
581 	hdr.a_entry = 0;
582 	hdr.a_trsize = 0;
583 	hdr.a_drsize = 0;
584 
585 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
586 }
587 
588 i_pass2()
589 {
590 	if (useVM == 0) {
591 		fclose(tokfile);
592 		tokfile = fopen(tokfilename, "r");
593 		if (tokfile==NULL) {
594 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
595 		   delexit();
596 		}
597 	}
598 	fclose(strfile);
599 	strfile = fopen(strfilename, "r");
600 }
601 
602 pass2()
603 {
604 #ifdef DEBUG
605 	if (debug)
606 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
607 #endif DEBUG
608 	passno = 2;
609 	lineno = 1;
610 	dotp = &usedot[0];
611 	txtfil = usefile[0];	/* already opened (always!) */
612 	relfil = 0;		/* outrel takes care of the rest */
613 	initoutrel();
614 
615 	inittokfile();
616 
617 	yyparse();
618 
619 	closetokfile();
620 }
621 
622 fillsegments()
623 {
624 	int	locindex;
625 	/*
626 	 *	Round text and data segments to FW by appending zeros
627 	 */
628 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
629 		if (usefile[locindex]) {
630 			txtfil = usefile[locindex];
631 			dotp = &usedot[locindex];
632 			while (usedot[locindex].e_xvalue & FW)
633 				outb(0);
634 		}
635 	}
636 }
637 
638 reloc_syms()
639 {
640 	u_long	closerelfil();
641 	/*
642 	 *	Move the relocation information to a.out
643 	 *	a_out_off is the offset so far:
644 	 *	exec + text segments + data segments
645 	 */
646 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
647 	bopen(relocfile, a_out_off);
648 	a_out_off += closeoutrel(relocfile);
649 
650 	hdr.a_trsize = trsize;
651 	hdr.a_drsize = drsize;
652 	if (readonlydata) {
653 		hdr.a_text += hdr.a_data;
654 		hdr.a_data = 0;
655 		hdr.a_trsize += hdr.a_drsize;
656 		hdr.a_drsize = 0;
657 	}
658 	/*
659 	 *	Output the symbol table and the string pool
660 	 *
661 	 *	We must first rewind the string pool file to its beginning,
662 	 *	in case it was seek'ed into for fetching ascii and asciz
663 	 *	strings.
664 	 */
665 	fseek(strfile, 0, 0);
666 	symwrite(relocfile);
667 }
668 
669 fix_a_out()
670 {
671 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
672 		yyerror("Reposition for header rewrite fails");
673 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
674 		yyerror("Rewrite of header fails");
675 }
676 
677 delexit()
678 {
679 	delete();
680 	if (passno == 2){
681 		unlink(outfile);
682 	}
683 	exit(1);
684 }
685 
686 delete()
687 {
688 	if (useVM == 0 || tokfilename[0])
689 		unlink(tokfilename);
690 	if (strfilename[0])
691 		unlink(strfilename);
692 }
693 
/*
 *	Keep calling fillinbuffer() until it returns a null pointer,
 *	then remove the temporary files and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();
	char	*more;

	do {
		more = fillinbuffer();
	} while (more != (char *)0);

	delete();
	exit(1);	/*although the previous pass will also exit non zero*/
}
702 
/*
 *	Internal consistency failure.
 *	Report a fixed panic message, then the caller's own message
 *	(fmt with up to four arguments, all handed on to yyerror),
 *	remove the temporary files and abort().
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;		/* passed through to yyerror untouched */
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);

	delete();
	abort();
}
712