xref: /original-bsd/old/as.vax/asmain.c (revision fb7939e6)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
4 #ifndef lint
5 static char sccsid[] = "@(#)asmain.c 4.13 06/30/83";
6 #endif not lint
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 
17 #define	unix_lang_name "VAX/UNIX Assembler V06/30/83 4.13"
18 /*
19  *	variables to manage reading the assembly source files
20  */
21 char	*dotsname;	/*the current file name; managed by the parser*/
22 int	lineno;		/*current line number; managed by the parser*/
23 char	**innames;	/*names of the files being assembled*/
24 int	ninfiles;	/*how many interesting files there are*/
25 /*
26  *	Flags settable from the argv process argument list
27  */
28 int	silent = 0;	/*don't complain about any errors; set by -W*/
29 int	savelabels = 0;	/*write the labels to the a.out file; set by -L*/
30 int 	d124 = 4;	/*default allocate 4 bytes for unknown pointers; -d1, -d2 or -d4 changes it*/
31 int	anyerrs = 0;	/*no errors yet; main gives up after a pass when this is nonzero*/
32 int	anywarnings=0;	/*no warnings yet*/
33 int	orgwarn = 0;	/*Bad origins; main issues a caution at the end when set*/
34 int	passno = 1;	/* current pass; pass1 sets it to 1, pass2 sets it to 2 */
35 int	jxxxJUMP = 0;	/* in jxxxes that branch too far, use jmp instead of brw; set by -J */
36 int	readonlydata = 0;	/* initialized data -> text space; set by -R */
37 
38 int	nGHnumbers = 0;		/* GH numbers used */
39 int	nGHopcodes = 0;		/* GH opcodes used */
40 int	nnewopcodes = 0;	/* new opcodes used */
41 
42 #ifdef DEBUG
43 int 	debug = 0;
44 int	toktrace = 0;
45 #endif
46 
47 int	useVM =	0;
48 
49 char	*endcore;	/*where to get more symbol space*/
50 
51 /*
52  *	Managers of the a.out file.
53  */
54 struct	exec	hdr;
55 #define	MAGIC	0407
56 u_long	tsize;		/* total text size */
57 u_long	dsize;		/* total data size */
58 u_long	datbase;	/* base of the data segment */
59 u_long	trsize;		/* total text relocation size */
60 u_long	drsize;		/* total data relocation size */
61 
62 /*
63  *	Information about the current segment is accumulated in
64  *	usedot; the most important information stored is the
65  *	accumulated size of each of the text and data segments
66  *
67  *	dotp points to the correct usedot expression for the current segment
68  */
69 struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
70 struct	exp	*dotp;			/* data/text location pointer */
71 /*
72  *	The inter pass temporary token file is opened and closed by stdio, but
73  *	is written to using direct read/write, as the temporary file
74  *	is composed of buffers exactly BUFSIZ long.
75  */
76 FILE	*tokfile;			/* interpass communication file */
77 char	tokfilename[TNAMESIZE];
78 /*
79  *	The string file is the string table
80  *	cat'ed to the end of the built up a.out file
81  */
82 FILE	*strfile;			/* interpass string file */
83 char	strfilename[TNAMESIZE];
84 int	strfilepos = 0;			/* position within the string file */
85 /*
86  *	a.out is created during the second pass.
87  *	It is opened by stdio, but is filled with the parallel
88  *	block I/O library
89  */
90 char	*outfile = "a.out";
91 FILE	*a_out_file;
92 off_t	a_out_off;			/* cumulative offsets for segments */
93 /*
94  *	The logical files containing the assembled data for each of
95  *	the text and data segments are
96  *	managed by the parallel block I/O library.
97  *	a.out is logically opened in many places at once to
98  *	receive the assembled data from the various segments as
99  *	it all trickles in, but is physically opened only once
100  *	to minimize file overhead.
101  */
102 BFILE	*usefile[NLOC+NLOC];		/* text/data files */
103 BFILE	*txtfil;			/* current text/data file */
104 /*
105  *	Relocation information is accumulated separately for each
106  *	segment.  This is required by the old loader (from BTL),
107  *	but not by the new loader (Bill Joy).
108  *
109  *	However, the size of the relocation information can not be computed
110  *	during or after the 1st pass because the ``absoluteness'' of values
111  *	is unknown until all locally declared symbols have been seen.
112  *	Thus, the size of the relocation information is only
113  *	known after the second pass is finished.
114  *	This obviates the use of the block I/O
115  *	library, which requires knowing the exact offsets in a.out.
116  *
117  *	So, we save the relocation information internally (we don't
118  *	go to internal files to minimize overhead).
119  *
120  *	Empirically, we studied 259 files composing the system,
121  *	two compilers and a compiler generator: (all of which have
122  *	fairly large source files)
123  *
124  *	Number of files = 259
125  *		Number of non zero text reloc files: 233
126  *		Number of non zero data reloc files: 53
127  *	Average text relocation = 889
128  *	Average data relocation = 346
129  *	Number of files > BUFSIZ text relocation = 71
130  *	Number of files > BUFSIZ data relocation = 6
131  *
132  *	For compiled C code, there is usually one text segment and two
133  *	data segments; we see that allocating our own buffers and
134  *	doing our internal handling of relocation information will,
135  *	on the average, not use more memory than taken up by the buffers
136  *	allocated for doing file I/O in parallel to a number of files.
137  *
138  *	If we are assembling with the -V option, we
139  *	use the left over token buffers from the 2nd pass,
140  *	otherwise, we create our own.
141  *
142  *	When the 2nd pass is complete, closeoutrel flushes the token
143  *	buffers out to a BFILE.
144  *
145  *	The internals to relbufdesc are known only in assyms.c
146  *
147  *	outrel constructs the relocation information.
148  *	closeoutrel flushes the relocation information to relfil.
149  */
150 struct	relbufdesc	*rusefile[NLOC+NLOC];
151 struct	relbufdesc 	*relfil;	/* unconcatenated relocation info */
152 BFILE	*relocfile;			/* concatenated relocation info */
153 /*
154  *	Once the relocation information has been written,
155  *	we can write out the symbol table using the Block I/O
156  *	mechanisms, as we once again know the offsets into
157  *	the a.out file.
158  *
159  *	We use relfil to output the symbol table information.
160  */
161 char	*tmpdirprefix = "/tmp/";
162 int delexit();
163 
164 main(argc, argv)
165 	int	argc;
166 	char 	**argv;
167 {
168 	char	*sbrk();
169 
170 	tokfilename[0] = 0;
171 	strfilename[0] = 0;
172 	endcore = sbrk(0);
173 
174 	argprocess(argc, argv);		/* process argument lists */
175 	if (anyerrs) exit(1);
176 
177 	initialize();
178 	zeroorigins();			/* set origins to zero */
179 	zerolocals();			/* fix local label counters */
180 
181 	i_pass1();			/* open temp files, etc */
182 	pass1();			/* first pass through .s files */
183 	testlocals();			/* check for undefined locals */
184 	if (anyerrs) delexit();
185 
186 	pass1_5();			/* resolve jxxx */
187 	if (anyerrs) delexit();
188 
189 	open_a_out();			/* open a.out */
190 	roundsegments();		/* round segments to FW */
191 	build_hdr();			/* build initial header, and output */
192 
193 	i_pass2();			/* reopen temporary file, etc */
194 	pass2();			/* second pass through the virtual .s */
195 	if (anyerrs) delexit();
196 
197 	fillsegments();			/* fill segments with 0 to FW */
198 	reloc_syms();			/* dump relocation and symbol table */
199 
200 	delete();			/* remove tmp file */
201 	bflush();			/* close off block I/O view of a.out */
202 	fix_a_out();			/* add in text and data reloc counts */
203 
204 	if (anyerrs == 0 && orgwarn)
205 		yyerror("Caution: absolute origins.\n");
206 
207 	if (nGHnumbers)
208 		yywarning("Caution: G or H format floating point numbers");
209 	if (nGHopcodes)
210 		yywarning("Caution: G or H format floating point operators");
211 	if (nnewopcodes)
212 		yywarning("Caution: New Opcodes");
213 	if (nGHnumbers || nGHopcodes || nnewopcodes)
214 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
215 
216 	exit(anyerrs != 0);
217 }
218 
219 argprocess(argc, argv)
220 	int	argc;
221 	char	*argv[];
222 {
223 	register	char	*cp;
224 
225 	ninfiles = 0;
226 	silent = 0;
227 #ifdef DEBUG
228 	debug = 0;
229 #endif
230 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
231 	dotsname = "<argv error>";
232 	while (argc > 1) {
233 		if (argv[1][0] != '-')
234 			innames[ninfiles++] = argv[1];
235 		else {
236 			cp = argv[1] + 1;
237 			/*
238 			 *	We can throw away single minus signs, so
239 			 *	that make scripts for the PDP 11 assembler work
240 			 *	on this assembler too
241 			 */
242 			while (*cp){
243 				switch(*cp++){
244 				 default:
245 					yyerror("Unknown flag: %c", *--cp);
246 					cp++;
247 					break;
248 				 case 'v':
249 					selfwhat(stdout);
250 					exit(1);
251 				 case 'd':
252 					d124 = *cp++ - '0';
253 					if ( (d124 != 1) && (d124 != 2) &&
254 					     (d124 != 4)){
255 						yyerror("-d[124] only");
256 						exit(1);
257 					}
258 					break;
259 				 case 'o':
260 					if (argc < 3){
261 						yyerror("-o what???");
262 						exit(1);
263 					}
264 					outfile = argv[2];
265 				   bumpone:
266 					argc -= 2;
267 					argv += 2;
268 					goto nextarg;
269 
270 				 case 't':
271 					if (argc < 3){
272 						yyerror("-t what???");
273 						exit(1);
274 					}
275 					tmpdirprefix = argv[2];
276 					goto bumpone;
277 
278 				 case 'V':
279 					useVM = 1;
280 					break;
281 				 case 'W':
282 					silent = 1;
283 					break;
284 				 case 'L':
285 					savelabels = 1;
286 					break;
287 				 case 'J':
288 					jxxxJUMP = 1;
289 					break;
290 #ifdef DEBUG
291 				 case 'D':
292 					debug = 1;
293 					break;
294 				 case 'T':
295 					toktrace = 1;
296 					break;
297 #endif
298 				 case 'R':
299 					readonlydata = 1;
300 					break;
301 				}	/*end of the switch*/
302 			}	/*end of pulling out all arguments*/
303 		}	/*end of a flag argument*/
304 		--argc; ++argv;
305 	   nextarg:;
306 	}
307 	/* innames[ninfiles] = 0; */
308 }
/*
 *	Poke through the data space and find all sccs identifiers.
 *
 *	The region from the address of environ up to the current break
 *	(sbrk(0)) is searched for the four byte marker "@(#)".  For each
 *	marker found a tab is written to place, followed by the
 *	characters after the marker up to, but not including, the first
 *	NUL, '>' or newline (or the break), followed by a newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;
	register	char *cp;
	char	*sbrk();

	for (cp = (char *)&environ, ub = sbrk(0); cp < ub; cp++){
		/*
		 *	The marker is four bytes long.  Once fewer than
		 *	four bytes remain below the break no marker can
		 *	start here, and looking at cp[1] .. cp[3] would
		 *	read past the break.
		 */
		if (ub - cp < 4) break;
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
340 
341 initialize()
342 {
343 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
344 		signal(SIGINT, delexit);
345 	/*
346 	 *	Install symbols in the table
347 	 */
348 	symtabinit();
349 	syminstall();
350 	/*
351 	 *	Build the expression parser accelerator token sets
352 	 */
353 	buildtokensets();
354 }
355 
356 zeroorigins()
357 {
358 	register	int	locindex;
359 	/*
360 	 *	Mark usedot: the first NLOC slots are for named text segments,
361 	 *	the next for named data segments.
362 	 */
363 	for (locindex = 0; locindex < NLOC; locindex++){
364 		usedot[locindex].e_xtype = XTEXT;
365 		usedot[NLOC + locindex].e_xtype = XDATA;
366 		usedot[locindex].e_xvalue = 0;
367 		usedot[NLOC + locindex].e_xvalue = 0;
368 	}
369 }
370 
371 zerolocals()
372 {
373 	register	int	i;
374 
375 	for (i = 0; i <= 9; i++) {
376 		lgensym[i] = 1;
377 		genref[i] = 0;
378 	}
379 }
380 
381 i_pass1()
382 {
383 	FILE	*tempopen();
384 	if (useVM == 0)
385 		tokfile = tempopen(tokfilename, "T");
386 	strfile = tempopen(strfilename, "S");
387 	/*
388 	 *	write out the string length.
389 	 *	This will be overwritten when the
390 	 *	strings are tacked onto the growing a.out file
391 	 */
392 	strfilepos = sizeof(int);
393 	fwrite(&strfilepos, sizeof(int), 1, strfile);
394 
395 	inittokfile();
396 	initijxxx();
397 }
398 
399 FILE *tempopen(tname, part)
400 	char	*tname;
401 	char	*part;
402 {
403 	FILE	*file;
404 	sprintf(tname, "%s%sas%s%05d",
405 		tmpdirprefix,
406 		(tmpdirprefix[strlen(tmpdirprefix)-1] != '/') ? "/" : 0,
407 		part,
408 		getpid());
409 	file = fopen(tname, "w");
410 	if (file == NULL) {
411 		yyerror("Bad pass 1 temporary file for writing %s", tname);
412 		delexit();
413 	}
414 	return(file);
415 }
416 
417 pass1()
418 {
419 	register	int	i;
420 
421 	passno = 1;
422 	dotp = &usedot[0];
423 	txtfil = (BFILE *)0;
424 	relfil = (struct relbufdesc *)0;
425 
426 	if (ninfiles == 0){		/*take the input from stdin directly*/
427 		lineno = 1;
428 		dotsname = "<stdin>";
429 
430 		yyparse();
431 	} else {		/*we have the names tanked*/
432 		for (i = 0; i < ninfiles; i++){
433 			new_dot_s(innames[i]);
434 			if (freopen(innames[i], "r", stdin) == NULL) {
435 				yyerror( "Can't open source file %s\n",
436 					innames[i]);
437 				exit(2);
438 			}
439 			/* stdio is NOT used to read the input characters */
440 			/* we use read directly, into our own buffers */
441 			yyparse();
442 		}
443 	}
444 
445 	closetokfile();		/*kick out the last buffered intermediate text*/
446 }
447 
448 testlocals()
449 {
450 	register	int	i;
451 	for (i = 0; i <= 9; i++) {
452 		if (genref[i])
453 			yyerror("Reference to undefined local label %df", i);
454 		lgensym[i] = 1;
455 		genref[i] = 0;
456 	}
457 }
458 
/*
 *	Between the passes: sort the symbol table and resolve the jxxx
 *	instructions.  When compiled with DEBUG and run with debugging
 *	on, the symbol table is dumped before and after jxxxfix().
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif

	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
}
470 
471 open_a_out()
472 {
473 	/*
474 	 *	Open up the a.out file now, and get set to build
475 	 *	up offsets into it for all of the various text,data
476 	 *	text relocation and data relocation segments.
477 	 */
478 	a_out_file = fopen(outfile, "w");
479 	if (a_out_file == NULL) {
480 		yyerror("Cannot create %s", outfile);
481 		delexit();
482 	}
483 	biofd = a_out_file->_file;
484 	a_out_off = 0;
485 }
486 
487 roundsegments()
488 {
489 	register	int	locindex;
490 	register	long	v;
491 	/*
492 	 *	round and assign text segment origins
493 	 *	the exec header always goes in usefile[0]
494 	 */
495 	tsize = 0;
496 	for (locindex=0; locindex<NLOC; locindex++) {
497 		v = round(usedot[locindex].e_xvalue, FW);
498 		usedot[locindex].e_xvalue = tsize;
499 		if ((locindex == 0) || (v != 0) ){
500 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
501 			bopen(usefile[locindex], a_out_off);
502 			if (locindex == 0)
503 				a_out_off = sizeof (struct exec);
504 		} else {
505 			usefile[locindex] = (BFILE *)-1;
506 		}
507 		tsize += v;
508 		a_out_off += v;
509 	}
510 	/*
511 	 *		Round and assign data segment origins.
512 	 */
513 	datbase = round(tsize, FW);
514 	for (locindex=0; locindex<NLOC; locindex++) {
515 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
516 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
517 		if (v != 0){
518 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
519 			bopen(usefile[NLOC + locindex], a_out_off);
520 		} else {
521 			usefile[NLOC + locindex] = (BFILE *)-1;
522 		}
523 		dsize += v;
524 		a_out_off += v;
525 	}
526 	/*
527 	 *	Assign final values to symbols
528 	 */
529 	hdr.a_bss = dsize;
530 	freezesymtab();		/* this touches hdr.a_bss */
531 	stabfix();
532 	/*
533 	 *	Set up the relocation information "files" to
534 	 *	be zero; outrel takes care of the rest
535 	 */
536 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
537 		rusefile[locindex] = (struct relbufdesc *)0;
538 	}
539 }
540 
541 build_hdr()
542 {
543 	/*
544 	 *	Except for the text and data relocation sizes,
545 	 *	calculate the final values for the header
546 	 *
547 	 *	Write out the initial copy; we to come
548 	 *	back later and patch up a_trsize and a_drsize,
549 	 *	and overwrite this first version of the header.
550 	 */
551 	hdr.a_magic = MAGIC;
552 	hdr.a_text = tsize;
553 	hdr.a_data = dsize;
554 	hdr.a_bss -= dsize;
555 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
556 	hdr.a_entry = 0;
557 	hdr.a_trsize = 0;
558 	hdr.a_drsize = 0;
559 
560 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
561 }
562 
563 i_pass2()
564 {
565 	if (useVM == 0) {
566 		fclose(tokfile);
567 		tokfile = fopen(tokfilename, "r");
568 		if (tokfile==NULL) {
569 		   yyerror("Bad pass 2 temporary file for reading %s", tokfilename);
570 		   delexit();
571 		}
572 	}
573 	fclose(strfile);
574 	strfile = fopen(strfilename, "r");
575 }
576 
577 pass2()
578 {
579 #ifdef DEBUG
580 	if (debug)
581 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
582 #endif DEBUG
583 	passno = 2;
584 	lineno = 1;
585 	dotp = &usedot[0];
586 	txtfil = usefile[0];	/* already opened (always!) */
587 	relfil = 0;		/* outrel takes care of the rest */
588 	initoutrel();
589 
590 	inittokfile();
591 
592 	yyparse();
593 
594 	closetokfile();
595 }
596 
597 fillsegments()
598 {
599 	int	locindex;
600 	/*
601 	 *	Round text and data segments to FW by appending zeros
602 	 */
603 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
604 		if (usefile[locindex]) {
605 			txtfil = usefile[locindex];
606 			dotp = &usedot[locindex];
607 			while (usedot[locindex].e_xvalue & FW)
608 				outb(0);
609 		}
610 	}
611 }
612 
613 reloc_syms()
614 {
615 	u_long	closerelfil();
616 	/*
617 	 *	Move the relocation information to a.out
618 	 *	a_out_off is the offset so far:
619 	 *	exec + text segments + data segments
620 	 */
621 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
622 	bopen(relocfile, a_out_off);
623 	a_out_off += closeoutrel(relocfile);
624 
625 	hdr.a_trsize = trsize;
626 	hdr.a_drsize = drsize;
627 	if (readonlydata) {
628 		hdr.a_text += hdr.a_data;
629 		hdr.a_data = 0;
630 		hdr.a_trsize += hdr.a_drsize;
631 		hdr.a_drsize = 0;
632 	}
633 	/*
634 	 *	Output the symbol table and the string pool
635 	 *
636 	 *	We must first rewind the string pool file to its beginning,
637 	 *	in case it was seek'ed into for fetching ascii and asciz
638 	 *	strings.
639 	 */
640 	fseek(strfile, 0, 0);
641 	symwrite(relocfile);
642 }
643 
644 fix_a_out()
645 {
646 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
647 		yyerror("Reposition for header rewrite fails");
648 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
649 		yyerror("Rewrite of header fails");
650 }
651 
652 delexit()
653 {
654 	delete();
655 	if (passno == 2){
656 		unlink(outfile);
657 	}
658 	exit(1);
659 }
660 
661 delete()
662 {
663 	if (useVM == 0 || tokfilename[0])
664 		unlink(tokfilename);
665 	if (strfilename[0])
666 		unlink(strfilename);
667 }
668 
/*
 *	Keep calling fillinbuffer() until it returns a null pointer,
 *	discarding what it returns, then remove the temporary files
 *	and exit 1 (although the previous pass will also exit non zero).
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	exit(1);
}
677 
/*
 *	Internal consistency failure.
 *
 *	Reports a fixed panic message, then the caller's own message
 *	(fmt with up to four arguments, handed on to yyerror untouched),
 *	removes the temporary files and calls abort().
 */
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
	/*VARARGS 1*/
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);

	delete();
	abort();
}
687