xref: /original-bsd/old/as.vax/asmain.c (revision c35f7ea3)
1 /*
2  *	Copyright (c) 1982 Regents of the University of California
3  */
/*
 *	SCCS identification string for this file; it is compiled out
 *	when the source is being run through lint.
 */
#ifndef lint
static char sccsid[] = "@(#)asmain.c 4.10 04/17/82";
#endif /* not lint */
7 
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 
12 #include "as.h"
13 #include "assyms.h"
14 #include "asscan.h"
15 #include "asexpr.h"
16 
/*
 *	Name and version banner of the assembler, one per host system.
 */
#ifdef UNIX
#define	unix_lang_name "VAX/UNIX Assembler V04/17/82 4.10"
#endif /* UNIX */

#ifdef VMS
#define vms_lang_name "VAX/VMS C Assembler V1.00"
#endif /* VMS */
24 
/*
 *	State describing the assembly source files being read
 */
char	*dotsname;		/* name of the file being read; the parser maintains it */
int	lineno;			/* line being read; the parser maintains it */
char	**innames;		/* names of the files to assemble */
int	ninfiles;		/* number of entries used in innames */

/*
 *	Options that can be set from the argument list,
 *	and the error/warning status of the assembly
 */
int	silent = 0;		/* don't complain about any errors */
int	savelabels = 0;		/* write the labels to the a.out file */
int	d124 = 4;		/* bytes to allocate for unknown pointers (default 4) */
int	anyerrs = 0;		/* no errors yet */
int	anywarnings = 0;	/* no warnings yet */
int	orgwarn = 0;		/* bad origins */
int	passno = 1;		/* current pass */
int	jxxxJUMP = 0;		/* for a jxxx that branches too far, use jmp rather than brw */
int	readonlydata = 0;	/* initialized data -> text space */

/*
 *	Counts of constructs that draw a caution at the end of the assembly
 */
int	nGHnumbers = 0;		/* GH numbers used */
int	nGHopcodes = 0;		/* GH opcodes used */
int	nnewopcodes = 0;	/* new opcodes used */

#ifdef DEBUG
int	debug = 0;
int	toktrace = 0;
#endif
53 
/*
 *	Nonzero to keep the interpass temporary file in virtual memory
 *	instead of on disk.  Defaults on for VMS and off everywhere else,
 *	so the definition is well formed whichever of VMS/UNIX is defined.
 */
int	useVM =		/*put the temp file in virtual memory*/
#ifdef VMS
	1;		/*VMS has virtual memory (duh)*/
#else
	0;
#endif
61 
62 char	*endcore;	/*where to get more symbol space*/
63 
64 /*
65  *	Managers of the a.out file.
66  */
67 struct	exec	hdr;
68 #define	MAGIC	0407
69 u_long	tsize;		/* total text size */
70 u_long	dsize;		/* total data size */
71 u_long	datbase;	/* base of the data segment */
72 u_long	trsize;		/* total text relocation size */
73 u_long	drsize;		/* total data relocation size */
74 
75 /*
76  *	Information about the current segment is accumulated in
77  *	usedot; the most important information stored is the
78  *	accumulated size of each of the text and data segments
79  *
80  *	dotp points to the correct usedot expression for the current segment
81  */
82 struct	exp	usedot[NLOC+NLOC];	/* info about all segments */
83 struct	exp	*dotp;			/* data/text location pointer */
84 /*
85  *	The inter pass temporary file is opened and closed by stdio, but
86  *	is written to using direct read/write, as the temporary file
87  *	is composed of buffers exactly BUFSIZ long.
88  */
89 FILE	*tmpfil;			/* interpass communication file */
90 /*
91  *	a.out is created during the second pass.
92  *	It is opened by stdio, but is filled with the parallel
93  *	block I/O library
94  */
95 char	*outfile = "a.out";
96 FILE	*a_out_file;
97 off_t	a_out_off;			/* cumulative offsets for segments */
98 /*
99  *	The logical files containing the assembled data for each of
100  *	the text and data segments are
101  *	managed by the parallel block I/O library.
102  *	a.out is logically opened in many places at once to
103  *	receive the assembled data from the various segments as
104  *	it all trickles in, but is physically opened only once
105  *	to minimize file overhead.
106  */
107 BFILE	*usefile[NLOC+NLOC];		/* text/data files */
108 BFILE	*txtfil;			/* current text/data file */
109 /*
110  *	Relocation information is accumulated seperately for each
111  *	segment.  This is required by the old loader (from BTL),
112  *	but not by the new loader (Bill Joy).
113  *
114  *	However, the size of the relocation information can not be computed
115  *	during or after the 1st pass because the ''absoluteness' of values
116  *	is unknown until all locally declared symbols have been seen.
117  *	Thus, the size of the relocation information is only
118  *	known after the second pass is finished.
119  *	This obviates the use of the block I/O
120  *	library, which requires knowing the exact offsets in a.out.
121  *
122  *	So, we save the relocation information internally (we don't
123  *	go to internal files to minimize overhead).
124  *
125  *	Empirically, we studied 259 files composing the system,
126  *	two compilers and a compiler generator: (all of which have
127  *	fairly large source files)
128  *
129  *	Number of files = 259
130  *		Number of non zero text reloc files: 233
131  *		Number of non zero data reloc files: 53
132  *	Average text relocation = 889
133  *	Average data relocation = 346
134  *	Number of files > BUFSIZ text relocation = 71
135  *	Number of files > BUFSIZ data relocation = 6
136  *
137  *	For compiled C code, there is usually one text segment and two
138  *	data segments; we see that allocating our own buffers and
139  *	doing our internal handling of relocation information will,
140  *	on the average, not use more memory than taken up by the buffers
141  *	allocated for doing file I/O in parallel to a number of file.
142  *
143  *	If we are assembling with the -V option, we
144  *	use the left over token buffers from the 2nd pass,
145  *	otherwise, we create our own.
146  *
147  *	When the 2nd pass is complete, closeoutrel flushes the token
148  *	buffers out to a BFILE.
149  *
150  *	The internals to relbufdesc are known only in assyms.c
151  *
152  *	outrel constructs the relocation information.
153  *	closeoutrel flushes the relocation information to relfil.
154  */
155 struct	relbufdesc	*rusefile[NLOC+NLOC];
156 struct	relbufdesc 	*relfil;	/* un concatnated relocation info */
157 BFILE	*relocfile;			/* concatnated relocation info */
/*
 *	Once the relocation information has been written,
 *	we can write out the symbol table using the Block I/O
 *	mechanisms, as we once again know the offsets into
 *	the a.out file.
 *
 *	We use relocfile to output the symbol table information.
 */
165  */
166 
167 char	*tmpdirprefix =
168 #ifdef UNIX
169 			"/tmp/";
170 #else VMS
171 			"/usr/tmp/";
172 #endif
173 
174 #define		TMP_SUFFIX	"asXXXXXX"
175 char		tmpn1[TNAMESIZE];
176 
177 int delexit();
178 
179 main(argc, argv)
180 	int	argc;
181 	char 	**argv;
182 {
183 	char	*sbrk();
184 
185 	tmpn1[0] = 0;
186 	endcore = sbrk(0);
187 
188 	argprocess(argc, argv);		/* process argument lists */
189 	if (anyerrs) exit(1);
190 
191 	initialize();
192 	zeroorigins();			/* set origins to zero */
193 	zerolocals();			/* fix local label counters */
194 
195 	i_pass1();			/* open temp files, etc */
196 	pass1();			/* first pass through .s files */
197 	testlocals();			/* check for undefined locals */
198 	if (anyerrs) delexit();
199 
200 	pass1_5();			/* resolve jxxx */
201 	if (anyerrs) delexit();
202 
203 	open_a_out();			/* open a.out */
204 	roundsegments();		/* round segments to FW */
205 	build_hdr();			/* build initial header, and output */
206 
207 	i_pass2();			/* reopen temporary file, etc */
208 	pass2();			/* second pass through the virtual .s */
209 	if (anyerrs) delexit();
210 
211 	fillsegments();			/* fill segments with 0 to FW */
212 	reloc_syms();			/* dump relocation and symbol table */
213 
214 	delete();			/* remove tmp file */
215 	bflush();			/* close off block I/O view of a.out */
216 	fix_a_out();			/* add in text and data reloc counts */
217 
218 	if (anyerrs == 0 && orgwarn)
219 		yyerror("Caution: absolute origins.\n");
220 
221 	if (nGHnumbers)
222 		yywarning("Caution: G or H format floating point numbers");
223 	if (nGHopcodes)
224 		yywarning("Caution: G or H format floating point operators");
225 	if (nnewopcodes)
226 		yywarning("Caution: New Opcodes");
227 	if (nGHnumbers || nGHopcodes || nnewopcodes)
228 		yywarning("These are not defined for all implementations of the VAX architecture.\n");
229 
230 	exit(anyerrs != 0);
231 }	/*end of UNIX main*/
232 
233 argprocess(argc, argv)
234 	int	argc;
235 	char	*argv[];
236 {
237 	register	char	*cp;
238 
239 	ninfiles = 0;
240 	silent = 0;
241 #ifdef DEBUG
242 	debug = 0;
243 #endif
244 	innames = (char **)ClearCalloc(argc+1, sizeof (innames[0]));
245 	dotsname = "<argv error>";
246 	while (argc > 1) {
247 		if (argv[1][0] != '-')
248 			innames[ninfiles++] = argv[1];
249 		else {
250 			cp = argv[1] + 1;
251 			/*
252 			 *	We can throw away single minus signs, so
253 			 *	that make scripts for the PDP 11 assembler work
254 			 *	on this assembler too
255 			 */
256 			while (*cp){
257 				switch(*cp++){
258 				 default:
259 					yyerror("Unknown flag: %c", *--cp);
260 					cp++;
261 					break;
262 				 case 'v':
263 					selfwhat(stdout);
264 					exit(1);
265 				 case 'd':
266 					d124 = *cp++ - '0';
267 					if ( (d124 != 1) && (d124 != 2) &&
268 					     (d124 != 4)){
269 						yyerror("-d[124] only");
270 						exit(1);
271 					}
272 					break;
273 				 case 'o':
274 					if (argc < 3){
275 						yyerror("-o what???");
276 						exit(1);
277 					}
278 					outfile = argv[2];
279 				   bumpone:
280 					argc -= 2;
281 					argv += 2;
282 					goto nextarg;
283 
284 				 case 't':
285 					if (argc < 3){
286 						yyerror("-t what???");
287 						exit(1);
288 					}
289 					tmpdirprefix = argv[2];
290 					goto bumpone;
291 
292 				 case 'V':
293 					useVM = 1;
294 					break;
295 				 case 'W':
296 					silent = 1;
297 					break;
298 				 case 'L':
299 					savelabels = 1;
300 					break;
301 				 case 'J':
302 					jxxxJUMP = 1;
303 					break;
304 #ifdef DEBUG
305 				 case 'D':
306 					debug = 1;
307 					break;
308 				 case 'T':
309 					toktrace = 1;
310 					break;
311 #endif
312 				 case 'R':
313 					readonlydata = 1;
314 					break;
315 				}	/*end of the switch*/
316 			}	/*end of pulling out all arguments*/
317 		}	/*end of a flag argument*/
318 		--argc; ++argv;
319 	   nextarg:;
320 	}
321 	/* innames[ninfiles] = 0; */
322 }
/*
 *	Poke through the data space and print every sccs identifier
 *	found there on place, one per line, each preceded by a tab.
 *	An identifier starts after the four characters "@(#)" and runs
 *	up to a NUL, a '>' or a newline.
 *
 *	We assume:
 *	a) that extern char **environ; is the first thing in the bss
 *	segment (true, if one is using the new version of cmgt.crt0.c)
 *	b) that the sccsid's have not been put into text space.
 *
 *	The scan covers the addresses from &environ up to the current
 *	break.  A marker is only looked for where all four of its bytes
 *	lie below the break, so nothing past the break is ever read.
 */
selfwhat(place)
	FILE	*place;
{
	extern	char **environ;
	register	char	*ub;	/* upper bound of the scan: the break */
	register	char	*cp;
	char	*sbrk();

	ub = sbrk(0);
	for (cp = (char *)&environ; ub - cp >= 4; cp++){
		if (cp[0] != '@') continue;
		if (cp[1] != '(') continue;
		if (cp[2] != '#') continue;
		if (cp[3] != ')') continue;
		fputc('\t', place);
		for (cp += 4; cp < ub; cp++){
			if (*cp == 0) break;
			if (*cp == '>') break;
			if (*cp == '\n') break;
			fputc(*cp, place);
		}
		fputc('\n', place);
	}
}
354 
355 initialize()
356 {
357 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)
358 		signal(SIGINT, delexit);
359 	/*
360 	 *	Install symbols in the table
361 	 */
362 	symtabinit();
363 	syminstall();
364 	/*
365 	 *	Build the expression parser accelerator token sets
366 	 */
367 	buildtokensets();
368 }
369 
370 zeroorigins()
371 {
372 	register	int	locindex;
373 	/*
374 	 *	Mark usedot: the first NLOC slots are for named text segments,
375 	 *	the next for named data segments.
376 	 */
377 	for (locindex = 0; locindex < NLOC; locindex++){
378 		usedot[locindex].e_xtype = XTEXT;
379 		usedot[NLOC + locindex].e_xtype = XDATA;
380 		usedot[locindex].e_xvalue = 0;
381 		usedot[NLOC + locindex].e_xvalue = 0;
382 	}
383 }
384 
385 zerolocals()
386 {
387 	register	int	i;
388 
389 	for (i = 0; i <= 9; i++) {
390 		lgensym[i] = 1;
391 		genref[i] = 0;
392 	}
393 }
394 
395 i_pass1()
396 {
397 	if (useVM == 0){
398 		strcat(tmpn1, tmpdirprefix);
399 		if (tmpdirprefix[strlen(tmpdirprefix)-1] != '/')
400 			strcat(tmpn1, "/");
401 		(void)strcat(tmpn1, TMP_SUFFIX);
402 		(void)mktemp(tmpn1);
403 		tmpfil = fopen(tmpn1, "w");
404 		if (tmpfil==NULL) {
405 		  yyerror("Bad pass 1 temporary file for writing %s", tmpn1);
406 		  delexit();
407 		}
408 	}
409 
410 	inittmpfile();
411 	initijxxx();
412 }
413 
414 pass1()
415 {
416 	register	int	i;
417 
418 	passno = 1;
419 	dotp = &usedot[0];
420 	txtfil = (BFILE *)0;
421 	relfil = (struct relbufdesc *)0;
422 
423 	if (ninfiles == 0){		/*take the input from stdin directly*/
424 		lineno = 1;
425 		dotsname = "<stdin>";
426 
427 		yyparse();
428 	} else {		/*we have the names tanked*/
429 		for (i = 0; i < ninfiles; i++){
430 			new_dot_s(innames[i]);
431 			if (freopen(innames[i], "r", stdin) == NULL) {
432 				yyerror( "Can't open source file %s\n",
433 					innames[i]);
434 				exit(2);
435 			}
436 			/* stdio is NOT used to read the input characters */
437 			/* we use read directly, into our own buffers */
438 			yyparse();
439 		}
440 	}
441 
442 	closetmpfile();		/*kick out the last buffered intermediate text*/
443 }
444 
445 testlocals()
446 {
447 	register	int	i;
448 	for (i = 0; i <= 9; i++) {
449 		if (genref[i])
450 			yyerror("Reference to undefined local label %df", i);
451 		lgensym[i] = 1;
452 		genref[i] = 0;
453 	}
454 }
455 
/*
 *	Between the passes: sort the symbol table and fix up the jxxx
 *	instructions.  When debugging, the symbol table is dumped both
 *	before and after the jxxx fix up.
 */
pass1_5()
{
	sortsymtab();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif

	jxxxfix();
#ifdef DEBUG
	if (debug) {
		dumpsymtab();
	}
#endif
}
467 
468 open_a_out()
469 {
470 	/*
471 	 *	Open up the a.out file now, and get set to build
472 	 *	up offsets into it for all of the various text,data
473 	 *	text relocation and data relocation segments.
474 	 */
475 	a_out_file = fopen(outfile, "w");
476 	if (a_out_file == NULL) {
477 		yyerror("Cannot create %s", outfile);
478 		delexit();
479 	}
480 	biofd = a_out_file->_file;
481 	a_out_off = 0;
482 }
483 
484 roundsegments()
485 {
486 	register	int	locindex;
487 	register	long	v;
488 	/*
489 	 *	round and assign text segment origins
490 	 *	the exec header always goes in usefile[0]
491 	 */
492 	tsize = 0;
493 	for (locindex=0; locindex<NLOC; locindex++) {
494 		v = round(usedot[locindex].e_xvalue, FW);
495 		usedot[locindex].e_xvalue = tsize;
496 		if ((locindex == 0) || (v != 0) ){
497 			usefile[locindex] = (BFILE *)Calloc(1, sizeof(BFILE));
498 			bopen(usefile[locindex], a_out_off);
499 			if (locindex == 0)
500 				a_out_off = sizeof (struct exec);
501 		} else {
502 			usefile[locindex] = (BFILE *)-1;
503 		}
504 		tsize += v;
505 		a_out_off += v;
506 	}
507 	/*
508 	 *		Round and assign data segment origins.
509 	 */
510 	datbase = round(tsize, FW);
511 	for (locindex=0; locindex<NLOC; locindex++) {
512 		v = round(usedot[NLOC+locindex].e_xvalue, FW);
513 		usedot[NLOC+locindex].e_xvalue = datbase + dsize;
514 		if (v != 0){
515 			usefile[NLOC + locindex] = (BFILE *)Calloc(1,sizeof(BFILE));
516 			bopen(usefile[NLOC + locindex], a_out_off);
517 		} else {
518 			usefile[NLOC + locindex] = (BFILE *)-1;
519 		}
520 		dsize += v;
521 		a_out_off += v;
522 	}
523 	/*
524 	 *	Assign final values to symbols
525 	 */
526 	hdr.a_bss = dsize;
527 	freezesymtab();		/* this touches hdr.a_bss */
528 	stabfix();
529 	/*
530 	 *	Set up the relocation information "files" to
531 	 *	be zero; outrel takes care of the rest
532 	 */
533 	for (locindex = 0; locindex < NLOC + NLOC; locindex++){
534 		rusefile[locindex] = (struct relbufdesc *)0;
535 	}
536 }
537 
538 build_hdr()
539 {
540 	/*
541 	 *	Except for the text and data relocation sizes,
542 	 *	calculate the final values for the header
543 	 *
544 	 *	Write out the initial copy; we to come
545 	 *	back later and patch up a_trsize and a_drsize,
546 	 *	and overwrite this first version of the header.
547 	 */
548 	hdr.a_magic = MAGIC;
549 	hdr.a_text = tsize;
550 	hdr.a_data = dsize;
551 	hdr.a_bss -= dsize;
552 	hdr.a_syms = sizesymtab();	/* Does not include string pool length */
553 	hdr.a_entry = 0;
554 	hdr.a_trsize = 0;
555 	hdr.a_drsize = 0;
556 
557 	bwrite((char *)&hdr, sizeof(hdr), usefile[0]);
558 }
559 
560 i_pass2()
561 {
562 	if (useVM == 0) {
563 		fclose(tmpfil);
564 		tmpfil = fopen(tmpn1, "r");
565 		if (tmpfil==NULL) {
566 		   yyerror("Bad pass 2 temporary file for reading %s", tmpn1);
567 		   delexit();
568 		}
569 	}
570 }
571 
572 pass2()
573 {
574 #ifdef DEBUG
575 	if (debug)
576 		printf("\n\n\n\t\tPASS 2\n\n\n\n");
577 #endif DEBUG
578 	passno = 2;
579 	lineno = 1;
580 	dotp = &usedot[0];
581 	txtfil = usefile[0];	/* already opened (always!) */
582 	relfil = 0;		/* outrel takes care of the rest */
583 	initoutrel();
584 
585 	inittmpfile();
586 
587 	yyparse();
588 
589 	closetmpfile();
590 }
591 
592 fillsegments()
593 {
594 	int	locindex;
595 	/*
596 	 *	Round text and data segments to FW by appending zeros
597 	 */
598 	for (locindex = 0; locindex < NLOC + NLOC; locindex++) {
599 		if (usefile[locindex]) {
600 			txtfil = usefile[locindex];
601 			dotp = &usedot[locindex];
602 			while (usedot[locindex].e_xvalue & FW)
603 				outb(0);
604 		}
605 	}
606 }
607 
608 reloc_syms()
609 {
610 	u_long	closerelfil();
611 	/*
612 	 *	Move the relocation information to a.out
613 	 *	a_out_off is the offset so far:
614 	 *	exec + text segments + data segments
615 	 */
616 	relocfile = (BFILE *)Calloc(1,sizeof(BFILE));
617 	bopen(relocfile, a_out_off);
618 	a_out_off += closeoutrel(relocfile);
619 
620 	hdr.a_trsize = trsize;
621 	hdr.a_drsize = drsize;
622 	if (readonlydata) {
623 		hdr.a_text += hdr.a_data;
624 		hdr.a_data = 0;
625 		hdr.a_trsize += hdr.a_drsize;
626 		hdr.a_drsize = 0;
627 	}
628 	/*
629 	 *	Output the symbol table
630 	 *	and if FLEXNAMES is set, the string pool
631 	 */
632 	symwrite(relocfile);
633 }
634 
635 fix_a_out()
636 {
637 	if (lseek(a_out_file->_file, 0L, 0) < 0L)
638 		yyerror("Reposition for header rewrite fails");
639 	if (write(a_out_file->_file, (char *)&hdr, sizeof (struct exec)) < 0)
640 		yyerror("Rewrite of header fails");
641 }
642 
643 delexit()
644 {
645 	delete();
646 	if (passno == 2){
647 		unlink(outfile);
648 	}
649 	exit(1);
650 }
651 
652 delete()
653 {
654 	if (useVM == 0 || tmpn1[0])
655 		unlink(tmpn1);
656 }
657 
/*
 *	Keep calling fillinbuffer() until it returns a null pointer,
 *	then remove the temporary file and exit with status 1.
 */
sawabort()
{
	char	*fillinbuffer();

	for (;;) {
		if (fillinbuffer() == (char *)0)
			break;
	}
	delete();
	exit(1);	/*although the previous pass will also exit non zero*/
}
666 
/*
 *	Internal consistency failure.  Reports a fixed panic message,
 *	then the caller's own message (fmt with up to four arguments,
 *	passed straight on to yyerror), removes the temporary file
 *	and aborts.
 */
/*VARARGS 1*/
panic(fmt, a1, a2, a3, a4)
	char	*fmt;
	int	a1, a2, a3, a4;
{
	yyerror("Assembler panic: bad internal data structure.");
	yyerror(fmt, a1, a2, a3, a4);
	delete();
	abort();
}
676