1 /* aslex.c */
2
3 /*
4 * Copyright (C) 1989-2010 Alan R. Baldwin
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 *
20 * Alan R. Baldwin
21 * 721 Berkeley St.
22 * Kent, Ohio 44240
23 *
24 */
25
26 /*
27 * 28-Oct-97 JLH bug in getst(): sign extend on ~(SPACE|ILL)
28 * causes infinite loop
29 */
30
31 /*
32 * Extensions: P. Felber, M. Hope
33 */
34
35 #include "dbuf_string.h"
36 #include "asxxxx.h"
37
38 /*)Module aslex.c
39 *
40 * The module aslex.c includes the general lexical
41 * analysis routines for the assembler.
42 *
43 * aslex.c contains the following functions:
44 * int comma()
45 * char endline()
46 * int get()
 *              VOID    getdstr()
48 * int getdlm()
49 * VOID getid()
50 * int getmap()
51 * int getnb()
52 * int getlnm()
53 * VOID getst()
54 * int more()
55 * int nxtline()
56 * VOID unget()
57 *
 *      aslex.c contains no file-scope variables; nxtline() keeps its
 *      two line buffers in function-local static storage.
59 */
60
61 /*)Function VOID getid(id,c)
62 *
63 * char * id a pointer to a string of
64 * maximum length NCPS-1
65 * int c mode flag
66 * >=0 this is first character to
67 * copy to the string buffer
68 * <0 skip white space, first
69 * character must be a LETTER
70 *
71 * The function getid() scans the current assembler-source text line
72 * from the current position copying the next LETTER | DIGIT string
73 * into the external string buffer id[]. The string ends when a non
74 * LETTER or DIGIT character is found. The maximum number of characters
75 * copied is NCPS-1. If the input string is larger than NCPS-1
76 * characters then the string is truncated. The string is always
77 * NULL terminated. If the mode argument (c) is >=0 then (c) is
78 * the first character copied to the string buffer, if (c) is <0
79 * then intervening white space (SPACES and TABS) are skipped and
80 * the first character found must be a LETTER else a 'q' error
81 * terminates the parse of this assembler-source text line.
82 *
83 * local variables:
84 * char * p pointer to external string buffer
85 * int c current character value
86 *
87 * global variables:
88 * char ctype[] a character array which defines the
89 * type of character being processed.
90 * This index is the character
91 * being processed.
92 *
93 * called functions:
94 * int get() aslex.c
95 * int getnb() aslex.c
96 * VOID unget() aslex.c
97 * VOID qerr() assubr.c
98 *
99 * side effects:
100 * Use of getnb(), get(), and unget() updates the
101 * global pointer ip, the position in the current
102 * assembler-source text line.
103 */
104
105 VOID
getid(char * id,int c)106 getid(char *id, int c)
107 {
108 char *p;
109
110 if (c < 0) {
111 c = getnb();
112 if ((ctype[c] & LETTER) == 0)
113 qerr();
114 }
115 p = id;
116 do {
117 if (p < &id[NCPS-1])
118 *p++ = c;
119 } while (ctype[c=get()] & (LETTER|DIGIT));
120 unget(c);
121 *p++ = 0;
122 }
123
124 /*)Function VOID getst(id,c)
125 *
126 * char * id a pointer to a string of
127 * maximum length NCPS-1
128 * int c mode flag
129 * >=0 this is first character to
130 * copy to the string buffer
131 * <0 skip white space, first
132 * character must be a LETTER
133 *
134 * The function getst() scans the current assembler-source text line
135 * from the current position copying the next character string into
136 * the external string buffer (id). The string ends when a SPACE or
137 * ILL character is found. The maximum number of characters copied is
138 * NCPS-1. If the input string is larger than NCPS-1 characters then
139 * the string is truncated. The string is always NULL terminated.
140 * If the mode argument (c) is >=0 then (c) is the first character
141 * copied to the string buffer, if (c) is <0 then intervening white
142 * space (SPACES and TABS) are skipped and the first character found
143 * must be a LETTER else a 'q' error terminates the parse of this
144 * assembler-source text line.
145 *
146 * local variables:
147 * char * p pointer to external string buffer
148 * int c current character value
149 *
150 * global variables:
151 * char ctype[] a character array which defines the
152 * type of character being processed.
153 * This index is the character
154 * being processed.
155 *
156 * called functions:
157 * int get() aslex.c
158 * int getnb() aslex.c
159 * VOID unget() aslex.c
160 * VOID qerr() assubr.c
161 *
162 * side effects:
163 * use of getnb(), get(), and unget() updates the
164 * global pointer ip, the position in the current
165 * assembler-source text line.
166 */
167
168 VOID
getst(char * id,int c)169 getst(char *id, int c)
170 {
171 char *p;
172
173 if (c < 0) {
174 c = getnb();
175 if ((ctype[c] & LETTER) == 0)
176 qerr();
177 }
178 p = id;
179 do {
180 if (p < &id[NCPS-1])
181 *p++ = c;
182 } while (ctype[c=get()] & ~(SPACE|ILL) & 0xFF);
183 unget(c);
184 *p++ = 0;
185 }
186
187 /*)Function int getdstr(str, slen)
188 *
189 * char * str character array to return string in
190 * int slen charater array length
191 *
192 * The function getdstr() returns the character string
193 * within delimiters. If no delimiting character
194 * is found a 'q' error is generated.
195 *
196 * local variables:
197 * int c current character from
198 * assembler-source text line
199 * int d the delimiting character
200 *
201 * global variables:
202 * none
203 *
204 * called functions:
205 * int get() aslex.c
206 * int getdlm() aslex.c
207 * VOID qerr() assubr.c
208 *
209 * side effects:
210 * Returns the character string delimited by the
211 * character returned from getdlm(). SPACEs and
212 * TABs before the delimited string are skipped.
213 * A 'q' error is generated if no delimited string
214 * is found or the input line terminates unexpectedly.
215 */
216
217 VOID
getdstr(str,slen)218 getdstr(str, slen)
219 char * str;
220 int slen;
221 {
222 char *p;
223 int c, d;
224
225 d = getdlm();
226
227 p = str;
228 while ((c = get()) != d) {
229 if (c == '\0') {
230 qerr();
231 }
232 if (p < &str[slen-1]) {
233 *p++ = c;
234 } else {
235 break;
236 }
237 }
238 *p = 0;
239 }
240
/*)Function     int     getdlm()
 *
 *      The function getdlm() returns the delimiter character
 *      or, if the end of the line is encountered, generates a
 *      'q' error.
 *
 *      local variables:
 *              int     c               current character from
 *                                      assembler-source text line
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     get()           aslex.c
 *              int     getnb()         aslex.c
 *              int     more()          aslex.c
 *              VOID    qerr()          assubr.c
 *
 *      side effects:
 *              Scans ip to the first character that is not a SPACE
 *              or TAB and returns that character, or the character
 *              following a ^ character, as the delimiting character.
 *              The end of the text line or the beginning of a
 *              comment causes a 'q' error, as does a ^ that is the
 *              last character of the line.
 */

int
getdlm(void)
{
        int c;

        c = '\0';
        if (more()) {
                c = getnb();
                if (c == '^') {
                        /* ^ introduces an explicitly chosen delimiter. */
                        c = get();
                }
        }
        if (c == '\0') {
                qerr();
        }
        return (c);
}
285
/*)Function     int     getnb()
 *
 *      The function getnb() reads characters from the current
 *      assembler-source text line until one is found that is
 *      neither a SPACE nor a TAB, and returns that character.
 *      The value returned by get() at the end of the line is
 *      passed through unchanged.
 *
 *      local variables:
 *              int     c               current character from
 *                                      assembler-source text line
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     get()           aslex.c
 *
 *      side effects:
 *              use of get() updates the global pointer ip, the position
 *              in the current assembler-source text line
 */

int
getnb(void)
{
        int c;

        do {
                c = get();
        } while (c == ' ' || c == '\t');
        return (c);
}
315
316 /*)Function int get()
317 *
318 * The function get() returns the next character in the
319 * assembler-source text line, at the end of the line a
320 * NULL character is returned.
321 *
322 * local variables:
323 * int c current character from
324 * assembler-source text line
325 *
326 * global variables:
327 * char * ip pointer into the current
328 * assembler-source text line
329 *
330 * called functions:
331 * none
332 *
333 * side effects:
334 * updates ip to the next character position in the
335 * assembler-source text line. If ip is at the end of the
336 * line, ip is not updated.
337 */
338
339 int
get(void)340 get(void)
341 {
342 int c;
343
344 if ((c = *ip) != 0)
345 ++ip;
346 return (c & 0x00FF);
347 }
348
349 /*)Function VOID unget(c)
350 *
351 * int c value of last character read from
352 * assembler-source text line
353 *
354 * If (c) is not a NULL character then the global pointer ip
355 * is updated to point to the preceeding character in the
356 * assembler-source text line.
357 *
358 * NOTE: This function does not push the character (c)
359 * back into the assembler-source text line, only
360 * the pointer ip is changed.
361 *
362 * local variables:
363 * int c last character read from
364 * assembler-source text line
365 *
366 * global variables:
367 * char * ip position into the current
368 * assembler-source text line
369 *
370 * called functions:
371 * none
372 *
373 * side effects:
374 * ip decremented by 1 character position
375 */
376
377 VOID
unget(int c)378 unget(int c)
379 {
380 if (c)
381 if (ip != ib)
382 --ip;
383 }
384
/*)Function     int     getmap(d)
 *
 *              int     d               value to compare with the
 *                                      assembler-source text line character
 *
 *      The function getmap() reads one, possibly escaped, character
 *      from the assembler-source text line:
 *
 *        - at the end of the line a 'q' error is generated,
 *        - if the character read equals (d) then (-1) is returned,
 *        - if the character is not a \ it is returned as is,
 *        - \b, \f, \n, \r, and \t return the equivalent 'C'
 *          character values,
 *        - \ followed by one to three octal digits returns the
 *          numeric value of those digits,
 *        - \ followed by anything else returns the \ itself and
 *          leaves the following character unread.
 *
 *      local variables:
 *              int     ch              value of character from the
 *                                      assembler-source text line
 *              int     digits          number of octal digits consumed
 *              int     value           current value of numeric conversion
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     get()           aslex.c
 *              VOID    unget()         aslex.c
 *              VOID    qerr()          assubr.c
 *
 *      side effects:
 *              use of get() and unget() updates the global pointer ip,
 *              the position in the current assembler-source text line
 */

int
getmap(int d)
{
        int ch, digits, value;

        ch = get();
        if (ch == '\0') {
                qerr();
        }
        if (ch == d) {
                return (-1);
        }
        if (ch != '\\') {
                return (ch);
        }

        /* Escape sequence: classify the character after the backslash. */
        ch = get();

        if (ch >= '0' && ch <= '7') {
                value = 0;
                for (digits = 0; digits < 3 && ch >= '0' && ch <= '7'; digits++) {
                        value = (value << 3) + ch - '0';
                        ch = get();
                }
                /* The loop always reads one character past the constant. */
                unget(ch);
                return (value);
        }

        switch (ch) {
        case 'b':
                return ('\b');
        case 'f':
                return ('\f');
        case 'n':
                return ('\n');
        case 'r':
                return ('\r');
        case 't':
                return ('\t');
        default:
                break;
        }

        /* Not a recognized escape: hand back the backslash itself. */
        unget(ch);
        return ('\\');
}
475
/*)Function     int     comma(flag)
 *
 *              int     flag            when flag is non zero a 'q' error is
 *                                      generated if a COMMA is not found.
 *
 *      The function comma() skips SPACEs and TABs and returns
 *      a '1' if the next character is a COMMA, else a '0' is
 *      returned.  If a COMMA is not found and flag is non zero
 *      then a 'q' error is reported; if flag is zero the character
 *      that was read is left unread.
 *
 *      local variables:
 *              int     c               last character read from
 *                                      assembler-source text line
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     getnb()         aslex.c
 *              VOID    qerr()          assubr.c
 *              VOID    unget()         aslex.c
 *
 *      side effects:
 *              assembler-source text line pointer updated
 */

int
comma(int flag)
{
        int c;

        c = getnb();
        if (c == ',') {
                return(1);
        }

        if (flag) {
                qerr();
        } else {
                unget(c);
        }
        return(0);
}
517
518 /*)Function int nxtline()
519 *
520 * The function nxtline() reads a line of assembler-source text
521 * from an assembly source text file, include file, or macro.
522 * Lines of text are processed from assembler-source files until
523 * all files have been read. If an include file is opened then
524 * lines of text are read from the include file (or nested
525 * include file) until the end of the include file is found.
526 * The input text line is transferred into the global string
527 * ib[] and converted to a NULL terminated string. The string
528 * is then copied into the global string ic[] which is used
529 * for internal processing by the assembler. The function
 *      nxtline() returns a (1) after successfully reading
531 * a line, or a (0) if all files have been read.
532 *
533 * local variables:
 *              size_t  len             string length
535 * struct asmf *asmt temporary pointer to the processing structure
536 *
537 * global variables:
538 * char afn[] afile() constructed filespec
539 * int afp afile constructed path length
540 * asmf * asmc pointer to current assembler file structure
541 * asmf * asmi pointer to a queued include file structure
542 * asmf * asmq pointer to a queued macro structure
543 * char * ib string buffer containing
544 * assembler-source text line for processing
545 * char * ic string buffer containing
546 * assembler-source text line for listing
547 * int asmline source file line number
548 * int incfil current include file count
549 * int incline include file line number
550 * int lnlist LIST-NLIST state
551 * int mcrline macro line number
552 * int srcline current source line number
553 * int uflag -u, disable .list/.nlist processing
554 *
555 * called functions:
556 * int dbuf_init()
557 * int dbuf_set_length()
558 * int dbuf_getline()
559 * const char * dbuf_c_str()
560 * int dbuf_append_str()
561 * int fclose() c-library
562 * char * fgetm() asmcro.c
563 * char * strcpy() c_library
564 *
565 * side effects:
566 * include file will be closed at detection of end of file.
567 * the next sequential source file may be selected.
568 * The current file specification afn[] and the path
569 * length afp may be changed.
570 * The respective line counter will be updated.
571 *
572 * --------------------------------------------------------------
573 *
574 * How the assembler sequences the command line assembler
575 * source files, include files, and macros is shown in a
576 * simplified manner in the following.
577 *
578 * main[asmain] sequences the command line files by creating
579 * a linked list of asmf structures, one for each file.
580 *
581 * asmf structures:
582 * ------------- ------------- -------------
583 * | File 1 | | File 2 | | File N |
584 * ------ | ------| | ------| | ------|
585 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
586 * ------ ------------- ------------- -------------
587 *
588 * At the beginning of each assembler pass set asmc = asmp
589 * and process the files in sequence.
590 *
591 * If the source file invokes the .include directive to process a
592 * file then a new asmf structure is prepended to the asmc structure
593 * currently being processed. At the end of the include file the
594 * processing resumes at the point the asmc structure was interrupted.
595 * This is shown in the following:
596 *
597 * -------------
598 * | Incl File 1 |
599 * | ------|
600 * | | next |
601 * -------------
602 * |
603 * asmf structures: |
604 * V
605 * ------------- ------------- -------------
606 * | File 1 | | File 2 | | File N |
607 * ------ | ------| | ------| | ------|
608 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
609 * ------ ------------- ------------- -------------
610 *
611 * At the .include point link the asmi structure to asmc
612 * and then set asmc = asmi (the include file asmf structure).
613 *
614 * If a source file invokes a macro then a new asmf structure is
615 * prepended to the asmc structure currently being processed. At the
616 * end of the macro the processing resumes at the point the asmc
617 * structure was interrupted.
618 * This is shown in the following:
619 *
620 * ------------- -------------
621 * | Incl File 1 | | Macro |
622 * | ------| | ------|
623 * | | next | | | next |
624 * ------------- -------------
625 * | |
626 * asmf structures: | |
627 * V V
628 * ------------- ------------- -------------
629 * | File 1 | | File 2 | | File N |
630 * ------ | ------| | ------| | ------|
631 * | asmp | -->| | next | --> | | next | --> ... --> | | NULL |
632 * ------ ------------- ------------- -------------
633 *
634 * At the macro point link the asmq structure to asmc
635 * and then set asmc = asmq (the macro asmf structure).
636 *
637 * Note that both include files and macros can be nested.
638 * Macros may be invoked within include files and include
639 * files can be invoked within macros.
640 *
641 * Include files are opened, read, and closed on each pass
642 * of the assembler.
643 *
644 * Macros are recreated during each pass of the assembler.
645 */
646
647 int
nxtline(void)648 nxtline(void)
649 {
650 static struct dbuf_s dbuf_ib;
651 static struct dbuf_s dbuf_ic;
652 size_t len = 0;
653 struct asmf *asmt;
654
655 if (!dbuf_is_initialized (&dbuf_ib))
656 dbuf_init (&dbuf_ib, 1024);
657 if (!dbuf_is_initialized (&dbuf_ic))
658 dbuf_init (&dbuf_ic, 1024);
659 dbuf_set_length (&dbuf_ib, 0);
660 dbuf_set_length (&dbuf_ic, 0);
661
662 loop: if (asmc == NULL) return(0);
663
664 /*
665 * Insert Include File
666 */
667 if (asmi != NULL) {
668 asmc = asmi;
669 asmi = NULL;
670 incline = 0;
671 }
672 /*
673 * Insert Queued Macro
674 */
675 if (asmq != NULL) {
676 asmc = asmq;
677 asmq = NULL;
678 mcrline = 0;
679 }
680
681 switch(asmc->objtyp) {
682 case T_ASM:
683 if ((len = dbuf_getline (&dbuf_ib, asmc->fp)) == 0) {
684 if ((asmc->flevel != flevel) || (asmc->tlevel != tlevel)) {
685 err('i');
686 fprintf(stderr, "?ASxxxx-Error-<i> at end of assembler file\n");
687 fprintf(stderr, " %s\n", geterr('i'));
688 }
689 flevel = asmc->flevel;
690 tlevel = asmc->tlevel;
691 lnlist = asmc->lnlist;
692 asmc = asmc->next;
693 if (asmc != NULL) {
694 asmline = 0;
695 }
696 if ((lnlist & LIST_PAG) || (uflag == 1)) {
697 lop = NLPP;
698 }
699 goto loop;
700 } else {
701 if (asmline++ == 0) {
702 strcpy(afn, asmc->afn);
703 afp = asmc->afp;
704 }
705 srcline = asmline;
706 }
707 break;
708
709 case T_INCL:
710 if ((len = dbuf_getline (&dbuf_ib, asmc->fp)) == 0) {
711 fclose(asmc->fp);
712 incfil -= 1;
713 if ((asmc->flevel != flevel) || (asmc->tlevel != tlevel)) {
714 err('i');
715 fprintf(stderr, "?ASxxxx-Error-<i> at end of include file\n");
716 fprintf(stderr, " %s\n", geterr('i'));
717 }
718 srcline = asmc->line;
719 flevel = asmc->flevel;
720 tlevel = asmc->tlevel;
721 lnlist = asmc->lnlist;
722 asmc = asmc->next;
723 switch (asmc->objtyp) {
724 default:
725 case T_ASM: asmline = srcline; break;
726 case T_INCL: incline = srcline; break;
727 case T_MACRO: mcrline = srcline; break;
728 }
729 /*
730 * Scan for parent file
731 */
732 asmt = asmc;
733 while (asmt != NULL) {
734 if (asmt->objtyp != T_MACRO) {
735 strcpy(afn, asmt->afn);
736 afp = asmt->afp;
737 break;
738 }
739 asmt = asmt->next;
740 }
741 if ((lnlist & LIST_PAG) || (uflag == 1)) {
742 lop = NLPP;
743 }
744 goto loop;
745 } else {
746 if (incline++ == 0) {
747 strcpy(afn, asmc->afn);
748 afp = asmc->afp;
749 }
750 srcline = incline;
751 }
752 break;
753
754 case T_MACRO:
755 dbuf_append(&dbuf_ib, "\0", dbuf_ib.alloc - 1);
756 ib = (char *)dbuf_c_str (&dbuf_ib);
757 ib = fgetm(ib, dbuf_ib.alloc - 1, asmc->fp);
758 if (ib == NULL) {
759 dbuf_set_length(&dbuf_ib, 0);
760 mcrfil -= 1;
761 srcline = asmc->line;
762 flevel = asmc->flevel;
763 tlevel = asmc->tlevel;
764 lnlist = asmc->lnlist;
765 asmc = asmc->next;
766 switch (asmc->objtyp) {
767 default:
768 case T_ASM: asmline = srcline; break;
769 case T_INCL: incline = srcline; break;
770 case T_MACRO: mcrline = srcline; break;
771 }
772 goto loop;
773 } else {
774 len = strlen(ib);
775 dbuf_set_length(&dbuf_ib, len);
776 if (mcrline++ == 0) {
777 ;
778 }
779 srcline = mcrline;
780 }
781 break;
782
783 default:
784 fprintf(stderr, "?ASxxxx-Internal-nxtline(objtyp)-Error.\n\n");
785 asexit(ER_FATAL);
786 break;
787 }
788 ib = (char *)dbuf_c_str (&dbuf_ib);
789
790 /* remove the trailing NL */
791 if (len > 0 && '\n' == ib[len - 1])
792 {
793 --len;
794 if (len > 0 && '\r' == ib[len - 1])
795 --len;
796 dbuf_set_length (&dbuf_ib, len);
797 ib = (char *)dbuf_c_str (&dbuf_ib);
798 }
799
800 dbuf_append_str (&dbuf_ic, ib);
801 ic = (char *)dbuf_c_str (&dbuf_ic);
802 return(1);
803 }
804
805
806 /*)Function: int getlnm()
807 *
808 * The function getlnm() returns the line number of the
809 * originating assembler or include file.
810 *
811 * local variables:
812 * struct asmf *asmt temporary pointer to the processing structure
813 *
814 * global variables:
815 * struct asmf *asmc pointer to the current input processing structure
816 * int asmline line number in current assembler file
817 * int line line number
818 *
819 * functions called:
820 * none
821 *
822 * side effects:
823 * Sets line to the source file line number.
824 */
825
826 int
getlnm()827 getlnm()
828 {
829 struct asmf *asmt;
830
831 line = srcline;
832 if (asmc->objtyp == T_MACRO) {
833 asmt = asmc->next;
834 while (asmt != NULL) {
835 switch (asmt->objtyp) {
836 case T_ASM: return(line = asmline);
837 case T_INCL: return(line = asmt->line);
838 default: asmt = asmt->next; break;
839 }
840 }
841 }
842 return(line);
843 }
844
/*)Function     int     more()
 *
 *      The function more() scans the assembler-source text line
 *      skipping white space (SPACES and TABS) and returns a (0)
 *      if the end of the line or a comment delimiter (;) is found,
 *      or a (1) if there are additional characters in the line.
 *      The character that was examined is left unread.
 *
 *      local variables:
 *              int     c               next character from the
 *                                      assembler-source text line
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     getnb()         aslex.c
 *              VOID    unget()         aslex.c
 *
 *      side effects:
 *              use of getnb() and unget() updates the global pointer ip
 *              the position in the current assembler-source text line
 */

int
more(void)
{
        int c;

        c = getnb();
        unget(c);
        if (c == '\0' || c == ';') {
                return(0);
        }
        return(1);
}
877
/*)Function     char    endline()
 *
 *      The function endline() scans the assembler-source text line
 *      skipping white space (SPACES and TABS) and returns the next
 *      character, or a (0) if the end of the line or a comment
 *      delimiter (;) is found.  The character is consumed.
 *
 *      local variables:
 *              int     c               next character from the
 *                                      assembler-source text line
 *
 *      global variables:
 *              none
 *
 *      called functions:
 *              int     getnb()         aslex.c
 *
 *      side effects:
 *              use of getnb() updates the global pointer ip the
 *              position in the current assembler-source text line
 */

char
endline(void)
{
        int c;

        c = getnb();
        if (c == '\0' || c == ';') {
                return(0);
        }
        return(c);
}
908