1 #include "ckcsym.h"
2 char *wartv = "Wart Version 2.15, 18 September 2020 ";
3
4 #define CKWART_C
5
6 #ifdef MDEBUG
7 /* Use the real ones in this module only */
8 #ifdef malloc
9 #undef malloc
10 #endif /* malloc */
11 #ifdef calloc
12 #undef calloc
13 #endif /* calloc */
14 #ifdef realloc
15 #undef realloc
16 #endif /* realloc */
17 #ifdef free
18 #undef free
19 #endif /* free */
20 #endif /* MDEBUG */
21
22 #ifdef MAC
23 #define VOID void
24 #endif /* MAC */
25
26 /* W A R T */
27
28 /*
29 A small subset of "lex".
30
31 Authors: Jeff Damens, Frank da Cruz
  Columbia University Center for Computing Activities.
33 First released November 1984.
34 Copyright (C) 1984, 2009,
35 Trustees of Columbia University in the City of New York.
36 All rights reserved. See the C-Kermit COPYING.TXT file or the
37 copyright text in the ckcmai.c module for disclaimer and permissions.
38 */
39
/*
 * input format is:
 *   lines to be copied | %states <state names...>
 *   %%
 *   <state> | <state,state,...> CHAR { actions }
 *   ...
 *   %%
 *   more lines to be copied
 */
49
50 #include "ckcdeb.h" /* Includes */
51
52 #ifdef STRATUS
53 /* Actually call printf, not our printf-catcher for Kermit */
54 #ifdef printf
55 #undef printf
56 #endif /* printf */
57 #ifdef fprintf
58 #undef fprintf
59 #endif /* fprintf */
60 #endif /* STRATUS */
61
62 #ifdef MAC
63 /* Same deal for Macintosh */
64 #ifdef printf
65 #undef printf
66 #endif /* printf */
67 #ifdef fprintf
68 #undef fprintf
69 #endif /* fprintf */
70 #endif /* MAC */
71
72 #ifdef UNIX
73 /* And UNIX */
74 #ifdef printf
75 #undef printf
76 #endif /* printf */
77 #ifdef fprintf
78 #undef fprintf
79 #endif /* fprintf */
80 #endif /* UNIX */
/*
  The following "char" should be changed to "short", "int", or "long" if your
  wart program will generate more than 127 states.  Since wart is used mainly
  with C-Kermit, which has about 80 states, "char" is adequate.  This keeps
  the program about 3K-4K smaller, which can be critical on 16-bit
  architectures.  (Note: the values stored in the generated table are action
  numbers, so the limit that matters in practice is the number of actions.)
*/
88 #ifdef IRIX60
89 /*
90 Also use short or int if your compiler complains inordinately about
91 "integer conversion resulted in a change of sign"...
92 */
93 #define TBL_TYPE "short" /* C data type of state table */
94 #else
95 #define TBL_TYPE "char" /* C data type of state table */
96 #endif /* IRIX60 */
97
98 #define C_L 014 /* Formfeed */
99
100 #define SEP 1 /* Token types */
101 #define LBRACK 2
102 #define RBRACK 3
103 #define WORD 4
104 #define COMMA 5
105
/* Storage sizes */

#define MAXSTATES 50			/* max number of states */
#define MAXWORD 50			/* max # of chars/word */
/*
 * Number of bytes in a transition's state bitmask.  One bit is needed for
 * every state number from 0 through MAXSTATES, i.e. MAXSTATES+1 bits, so
 * round up from that count.  (The former (MAXSTATES+6)/8 came out one byte
 * short whenever MAXSTATES was a multiple of 8 or one more than that.)
 */
#define SBYTES (MAXSTATES/8 + 1)	/* # of bytes for state bitmask */
111
112 /* Name of wart function in generated program */
113
114 #ifndef FNAME
115 #define FNAME "wart"
116 #endif /* FNAME */
117
118 /* Structure for state information */
119
120 struct transx {
121 CHAR states[SBYTES]; /* included states */
122 int anyst; /* true if this good from any state */
123 CHAR inchr; /* input character */
124 int actno; /* associated action */
125 struct transx *nxt;
126 }; /* next transition */
127 typedef struct transx *trans;
128
129 /* Function prototypes */
130
131 _PROTOTYP( VOID setwstate, (int, trans) );
132 _PROTOTYP( int teststate, (int, trans) );
133 _PROTOTYP( trans rdinput, (FILE *, FILE *) );
134 _PROTOTYP( VOID initial, (FILE *, FILE *) );
135 _PROTOTYP( int isin, (char *, int) );
136 _PROTOTYP( int isword, (int) );
137 _PROTOTYP( VOID rdword, (FILE *, char *) );
138 _PROTOTYP( VOID rdstates, (FILE *, FILE *) );
139 _PROTOTYP( trans newtrans, (void) );
140 _PROTOTYP( trans rdrules, (FILE *, FILE *) );
141 _PROTOTYP( VOID statelist, (FILE *, trans) );
142 _PROTOTYP( VOID copyact, (FILE *, FILE *, int) );
143 _PROTOTYP( int faction, (trans, int, int) );
144 _PROTOTYP( VOID emptytbl, (void) );
145 _PROTOTYP( VOID addaction, (int, int, int) );
146 _PROTOTYP( VOID writetbl, (FILE *) );
147 _PROTOTYP( VOID warray, (FILE *, char *, int [], int, char *) );
148 _PROTOTYP( VOID prolog, (FILE *) );
149 _PROTOTYP( VOID epilogue, (FILE *) );
150 _PROTOTYP( VOID copyrest, (FILE *, FILE *) );
151 _PROTOTYP( int gettoken, (FILE *) );
152 _PROTOTYP( VOID rdcmnt, (FILE *) );
153 _PROTOTYP( VOID clrhash, (void) );
154 _PROTOTYP( int hash, (char *) );
155 _PROTOTYP( VOID enter, (char *, int) );
156 _PROTOTYP( int lkup, (char *) );
157 _PROTOTYP( static char* copy, (char *s) );
158
159 /* Variables and tables */
160
161 /* lt 1992-10-08 Begin
162 * provide definition for deblog variable
163 * ckcdeb.h declares as extern. DECC AXP is strict about ref/def model
164 * Variable is unused herein, to the best of my knowledge.
165 */
166 #ifdef VMS
167 int deblog;
168 #endif /* VMS */
169 /* lt 1992-10-08 End
170 */
171
172 static int lines, nstates, nacts;
173
174 static char tokval[MAXWORD];
175
176 static int tbl[MAXSTATES*96];
177
178 char *tbl_type = TBL_TYPE;
179
180 char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n";
181
182 char *fname = FNAME; /* Generated function name goes here */
183
184 /* rest of program... */
185
186 char *txt2 = "()\n\
187 {\n\
188 int c,actno;\n\
189 extern ";
190
/* Data type of state table is inserted here (TBL_TYPE: char or short) */
192
193 char *txt2a =
194 " tbl[];\n\
195 while (1) {\n\
196 c = input() - 32;\n\
197 debug(F000,\"PROTO input\",ckitoa(state),c+32);\n\
198 if (c < 0 || c > 95) c = 0;\n";
199
200 char *txt2b = " if ((actno = tbl[c + state*96]) != -1)\n\
201 switch(actno) {\n";
202
203 /* this program's output goes here, followed by final text... */
204
205 char *txt3 = "\n }\n }\n}\n\n";
206
207
208 /*
209 * turn on the bit associated with the given state
210 *
211 */
212 VOID
setwstate(state,t)213 setwstate(state,t) int state; trans t; {
214 int idx,msk;
215 idx = state/8; /* byte associated with state */
216 msk = 0x80 >> (state % 8); /* bit mask for state */
217 t->states[idx] |= msk;
218 }
219
220 /*
221 * see if the state is involved in the transition
222 *
223 */
224 int
teststate(state,t)225 teststate(state,t) int state; trans t; {
226 int idx,msk;
227 idx = state/8;
228 msk = 0x80 >> (state % 8);
229 return(t->states[idx] & msk);
230 }
231
232
233 /*
234 * read input from here...
235 *
236 */
237
238 trans
rdinput(infp,outfp)239 rdinput(infp,outfp) FILE *infp,*outfp; {
240 trans x;
241 lines = 1; /* line counter */
242 nstates = 0; /* no states */
243 nacts = 0; /* no actions yet */
244 fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
245 fprintf(outfp,"Wart preprocessor. */\n");
246 fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
247 fprintf(outfp,"source file instead, */\n");
248 fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
249 fprintf(outfp,"C source file. */\n\n");
250 fprintf(outfp,"%c* Wart Version Info: */\n",'/');
251 fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
252
253 initial(infp,outfp); /* read state names, initial defs */
254 prolog(outfp); /* write out our initial code */
255 x = rdrules(infp,outfp); /* read rules */
256 epilogue(outfp); /* write out epilogue code */
257 return(x);
258 }
259
260
261 /*
262 * initial - read initial definitions and state names. Returns
263 * on EOF or %%.
264 *
265 */
266 VOID
initial(infp,outfp)267 initial(infp,outfp) FILE *infp, *outfp; {
268 int c;
269 char wordbuf[MAXWORD];
270 while ((c = getc(infp)) != EOF) {
271 if (c == '%') {
272 rdword(infp,wordbuf);
273 if (strcmp(wordbuf,"states") == 0)
274 rdstates(infp,outfp);
275 else if (strcmp(wordbuf,"%") == 0) return;
276 else fprintf(outfp,"%%%s",wordbuf);
277 }
278 else putc(c,outfp);
279 if (c == '\n') lines++;
280 }
281 }
282
/*
 * isin - tell whether a character occurs in a string.
 *
 * s: NUL-terminated string to search.
 * c: character to look for; it is converted to char before comparing.
 *
 * Returns 1 if c matches one of the characters before the terminating NUL,
 * 0 otherwise (so a c of 0 is never found).
 */
int
isin(s,c) char *s; int c; {
    char want = (char) c;

    while (*s != '\0') {
	if (*s++ == want) return(1);
    }
    return(0);
}
/*
 * isword - tell whether a character can be part of a word.
 *
 * A word character is anything isalnum() accepts, or one of . % _ - $ @
 * Returns 1 if c qualifies, 0 if not.
 */
int
isword(c) int c; {
    static char extras[] = ".%_-$@";	/* allowed besides alphanumerics */

    if (isalnum(c)) return(1);
    return(isin(extras,c));
}
299
300 /*
301 * read the next word into the given buffer.
302 *
303 */
304 VOID
rdword(fp,buf)305 rdword(fp,buf) FILE *fp; char *buf; {
306 int len = 0,c;
307 while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c;
308 *buf++ = '\0'; /* tie off word */
309 ungetc(c,fp); /* put break char back */
310 }
311
312 /*
313 * read state names, up to a newline.
314 *
315 */
316 VOID
rdstates(fp,ofp)317 rdstates(fp,ofp) FILE *fp,*ofp; {
318 int c;
319 char wordbuf[MAXWORD];
320 while ((c = getc(fp)) != EOF && c != '\n') {
321 if (isspace(c) || c == C_L) continue; /* skip whitespace */
322 ungetc(c,fp); /* put char back */
323 rdword(fp,wordbuf); /* read the whole word */
324 enter(wordbuf,++nstates); /* put into symbol tbl */
325 fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
326 }
327 lines++;
328 }
329
330 /*
331 * allocate a new, empty transition node
332 *
333 */
334 trans
newtrans()335 newtrans() {
336 trans new;
337 int i;
338 new = (trans) malloc(sizeof (struct transx));
339 for (i=0; i<SBYTES; i++) new->states[i] = 0;
340 new->anyst = 0;
341 new->nxt = NULL;
342 return(new);
343 }
344
345
346 /*
347 * read all the rules.
348 *
349 */
350
351 trans
rdrules(fp,out)352 rdrules(fp,out) FILE *fp,*out; {
353 trans head,cur,prev;
354 int curtok;
355 head = cur = prev = NULL;
356 while ((curtok = gettoken(fp)) != SEP)
357
358 switch(curtok) {
359 case LBRACK:
360 if (cur == NULL)
361 cur = newtrans();
362 else
363 fatal("duplicate state list");
364 statelist(fp,cur); /* set states */
365 continue; /* prepare to read char */
366
367 case WORD:
368 if ((int)strlen(tokval) != 1)
369 fatal("multiple chars in state");
370 if (cur == NULL) {
371 cur = newtrans();
372 cur->anyst = 1;
373 }
374 cur->actno = ++nacts;
375 cur->inchr = (char) (tokval[0] - 32);
376 if (head == NULL)
377 head = cur;
378 else
379 prev->nxt = cur;
380 prev = cur;
381 cur = NULL;
382 copyact(fp,out,nacts);
383 break;
384 default: fatal("bad input format");
385 }
386 return(head);
387 }
388
389 /*
390 * read a list of (comma-separated) states, set them in the
391 * given transition.
392 *
393 */
394 VOID
statelist(fp,t)395 statelist(fp,t) FILE *fp; trans t; {
396 int curtok,sval;
397 curtok = COMMA;
398 while (curtok != RBRACK) {
399 if (curtok != COMMA) fatal("missing comma");
400 if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
401 if ((sval = lkup(tokval)) == -1) {
402 fprintf(stderr,"state %s undefined\n",tokval);
403 fatal("undefined state");
404 }
405 setwstate(sval,t);
406 curtok = gettoken(fp);
407 }
408 }
409
410 /*
411 * copy an action from the input to the output file
412 *
413 */
414 VOID
copyact(inp,outp,actno)415 copyact(inp,outp,actno) FILE *inp,*outp; int actno; {
416 int c,bcnt;
417 fprintf(outp,"case %d:\n",actno);
418 while (c = getc(inp), (isspace(c) || c == C_L))
419 if (c == '\n') lines++;
420 if (c == '{') {
421 bcnt = 1;
422 fputs(" {",outp);
423 while (bcnt > 0 && (c = getc(inp)) != EOF) {
424 if (c == '{') bcnt++;
425 else if (c == '}') bcnt--;
426 else if (c == '\n') lines++;
427 putc(c,outp);
428 }
429 if (bcnt > 0) fatal("action doesn't end");
430 } else {
431 while (c != '\n' && c != EOF) {
432 putc(c,outp);
433 c = getc(inp);
434 }
435 lines++;
436 }
437 fprintf(outp,"\n break;\n");
438 }
439
440 /*
441 * find the action associated with a given character and state.
442 * returns -1 if one can't be found.
443 *
444 */
445 int
faction(hd,state,chr)446 faction(hd,state,chr) trans hd; int state,chr; {
447 while (hd != NULL) {
448 if (hd->anyst || teststate(state,hd))
449 if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr)
450 return(hd->actno);
451 hd = hd->nxt;
452 }
453 return(-1);
454 }
455
456 /*
457 * empty the table...
458 *
459 */
460 VOID
emptytbl()461 emptytbl() {
462 int i;
463 for (i=0; i<nstates*96; i++) tbl[i] = -1;
464 }
465
466 /*
467 * add the specified action to the output for the given state and chr.
468 *
469 */
470 VOID
addaction(act,state,chr)471 addaction(act,state,chr) int act,state,chr; {
472 tbl[state*96 + chr] = act;
473 }
474
475 VOID
writetbl(fp)476 writetbl(fp) FILE *fp; {
477 warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE);
478 }
479
480
481 /*
482 * write an array to the output file, given its name and size.
483 *
484 */
485 VOID
warray(fp,nam,cont,siz,typ)486 warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; {
487 int i;
488 fprintf(fp,"%s %s[] = {\n",typ,nam);
489 for (i = 0; i < siz - 1; ) {
490 fprintf(fp," %2d,",cont[i]);
491 if ((++i % 16) == 0) putc('\n',fp);
492 }
493 fprintf(fp,"%2d\n};\n",cont[siz-1]);
494 }
/*
  There was an #ifdef rat's nest here regarding main's return type.
  The following should be equivalent and is much simpler.  OS2 actually
  means IBM OS/2 or MS Windows, but OS/2 itself is long gone.
  -fdc, Fri Sep 18 19:42:48 2020
*/
501 #ifdef OS2
502 void
503 #else
504 int
505 #endif /* OS2 */
main(argc,argv)506 main(argc,argv) int argc; char **argv; {
507 trans head;
508 int state,c;
509 FILE *infile,*outfile;
510
511 if (argc > 1) {
512 if ((infile = fopen(argv[1],"r")) == NULL) {
513 fprintf(stderr,"Can't open %s\n",argv[1]);
514 fatal("unreadable input file");
515 }
516 } else infile = stdin;
517
518 if (argc > 2) {
519 if ((outfile = fopen(argv[2],"w")) == NULL) {
520 fprintf(stderr,"Can't write to %s\n",argv[2]);
521 fatal("bad output file");
522 }
523 } else outfile = stdout;
524
525 clrhash(); /* empty hash table */
526 head = rdinput(infile,outfile); /* read input file */
527 emptytbl(); /* empty our tables */
528 for (state = 0; state <= nstates; state++)
529 for (c = 1; c < 96; c++) /* find actions, */
530 addaction(faction(head,state,c),state,c); /* add to tbl */
531 writetbl(outfile);
532 copyrest(infile,outfile);
533 printf("%d states, %d actions\n",nstates,nacts);
534 exit(GOOD_EXIT);
535 }
536
537
/*
 * fatal - report an unrecoverable error and terminate.
 *
 * msg: text describing the problem.
 *
 * Writes the current value of the input line counter and msg to stderr,
 * then exits with BAD_EXIT; it does not return to the caller.
 */

VOID
fatal(msg) char *msg; {
    fprintf(stderr,"error in line %d: %s\n",lines,msg);
    exit(BAD_EXIT);
}
548
549 VOID
prolog(outfp)550 prolog(outfp) FILE *outfp; {
551 int c;
552 while ((c = *txt1++) != '\0') putc(c,outfp);
553 while ((c = *fname++) != '\0') putc(c,outfp);
554 while ((c = *txt2++) != '\0') putc(c,outfp);
555 while ((c = *tbl_type++) != '\0') putc(c,outfp);
556 while ((c = *txt2a++) != '\0') putc(c,outfp);
557 while ((c = *txt2b++) != '\0') putc(c,outfp);
558 }
559
560 VOID
epilogue(outfp)561 epilogue(outfp) FILE *outfp; {
562 int c;
563 while ((c = *txt3++) != '\0') putc(c,outfp);
564 }
565
566 VOID
copyrest(in,out)567 copyrest(in,out) FILE *in,*out; {
568 int c;
569 while ((c = getc(in)) != EOF) putc(c,out);
570 }
571
572 /*
573 * gettoken - returns token type of next token, sets tokval
574 * to the string value of the token if appropriate.
575 *
576 */
577
578 int
gettoken(fp)579 gettoken(fp) FILE *fp; {
580 int c;
581 while (1) { /* loop if reading comments... */
582 do {
583 c = getc(fp);
584 if (c == '\n') lines++;
585 } while ((isspace(c) || c == C_L)); /* skip whitespace */
586 switch(c) {
587 case EOF:
588 return(SEP);
589 case '%':
590 if ((c = getc(fp)) == '%') return(SEP);
591 tokval[0] = '%';
592 tokval[1] = (char) c;
593 rdword(fp,tokval+2);
594 return(WORD);
595 case '<':
596 return(LBRACK);
597 case '>':
598 return(RBRACK);
599 case ',':
600 return(COMMA);
601 case '/':
602 if ((c = getc(fp)) == '*') {
603 rdcmnt(fp); /* skip over the comment */
604 continue;
605 } else { /* and keep looping */
606 ungetc(c,fp); /* put this back into input */
607 c = '/'; /* put character back, fall thru */
608 }
609
610 default:
611 if (isword(c)) {
612 ungetc(c,fp);
613 rdword(fp,tokval);
614 return(WORD);
615 } else fatal("Invalid character in input");
616 }
617 }
618 }
619
620 /*
621 * skip over a comment
622 *
623 */
624
625 VOID
rdcmnt(fp)626 rdcmnt(fp) FILE *fp; {
627 int c,star,prcnt;
628 prcnt = star = 0; /* no star seen yet */
629 while (!((c = getc(fp)) == '/' && star)) {
630 if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
631 prcnt = (c == '%');
632 star = (c == '*');
633 if (c == '\n') lines++;
634 }
635 }
636
637 /*
638 * symbol table management for wart
639 *
640 * entry points:
641 * clrhash - empty hash table.
642 * enter - enter a name into the symbol table
643 * lkup - find a name's value in the symbol table.
644 *
645 */
646
#define HASHSIZE 101			/* # of entries in hash table */

/* One symbol table entry: a state name and its number */
struct sym {
    char *name;				/* symbol name */
    int val;				/* value */
    struct sym *hnxt;			/* next on collision chain */
};

struct sym *htab[HASHSIZE];		/* the hash table */
654
655 /*
656 * empty the hash table before using it...
657 *
658 */
659 VOID
clrhash()660 clrhash() {
661 int i;
662 for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
663 }
664
665 /*
666 * compute the value of the hash for a symbol
667 *
668 */
669 int
hash(name)670 hash(name) char *name; {
671 int sum;
672 for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
673 sum %= HASHSIZE; /* take sum mod hashsize */
674 if (sum < 0) sum += HASHSIZE; /* disallow negative hash value */
675 return(sum);
676 }
677
/*
 * copy - make a private copy of a string.
 *
 * s: NUL-terminated string to duplicate.
 *
 * Returns newly allocated storage holding a copy of s.  Calls fatal() if
 * memory cannot be obtained, so the result is never NULL.
 */
static char*
copy(s) char *s; {
    char *dup;

    dup = (char *) malloc((int)strlen(s) + 1);
    if (dup == NULL)
      fatal("out of memory");
    strcpy(dup,s);
    return(dup);
}
689
690 /*
691 * enter state name into the hash table
692 *
693 */
694 VOID
enter(name,svalue)695 enter(name,svalue) char *name; int svalue; {
696 int h;
697 struct sym *cur;
698 if (lkup(name) != -1) {
699 fprintf(stderr,"state \"%s\" appears twice...\n", name);
700 exit(BAD_EXIT);
701 }
702 h = hash(name);
703 cur = (struct sym *)malloc(sizeof (struct sym));
704 cur->name = copy(name);
705 cur->val = svalue;
706 cur->hnxt = htab[h];
707 htab[h] = cur;
708 }
709
710 /*
711 * find name in the symbol table, return its value. Returns -1
712 * if not found.
713 *
714 */
715 int
lkup(name)716 lkup(name) char *name; {
717 struct sym *cur;
718 for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
719 if (strcmp(cur->name,name) == 0) return(cur->val);
720 return(-1);
721 }
722