1 #include "ckcsym.h"
/* Version banner; rdinput() also copies this string into the generated file. */
char *wartv = "Wart Version 2.15, 18 September 2020 ";
3 
4 #define CKWART_C
5 
6 #ifdef MDEBUG
7 /* Use the real ones in this module only */
8 #ifdef malloc
9 #undef malloc
10 #endif /* malloc */
11 #ifdef calloc
12 #undef calloc
13 #endif /* calloc */
14 #ifdef realloc
15 #undef realloc
16 #endif /* realloc */
17 #ifdef free
18 #undef free
19 #endif /* free */
20 #endif /* MDEBUG */
21 
22 #ifdef MAC
23 #define VOID void
24 #endif /* MAC */
25 
26 /* W A R T */
27 
28 /*
29   A small subset of "lex".
30 
31   Authors: Jeff Damens, Frank da Cruz
  Columbia University Center for Computing Activities.
33   First released November 1984.
34   Copyright (C) 1984, 2009,
35     Trustees of Columbia University in the City of New York.
36     All rights reserved.  See the C-Kermit COPYING.TXT file or the
37     copyright text in the ckcmai.c module for disclaimer and permissions.
38 */
39 
40 /*
41  * input format is:
 *  lines to be copied | %states <state names...>
43  *  %%
44  * <state> | <state,state,...> CHAR  { actions }
45  * ...
46  *  %%
47  *  more lines to be copied
48  */
49 
50 #include "ckcdeb.h"			/* Includes */
51 
52 #ifdef STRATUS
53 /* Actually call printf, not our printf-catcher for Kermit */
54 #ifdef printf
55 #undef printf
56 #endif /* printf */
57 #ifdef fprintf
58 #undef fprintf
59 #endif /* fprintf */
60 #endif /* STRATUS */
61 
62 #ifdef MAC
63 /* Same deal for Macintosh */
64 #ifdef printf
65 #undef printf
66 #endif /* printf */
67 #ifdef fprintf
68 #undef fprintf
69 #endif /* fprintf */
70 #endif /* MAC */
71 
72 #ifdef UNIX
73 /* And UNIX */
74 #ifdef printf
75 #undef printf
76 #endif /* printf */
77 #ifdef fprintf
78 #undef fprintf
79 #endif /* fprintf */
80 #endif /* UNIX */
/*
  The following "char" should be changed to "short", "int", or "long" if your
  wart program will generate more than 127 states.  Since wart is used mainly
  with C-Kermit, which has about 80 states, "char" is adequate.  This keeps
  the program about 3K-4K smaller, which can be critical on 16-bit
  architectures.

  NOTE(review): the values stored in the table are action numbers (see
  addaction), so the limit that matters for "char" appears to be the number
  of actions rather than the number of states.
*/
#ifdef IRIX60
/*
  Also use short or int if your compiler complains inordinately about
  "integer conversion resulted in a change of sign"...
*/
#define TBL_TYPE "short"		/* C data type of state table */
#else
#define TBL_TYPE "char"			/* C data type of state table */
#endif /* IRIX60 */

#define C_L 014				/* Formfeed */

/* Values returned by gettoken(); SEP is returned for "%%" and at EOF. */
#define SEP 1				/* Token types */
#define LBRACK 2
#define RBRACK 3
#define WORD 4
#define COMMA 5

/* Storage sizes */

#define MAXSTATES 50			/* max number of states */
/* Word buffers are MAXWORD bytes; rdword stores at most MAXWORD-1 chars + NUL */
#define MAXWORD 50			/* max # of chars/word */
/* One bit per state number; see setwstate/teststate for the bit layout */
#define SBYTES ((MAXSTATES+6)/8)	/* # of bytes for state bitmask */

/* Name of wart function in generated program */

/* May be overridden at compile time by defining FNAME beforehand. */
#ifndef FNAME
#define FNAME "wart"
#endif /* FNAME */
117 
/* Structure for state information */

/*
 * One rule read from the rules section.  Rules are kept in a singly linked
 * list in the order they were read (see rdrules).  inchr holds the rule's
 * input character minus 32, matching the column index used for the table.
 */
struct transx {
    CHAR states[SBYTES];		/* included states */
    int anyst;				/* true if this good from any state */
    CHAR inchr;				/* input character */
    int actno;				/* associated action */
    struct transx *nxt;			/* next transition */
};
typedef struct transx *trans;
128 
/* Function prototypes */

/*
 * Forward declarations for every function defined in this file except
 * main and fatal.  _PROTOTYP is not defined here; it presumably comes from
 * an included header and selects between ANSI and K&R declarations.
 * NOTE(review): fatal() is called before its definition and has no entry in
 * this list, so it is presumably declared by an included header -- confirm.
 */
_PROTOTYP( VOID setwstate, (int, trans) );
_PROTOTYP( int teststate, (int, trans) );
_PROTOTYP( trans rdinput, (FILE *, FILE *) );
_PROTOTYP( VOID initial, (FILE *, FILE *) );
_PROTOTYP( int isin, (char *, int) );
_PROTOTYP( int isword, (int) );
_PROTOTYP( VOID rdword, (FILE *, char *) );
_PROTOTYP( VOID rdstates, (FILE *, FILE *) );
_PROTOTYP( trans newtrans, (void) );
_PROTOTYP( trans rdrules, (FILE *, FILE *) );
_PROTOTYP( VOID statelist, (FILE *, trans) );
_PROTOTYP( VOID copyact, (FILE *, FILE *, int) );
_PROTOTYP( int faction, (trans, int, int) );
_PROTOTYP( VOID emptytbl, (void) );
_PROTOTYP( VOID addaction, (int, int, int) );
_PROTOTYP( VOID writetbl, (FILE *) );
_PROTOTYP( VOID warray, (FILE *, char *, int [], int, char *) );
_PROTOTYP( VOID prolog, (FILE *) );
_PROTOTYP( VOID epilogue, (FILE *) );
_PROTOTYP( VOID copyrest, (FILE *, FILE *) );
_PROTOTYP( int gettoken, (FILE *) );
_PROTOTYP( VOID rdcmnt, (FILE *) );
_PROTOTYP( VOID clrhash, (void) );
_PROTOTYP( int hash, (char *) );
_PROTOTYP( VOID enter, (char *, int) );
_PROTOTYP( int lkup, (char *) );
_PROTOTYP( static char* copy, (char *s) );
158 
/* Variables and tables */

/* lt 1992-10-08 Begin
 * provide definition for deblog variable
 * ckcdeb.h declares as extern. DECC AXP is strict about ref/def model
 * Variable is unused herein, to the best of my knowledge.
 */
#ifdef VMS
int deblog;
#endif /* VMS */
/* lt 1992-10-08 End
 */

/*
 * lines   - current input line number, reported by fatal()
 * nstates - number of named states read so far (state 0 is unnamed)
 * nacts   - number of actions (rules) read so far
 */
static int lines, nstates, nacts;

/* Text of the most recent WORD token returned by gettoken() */
static char tokval[MAXWORD];

/*
 * Action table: 96 entries per state, indexed as state*96 + (char - 32).
 * An entry is an action number, or -1 when no rule applies.
 */
static int tbl[MAXSTATES*96];

/* C type name written into the generated "extern ... tbl[]" declaration */
char *tbl_type = TBL_TYPE;

/* First piece of the generated function's boilerplate; written by prolog() */
char *txt1 = "\n#define BEGIN state =\n\nint state = 0;\n\nint\n";

char *fname = FNAME;			/* Generated function name goes here */
183 
/* rest of program... */

/*
 * The generated function is assembled from these fragments.  prolog()
 * writes txt1, fname, txt2, tbl_type, txt2a and txt2b in that order; the
 * "case" bodies copied by copyact() follow; epilogue() then writes txt3.
 */
char *txt2 = "()\n\
{\n\
    int c,actno;\n\
    extern ";

/* Data type of state table is inserted here (short or int) */

char *txt2a =
" tbl[];\n\
    while (1) {\n\
	c = input() - 32;\n\
	debug(F000,\"PROTO input\",ckitoa(state),c+32);\n\
	if (c < 0 || c > 95) c = 0;\n";

char *txt2b = "	if ((actno = tbl[c + state*96]) != -1)\n\
	    switch(actno) {\n";

/* this program's output goes here, followed by final text... */

/* Closes the switch, the while loop and the generated function body */
char *txt3 = "\n	    }\n    }\n}\n\n";
206 
207 
208 /*
209  * turn on the bit associated with the given state
210  *
211  */
212 VOID
setwstate(state,t)213 setwstate(state,t) int state; trans t; {
214     int idx,msk;
215     idx = state/8;			/* byte associated with state */
216     msk = 0x80 >> (state % 8);		/* bit mask for state */
217     t->states[idx] |= msk;
218 }
219 
220 /*
221  * see if the state is involved in the transition
222  *
223  */
224 int
teststate(state,t)225 teststate(state,t) int state; trans t; {
226     int idx,msk;
227     idx = state/8;
228     msk = 0x80 >> (state % 8);
229     return(t->states[idx] & msk);
230 }
231 
232 
233 /*
234  * read input from here...
235  *
236  */
237 
238 trans
rdinput(infp,outfp)239 rdinput(infp,outfp) FILE *infp,*outfp; {
240     trans x;
241     lines = 1;				/* line counter */
242     nstates = 0;			/* no states */
243     nacts = 0;				/* no actions yet */
244     fprintf(outfp,"\n%c* WARNING -- This C source program generated by ",'/');
245     fprintf(outfp,"Wart preprocessor. */\n");
246     fprintf(outfp,"%c* Do not edit this file; edit the Wart-format ",'/');
247     fprintf(outfp,"source file instead, */\n");
248     fprintf(outfp,"%c* and then run it through Wart to produce a new ",'/');
249     fprintf(outfp,"C source file.     */\n\n");
250     fprintf(outfp,"%c* Wart Version Info: */\n",'/');
251     fprintf(outfp,"char *wartv = \"%s\";\n\n",wartv);
252 
253     initial(infp,outfp);		/* read state names, initial defs */
254     prolog(outfp);			/* write out our initial code */
255     x = rdrules(infp,outfp);		/* read rules */
256     epilogue(outfp);			/* write out epilogue code */
257     return(x);
258 }
259 
260 
261 /*
262  * initial - read initial definitions and state names.  Returns
263  * on EOF or %%.
264  *
265  */
266 VOID
initial(infp,outfp)267 initial(infp,outfp) FILE *infp, *outfp; {
268     int c;
269     char wordbuf[MAXWORD];
270     while ((c = getc(infp)) != EOF) {
271 	if (c == '%') {
272 	    rdword(infp,wordbuf);
273 	    if (strcmp(wordbuf,"states") == 0)
274 	      rdstates(infp,outfp);
275 	    else if (strcmp(wordbuf,"%") == 0) return;
276 	    else fprintf(outfp,"%%%s",wordbuf);
277 	}
278 	else putc(c,outfp);
279 	if (c == '\n') lines++;
280     }
281 }
282 
/*
 * isin - test whether a character occurs in a string.
 *
 * s: NUL-terminated string to search.
 * c: character to look for; it is converted to char before comparing.
 *
 * Returns 1 if found, 0 otherwise.  The terminating NUL is never matched,
 * so searching for '\0' always returns 0.
 */
int
isin(s,c) char *s; int c; {
    char want = (char) c;
    while (*s != '\0') {
        if (*s++ == want) return(1);
    }
    return(0);
}
/*
 * isword - test whether a character may appear in a word.
 *
 * c: character as returned by getc (may be EOF).
 *
 * Returns 1 for letters, digits and the characters . % _ - $ @,
 * 0 for anything else.
 */
int
isword(c) int c; {
    static char extras[] = ".%_-$@";	/* these are allowable */
    if (isalnum(c)) return(1);
    return(isin(extras,c) ? 1 : 0);
}
299 
300 /*
301  * read the next word into the given buffer.
302  *
303  */
304 VOID
rdword(fp,buf)305 rdword(fp,buf) FILE *fp; char *buf; {
306     int len = 0,c;
307     while (isword(c = getc(fp)) && ++len < MAXWORD) *buf++ = (char) c;
308     *buf++ = '\0';			/* tie off word */
309     ungetc(c,fp);			/* put break char back */
310 }
311 
312 /*
313  * read state names, up to a newline.
314  *
315  */
316 VOID
rdstates(fp,ofp)317 rdstates(fp,ofp) FILE *fp,*ofp; {
318     int c;
319     char wordbuf[MAXWORD];
320     while ((c = getc(fp)) != EOF && c != '\n') {
321 	if (isspace(c) || c == C_L) continue;	/* skip whitespace */
322 	ungetc(c,fp);			/* put char back */
323 	rdword(fp,wordbuf);		/* read the whole word */
324 	enter(wordbuf,++nstates);	/* put into symbol tbl */
325 	fprintf(ofp,"#define %s %d\n",wordbuf,nstates);
326     }
327     lines++;
328 }
329 
330 /*
331  * allocate a new, empty transition node
332  *
333  */
334 trans
newtrans()335 newtrans() {
336     trans new;
337     int i;
338     new = (trans) malloc(sizeof (struct transx));
339     for (i=0; i<SBYTES; i++) new->states[i] = 0;
340     new->anyst = 0;
341     new->nxt = NULL;
342     return(new);
343 }
344 
345 
346 /*
347  * read all the rules.
348  *
349  */
350 
351 trans
rdrules(fp,out)352 rdrules(fp,out) FILE *fp,*out; {
353     trans head,cur,prev;
354     int curtok;
355     head = cur = prev = NULL;
356     while ((curtok = gettoken(fp)) != SEP)
357 
358       switch(curtok) {
359 	case LBRACK:
360 	  if (cur == NULL)
361 	    cur = newtrans();
362 	  else
363 	    fatal("duplicate state list");
364 	  statelist(fp,cur);		/* set states */
365 	  continue;			/* prepare to read char */
366 
367 	case WORD:
368 	  if ((int)strlen(tokval) != 1)
369 	    fatal("multiple chars in state");
370 	  if (cur == NULL) {
371 	      cur = newtrans();
372 	      cur->anyst = 1;
373 	  }
374 	  cur->actno = ++nacts;
375 	  cur->inchr = (char) (tokval[0] - 32);
376 	  if (head == NULL)
377 	    head = cur;
378 	  else
379 	    prev->nxt = cur;
380 	  prev = cur;
381 	  cur = NULL;
382 	  copyact(fp,out,nacts);
383 	  break;
384 	default: fatal("bad input format");
385       }
386     return(head);
387 }
388 
389 /*
390  * read a list of (comma-separated) states, set them in the
391  * given transition.
392  *
393  */
394 VOID
statelist(fp,t)395 statelist(fp,t) FILE *fp; trans t; {
396     int curtok,sval;
397     curtok = COMMA;
398     while (curtok != RBRACK) {
399 	if (curtok != COMMA) fatal("missing comma");
400 	if ((curtok = gettoken(fp)) != WORD) fatal("missing state name");
401 	if ((sval = lkup(tokval)) == -1) {
402 	    fprintf(stderr,"state %s undefined\n",tokval);
403 	    fatal("undefined state");
404 	}
405 	setwstate(sval,t);
406 	curtok = gettoken(fp);
407     }
408 }
409 
410 /*
411  * copy an action from the input to the output file
412  *
413  */
414 VOID
copyact(inp,outp,actno)415 copyact(inp,outp,actno) FILE *inp,*outp; int actno; {
416     int c,bcnt;
417     fprintf(outp,"case %d:\n",actno);
418     while (c = getc(inp), (isspace(c) || c == C_L))
419       if (c == '\n') lines++;
420     if (c == '{') {
421 	bcnt = 1;
422 	fputs("    {",outp);
423 	while (bcnt > 0 && (c = getc(inp)) != EOF) {
424 	    if (c == '{') bcnt++;
425 	    else if (c == '}') bcnt--;
426 	    else if (c == '\n') lines++;
427 	    putc(c,outp);
428 	}
429 	if (bcnt > 0) fatal("action doesn't end");
430     } else {
431 	while (c != '\n' && c != EOF) {
432 	    putc(c,outp);
433 	    c = getc(inp);
434 	}
435 	lines++;
436     }
437     fprintf(outp,"\n    break;\n");
438 }
439 
440 /*
441  * find the action associated with a given character and state.
442  * returns -1 if one can't be found.
443  *
444  */
445 int
faction(hd,state,chr)446 faction(hd,state,chr) trans hd; int state,chr; {
447     while (hd != NULL) {
448 	if (hd->anyst || teststate(state,hd))
449 	  if (hd->inchr == ('.' - 32) || hd->inchr == (char) chr)
450 	    return(hd->actno);
451 	hd = hd->nxt;
452     }
453     return(-1);
454 }
455 
456 /*
457  * empty the table...
458  *
459  */
460 VOID
emptytbl()461 emptytbl() {
462     int i;
463     for (i=0; i<nstates*96; i++) tbl[i] = -1;
464 }
465 
466 /*
467  * add the specified action to the output for the given state and chr.
468  *
469  */
470 VOID
addaction(act,state,chr)471 addaction(act,state,chr) int act,state,chr; {
472     tbl[state*96 + chr] = act;
473 }
474 
475 VOID
writetbl(fp)476 writetbl(fp) FILE *fp; {
477     warray(fp,"tbl",tbl,96*(nstates+1),TBL_TYPE);
478 }
479 
480 
481 /*
482  * write an array to the output file, given its name and size.
483  *
484  */
485 VOID
warray(fp,nam,cont,siz,typ)486 warray(fp,nam,cont,siz,typ) FILE *fp; char *nam; int cont[],siz; char *typ; {
487     int i;
488     fprintf(fp,"%s %s[] = {\n",typ,nam);
489     for (i = 0; i < siz - 1; ) {
490 	fprintf(fp," %2d,",cont[i]);
491 	if ((++i % 16) == 0) putc('\n',fp);
492     }
493     fprintf(fp,"%2d\n};\n",cont[siz-1]);
494 }
495 /*
  There was an #ifdef rat's nest here regarding main's return type.
  The following should be equivalent and is much simpler.  OS2 actually
498   means IBM OS/2 or MS Windows, but OS/2 itself is long gone.
499   -fdc, Fri Sep 18 19:42:48 2020
500 */
501 #ifdef OS2
502 void
503 #else
504 int
505 #endif  /* OS2 */
main(argc,argv)506 main(argc,argv) int argc; char **argv; {
507     trans head;
508     int state,c;
509     FILE *infile,*outfile;
510 
511     if (argc > 1) {
512 	if ((infile = fopen(argv[1],"r")) == NULL) {
513 	    fprintf(stderr,"Can't open %s\n",argv[1]);
514 	    fatal("unreadable input file");
515 	}
516     } else infile = stdin;
517 
518     if (argc > 2) {
519 	if ((outfile = fopen(argv[2],"w")) == NULL) {
520 	    fprintf(stderr,"Can't write to %s\n",argv[2]);
521 	    fatal("bad output file");
522 	}
523     } else outfile = stdout;
524 
525     clrhash();				/* empty hash table */
526     head = rdinput(infile,outfile);	/* read input file */
527     emptytbl();				/* empty our tables */
528     for (state = 0; state <= nstates; state++)
529       for (c = 1; c < 96; c++)		/* find actions, */
530 	addaction(faction(head,state,c),state,c); /* add to tbl */
531     writetbl(outfile);
532     copyrest(infile,outfile);
533     printf("%d states, %d actions\n",nstates,nacts);
534     exit(GOOD_EXIT);
535 }
536 
537 
538 /*
539  * fatal error handler
540  *
541  */
542 
543 VOID
fatal(msg)544 fatal(msg) char *msg; {
545     fprintf(stderr,"error in line %d: %s\n",lines,msg);
546     exit(BAD_EXIT);
547 }
548 
549 VOID
prolog(outfp)550 prolog(outfp) FILE *outfp; {
551     int c;
552     while ((c = *txt1++)     != '\0') putc(c,outfp);
553     while ((c = *fname++)    != '\0') putc(c,outfp);
554     while ((c = *txt2++)     != '\0') putc(c,outfp);
555     while ((c = *tbl_type++) != '\0') putc(c,outfp);
556     while ((c = *txt2a++)    != '\0') putc(c,outfp);
557     while ((c = *txt2b++)    != '\0') putc(c,outfp);
558 }
559 
560 VOID
epilogue(outfp)561 epilogue(outfp) FILE *outfp; {
562     int c;
563     while ((c = *txt3++) != '\0') putc(c,outfp);
564 }
565 
566 VOID
copyrest(in,out)567 copyrest(in,out) FILE *in,*out; {
568     int c;
569     while ((c = getc(in)) != EOF) putc(c,out);
570 }
571 
572 /*
573  * gettoken - returns token type of next token, sets tokval
574  * to the string value of the token if appropriate.
575  *
576  */
577 
578 int
gettoken(fp)579 gettoken(fp) FILE *fp; {
580     int c;
581     while (1) {				/* loop if reading comments... */
582 	do {
583 	    c = getc(fp);
584 	    if (c == '\n') lines++;
585 	} while ((isspace(c) || c == C_L)); /* skip whitespace */
586 	switch(c) {
587 	  case EOF:
588 	    return(SEP);
589 	  case '%':
590 	    if ((c = getc(fp)) == '%') return(SEP);
591 	    tokval[0] = '%';
592 	    tokval[1] = (char) c;
593 	    rdword(fp,tokval+2);
594 	    return(WORD);
595 	  case '<':
596 	    return(LBRACK);
597 	  case '>':
598 	    return(RBRACK);
599 	  case ',':
600 	    return(COMMA);
601 	  case '/':
602 	    if ((c = getc(fp)) == '*') {
603 		rdcmnt(fp);		/* skip over the comment */
604 		continue;
605 	    } else {			/* and keep looping */
606 		ungetc(c,fp);		/* put this back into input */
607 		c = '/';		/* put character back, fall thru */
608 	    }
609 
610 	  default:
611 	    if (isword(c)) {
612 		ungetc(c,fp);
613 		rdword(fp,tokval);
614 		return(WORD);
615 	    } else fatal("Invalid character in input");
616 	}
617     }
618 }
619 
620 /*
621  * skip over a comment
622  *
623  */
624 
625 VOID
rdcmnt(fp)626 rdcmnt(fp) FILE *fp; {
627     int c,star,prcnt;
628     prcnt = star = 0;			/* no star seen yet */
629     while (!((c = getc(fp)) == '/' && star)) {
630 	if (c == EOF || (prcnt && c == '%')) fatal("Unterminated comment");
631 	prcnt = (c == '%');
632 	star = (c == '*');
633 	if (c == '\n') lines++;
634     }
635 }
636 
/*
 * symbol table management for wart
 *
 * entry points:
 *   clrhash - empty hash table.
 *   enter - enter a name into the symbol table
 *   lkup - find a name's value in the symbol table.
 *
 * The table is an array of HASHSIZE buckets, each the head of a singly
 * linked collision chain; hash() selects the bucket for a name.
 */

#define HASHSIZE 101			/* # of entries in hash table */

/* One symbol.  name is a private copy of the text passed to enter(). */
struct sym {
    char *name;				/* symbol name */
    int val;				/* value */
    struct sym *hnxt;			/* next on collision chain */
} *htab[HASHSIZE];			/* the hash table */
654 
655 /*
656  * empty the hash table before using it...
657  *
658  */
659 VOID
clrhash()660 clrhash() {
661     int i;
662     for (i=0; i<HASHSIZE; i++) htab[i] = NULL;
663 }
664 
665 /*
666  * compute the value of the hash for a symbol
667  *
668  */
669 int
hash(name)670 hash(name) char *name; {
671     int sum;
672     for (sum = 0; *name != '\0'; name++) sum += (sum + *name);
673     sum %= HASHSIZE;			/* take sum mod hashsize */
674     if (sum < 0) sum += HASHSIZE;	/* disallow negative hash value */
675     return(sum);
676 }
677 
/*
 * copy - make a private copy of a string.
 *
 * s: NUL-terminated string to duplicate.
 *
 * Returns a newly allocated copy, or NULL if memory could not be
 * obtained; nothing is written in that case.  The caller owns the result.
 */
static char*
copy(s) char *s; {
    char *p;
    p = (char *) malloc((int)strlen(s) + 1);
    if (p != NULL) strcpy(p,s);
    return(p);
}
689 
690 /*
691  * enter state name into the hash table
692  *
693  */
694 VOID
enter(name,svalue)695 enter(name,svalue) char *name; int svalue; {
696     int h;
697     struct sym *cur;
698     if (lkup(name) != -1) {
699 	fprintf(stderr,"state \"%s\" appears twice...\n", name);
700 	exit(BAD_EXIT);
701     }
702     h = hash(name);
703     cur = (struct sym *)malloc(sizeof (struct sym));
704     cur->name = copy(name);
705     cur->val = svalue;
706     cur->hnxt = htab[h];
707     htab[h] = cur;
708 }
709 
710 /*
711  * find name in the symbol table, return its value.  Returns -1
712  * if not found.
713  *
714  */
715 int
lkup(name)716 lkup(name) char *name; {
717     struct sym *cur;
718     for (cur = htab[hash(name)]; cur != NULL; cur = cur->hnxt)
719       if (strcmp(cur->name,name) == 0) return(cur->val);
720     return(-1);
721 }
722