1 /* Copyright (C) 1989, 1997, 1998, 1999 Aladdin Enterprises.  All rights reserved. */
2 
3 /*$Id: ansi2knr.c,v 1.14 1999/04/13 14:44:33 meyering Exp $*/
4 /* Convert ANSI C function definitions to K&R ("traditional C") syntax */
5 
6 /*
7 ansi2knr is distributed in the hope that it will be useful, but WITHOUT ANY
8 WARRANTY.  No author or distributor accepts responsibility to anyone for the
9 consequences of using it or for whether it serves any particular purpose or
10 works at all, unless he says so in writing.  Refer to the GNU General Public
11 License (the "GPL") for full details.
12 
13 Everyone is granted permission to copy, modify and redistribute ansi2knr,
14 but only under the conditions described in the GPL.  A copy of this license
15 is supposed to have been given to you along with ansi2knr so you can know
16 your rights and responsibilities.  It should be in a file named COPYLEFT,
17 or, if there is no file named COPYLEFT, a file named COPYING.  Among other
18 things, the copyright notice and this notice must be preserved on all
19 copies.
20 
21 We explicitly state here what we believe is already implied by the GPL: if
22 the ansi2knr program is distributed as a separate set of sources and a
23 separate executable file which are aggregated on a storage medium together
24 with another program, this in itself does not bring the other program under
25 the GPL, nor does the mere fact that such a program or the procedures for
26 constructing it invoke the ansi2knr executable bring any other part of the
27 program under the GPL.
28 */
29 
30 /*
31  * Usage:
32 	ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]
33  * --filename provides the file name for the #line directive in the output,
34  * overriding input_file (if present).
35  * If no input_file is supplied, input is read from stdin.
36  * If no output_file is supplied, output goes to stdout.
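 * No diagnostics are issued about the conversion itself; only usage,
 * file-open, allocation and output-write errors are reported (on stderr).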
38  *
39  * ansi2knr recognizes function definitions by seeing a non-keyword
40  * identifier at the left margin, followed by a left parenthesis,
41  * with a right parenthesis as the last character on the line,
42  * and with a left brace as the first token on the following line
43  * (ignoring possible intervening comments), except that a line
44  * consisting of only
45  *	identifier1(identifier2)
46  * will not be considered a function definition unless identifier2 is
47  * the word "void", and a line consisting of
48  *	identifier1(identifier2, <<arbitrary>>)
49  * will not be considered a function definition.
50  * ansi2knr will recognize a multi-line header provided
51  * that no intervening line ends with a left or right brace or a semicolon.
52  * These algorithms ignore whitespace and comments, except that
53  * the function name must be the first thing on the line.
54  * The following constructs will confuse it:
55  *	- Any other construct that starts at the left margin and
56  *	    follows the above syntax (such as a macro or function call).
57  *	- Some macros that tinker with the syntax of function headers.
58  */
59 
60 /*
61  * The original and principal author of ansi2knr is L. Peter Deutsch
62  * <ghost@aladdin.com>.  Other authors are noted in the change history
63  * that follows (in reverse chronological order):
64 	lpd 1999-04-12 added minor fixes from Pavel Roskin
65 		<pavel_roskin@geocities.com> for clean compilation with
66 		gcc -W -Wall
67 	lpd 1999-03-22 added hack to recognize lines consisting of
68 		identifier1(identifier2, xxx) as *not* being procedures
69 	lpd 1999-02-03 made indentation of preprocessor commands consistent
70 	lpd 1999-01-28 fixed two bugs: a '/' in an argument list caused an
71 		endless loop; quoted strings within an argument list
72 		confused the parser
73 	lpd 1999-01-24 added a check for write errors on the output,
74 		suggested by Jim Meyering <meyering@ascend.com>
75 	lpd 1998-11-09 added further hack to recognize identifier(void)
76 		as being a procedure
77 	lpd 1998-10-23 added hack to recognize lines consisting of
78 		identifier1(identifier2) as *not* being procedures
79 	lpd 1997-12-08 made input_file optional; only closes input and/or
80 		output file if not stdin or stdout respectively; prints
81 		usage message on stderr rather than stdout; adds
82 		--filename switch (changes suggested by
83 		<ceder@lysator.liu.se>)
84 	lpd 1996-01-21 added code to cope with not HAVE_CONFIG_H and with
85 		compilers that don't understand void, as suggested by
86 		Tom Lane
87 	lpd 1996-01-15 changed to require that the first non-comment token
88 		on the line following a function header be a left brace,
89 		to reduce sensitivity to macros, as suggested by Tom Lane
90 		<tgl@sss.pgh.pa.us>
91 	lpd 1995-06-22 removed #ifndefs whose sole purpose was to define
92 		undefined preprocessor symbols as 0; changed all #ifdefs
93 		for configuration symbols to #ifs
94 	lpd 1995-04-05 changed copyright notice to make it clear that
95 		including ansi2knr in a program does not bring the entire
96 		program under the GPL
97 	lpd 1994-12-18 added conditionals for systems where ctype macros
98 		don't handle 8-bit characters properly, suggested by
99 		Francois Pinard <pinard@iro.umontreal.ca>;
100 		removed --varargs switch (this is now the default)
101 	lpd 1994-10-10 removed CONFIG_BROKETS conditional
102 	lpd 1994-07-16 added some conditionals to help GNU `configure',
103 		suggested by Francois Pinard <pinard@iro.umontreal.ca>;
104 		properly erase prototype args in function parameters,
105 		contributed by Jim Avera <jima@netcom.com>;
106 		correct error in writeblanks (it shouldn't erase EOLs)
107 	lpd 1989-xx-xx original version
108  */
109 
110 /* Most of the conditionals here are to make ansi2knr work with */
111 /* or without the GNU configure machinery. */
112 
113 #if HAVE_CONFIG_H
114 # include <config.h>
115 #endif
116 
117 #include <stdio.h>
118 #include <ctype.h>
119 
120 #if HAVE_CONFIG_H
121 
122 /*
123    For properly autoconfiguring ansi2knr, use AC_CONFIG_HEADER(config.h).
124    This will define HAVE_CONFIG_H and so, activate the following lines.
125  */
126 
127 # if STDC_HEADERS || HAVE_STRING_H
128 #  include <string.h>
129 # else
130 #  include <strings.h>
131 # endif
132 
133 #else /* not HAVE_CONFIG_H */
134 
135 /* Otherwise do it the hard way */
136 
137 # ifdef BSD
138 #  include <strings.h>
139 # else
140 #  ifdef VMS
141     extern int strlen(), strncmp();
142 #  else
143 #   include <string.h>
144 #  endif
145 # endif
146 
147 #endif /* not HAVE_CONFIG_H */
148 
149 #if STDC_HEADERS
150 # include <stdlib.h>
151 #else
152 /*
153    malloc and free should be declared in stdlib.h,
154    but if you've got a K&R compiler, they probably aren't.
155  */
156 # ifdef MSDOS
157 #  include <malloc.h>
158 # else
159 #  ifdef VMS
160      extern char *malloc();
161      extern void free();
162 #  else
163      extern char *malloc();
164      extern int free();
165 #  endif
166 # endif
167 
168 #endif
169 
170 /* Define NULL (for *very* old compilers). */
171 #ifndef NULL
172 # define NULL (0)
173 #endif
174 
/*
 * The ctype macros don't always handle 8-bit characters correctly.
 * Compensate for this here.
 *
 * is_ascii(c) is a real test only when the headers are not known to be
 * standard and isascii is available as a macro; otherwise it is always
 * true.  In every case the character is converted to unsigned char
 * before it reaches a ctype macro: handing a negative value (a plain
 * char >= 0x80 on signed-char machines) to isspace/isalpha/isalnum
 * is undefined.
 */
#ifdef isascii
# undef HAVE_ISASCII		/* just in case */
# define HAVE_ISASCII 1
#endif
#if STDC_HEADERS || !HAVE_ISASCII
# define is_ascii(c) 1
#else
# define is_ascii(c) isascii(c)
#endif

#define is_space(c) \
  (is_ascii((unsigned char)(c)) && isspace((unsigned char)(c)))
#define is_alpha(c) \
  (is_ascii((unsigned char)(c)) && isalpha((unsigned char)(c)))
#define is_alnum(c) \
  (is_ascii((unsigned char)(c)) && isalnum((unsigned char)(c)))

/* Scanning macros */
/* isidchar: may appear anywhere in an identifier. */
#define isidchar(ch) (is_alnum(ch) || (ch) == '_')
/* isidfirstchar: may start an identifier. */
#define isidfirstchar(ch) (is_alpha(ch) || (ch) == '_')
197 
/*
 * Forward references.
 * These are old-style declarations (no parameter lists), matching the
 * K&R-style definitions further down in this file.
 */
char *skipspace();	/* (p, dir): step over whitespace and comments */
char *scanstring();	/* (p, dir): step over a quoted string */
int writeblanks();	/* (start, end): blank out text, keeping EOLs */
int test1();		/* (buf): classify buf as function header or not */
int convert1();		/* (buf, out, header, convert_varargs): emit K&R form */
204 
205 /* The main program */
206 int
main(argc,argv)207 main(argc, argv)
208     int argc;
209     char *argv[];
210 {	FILE *in = stdin;
211 	FILE *out = stdout;
212 	char *filename = 0;
213 	char *program_name = argv[0];
214 	char *output_name = 0;
215 #define bufsize 5000			/* arbitrary size */
216 	char *buf;
217 	char *line;
218 	char *more;
219 	char *usage =
220 	  "Usage: ansi2knr [--filename FILENAME] [INPUT_FILE [OUTPUT_FILE]]\n";
221 	/*
222 	 * In previous versions, ansi2knr recognized a --varargs switch.
223 	 * If this switch was supplied, ansi2knr would attempt to convert
224 	 * a ... argument to va_alist and va_dcl; if this switch was not
225 	 * supplied, ansi2knr would simply drop any such arguments.
226 	 * Now, ansi2knr always does this conversion, and we only
227 	 * check for this switch for backward compatibility.
228 	 */
229 	int convert_varargs = 1;
230 	int output_error;
231 
232 	while ( argc > 1 && argv[1][0] == '-' ) {
233 	  if ( !strcmp(argv[1], "--varargs") ) {
234 	    convert_varargs = 1;
235 	    argc--;
236 	    argv++;
237 	    continue;
238 	  }
239 	  if ( !strcmp(argv[1], "--filename") && argc > 2 ) {
240 	    filename = argv[2];
241 	    argc -= 2;
242 	    argv += 2;
243 	    continue;
244 	  }
245 	  fprintf(stderr, "%s: Unrecognized switch: %s\n", program_name,
246 		  argv[1]);
247 	  fprintf(stderr, usage);
248 	  exit(1);
249 	}
250 	switch ( argc )
251 	   {
252 	default:
253 		fprintf(stderr, usage);
254 		exit(0);
255 	case 3:
256 		output_name = argv[2];
257 		out = fopen(output_name, "w");
258 		if ( out == NULL ) {
259 		  fprintf(stderr, "%s: Cannot open output file %s\n",
260 			  program_name, output_name);
261 		  exit(1);
262 		}
263 		/* falls through */
264 	case 2:
265 		in = fopen(argv[1], "r");
266 		if ( in == NULL ) {
267 		  fprintf(stderr, "%s: Cannot open input file %s\n",
268 			  program_name, argv[1]);
269 		  exit(1);
270 		}
271 		if ( filename == 0 )
272 		  filename = argv[1];
273 		/* falls through */
274 	case 1:
275 		break;
276 	   }
277 	if ( filename )
278 	  fprintf(out, "#line 1 \"%s\"\n", filename);
279 	buf = malloc(bufsize);
280 	if ( buf == NULL )
281 	   {
282 		fprintf(stderr, "Unable to allocate read buffer!\n");
283 		exit(1);
284 	   }
285 	line = buf;
286 	while ( fgets(line, (unsigned)(buf + bufsize - line), in) != NULL )
287 	   {
288 test:		line += strlen(line);
289 		switch ( test1(buf) )
290 		   {
291 		case 2:			/* a function header */
292 			convert1(buf, out, 1, convert_varargs);
293 			break;
294 		case 1:			/* a function */
295 			/* Check for a { at the start of the next line. */
296 			more = ++line;
297 f:			if ( line >= buf + (bufsize - 1) ) /* overflow check */
298 			  goto wl;
299 			if ( fgets(line, (unsigned)(buf + bufsize - line), in) == NULL )
300 			  goto wl;
301 			switch ( *skipspace(more, 1) )
302 			  {
303 			  case '{':
304 			    /* Definitely a function header. */
305 			    convert1(buf, out, 0, convert_varargs);
306 			    fputs(more, out);
307 			    break;
308 			  case 0:
309 			    /* The next line was blank or a comment: */
310 			    /* keep scanning for a non-comment. */
311 			    line += strlen(line);
312 			    goto f;
313 			  default:
314 			    /* buf isn't a function header, but */
315 			    /* more might be. */
316 			    fputs(buf, out);
317 			    strcpy(buf, more);
318 			    line = buf;
319 			    goto test;
320 			  }
321 			break;
322 		case -1:		/* maybe the start of a function */
323 			if ( line != buf + (bufsize - 1) ) /* overflow check */
324 			  continue;
325 			/* falls through */
326 		default:		/* not a function */
327 wl:			fputs(buf, out);
328 			break;
329 		   }
330 		line = buf;
331 	   }
332 	if ( line != buf )
333 	  fputs(buf, out);
334 	free(buf);
335 	if ( output_name ) {
336 	  output_error = ferror(out);
337 	  output_error |= fclose(out);
338 	} else {		/* out == stdout */
339 	  fflush(out);
340 	  output_error = ferror(out);
341 	}
342 	if ( output_error ) {
343 	  fprintf(stderr, "%s: error writing to %s\n", program_name,
344 		  (output_name ? output_name : "stdout"));
345 	  exit(1);
346 	}
347 	if ( in != stdin )
348 	  fclose(in);
349 	return 0;
350 }
351 
/*
 * Skip over whitespace and comments, in either direction.
 * p is the starting position; dir is 1 to scan forward, -1 to scan
 * backward.  Returns a pointer to the first character that is neither
 * whitespace nor part of a comment.  If a comment is still open when a
 * NUL is reached, the pointer to that NUL is returned instead.
 */
char *
skipspace(p, dir)
    register char *p;
    register int dir;			/* 1 for forward, -1 for backward */
{	register int two = dir + dir;	/* width of a comment delimiter */

	for ( ; ; )
	   {	/* First run over plain whitespace. */
		for ( ; is_space(*p); p += dir )
		  ;
		/* Seen in the scan direction, a comment delimiter is */
		/* always a '/' followed by a '*'. */
		if ( *p != '/' || p[dir] != '*' )
		  return p;
		/* Step inside the comment and look for the matching */
		/* '*' followed by '/'. */
		for ( p += two; *p != '*' || p[dir] != '/'; p += dir )
		  if ( *p == 0 )
		    return p;	/* comment not closed in this text */
		p += two;
	   }
}
372 
/*
 * Scan over a quoted string, in either direction.
 * p points at one of the string's double quotes; dir is 1 to scan
 * forward from the opening quote, -1 to scan backward from the closing
 * quote.  Returns a pointer one step (in the scan direction) beyond the
 * matching quote.
 *
 * A quote is skipped as escaped when an odd number of backslashes stands
 * immediately before it.  The backslashes are at lower addresses
 * whichever way we scan, and an even run means the backslashes escape
 * each other, not the quote.
 * If a NUL is reached before a matching quote, scanning stops and the
 * pointer to that NUL is returned.  A backward scan has no way to know
 * where the text begins, so the caller must ensure a quote is present.
 */
char *
scanstring(p, dir)
    register char *p;
    register int dir;
{
    register char *start = p;
    register char *q;

    for (p += dir; *p != 0; p += dir) {
	if (*p != '"')
	    continue;
	/*
	 * Find the start of the run of backslashes preceding the quote.
	 * Scanning forward, never look at or before the opening quote.
	 */
	for (q = p; (dir < 0 || q - 1 > start) && q[-1] == '\\'; q--)
	    ;
	if (((p - q) & 1) == 0)
	    return p + dir;	/* unescaped quote: end of string */
    }
    return p;			/* unterminated string */
}
383 
/*
 * Overwrite the characters from start up to, but not including, end
 * with spaces.  Carriage returns and newlines are left in place so that
 * the line structure of the text is preserved.  Always returns 0.
 */
int
writeblanks(start, end)
    char *start;
    char *end;
{	char *cursor = start;

	while ( cursor < end )
	   {	switch ( *cursor )
		   {
		   case '\r':
		   case '\n':
			break;		/* keep end-of-line characters */
		   default:
			*cursor = ' ';
		   }
		cursor++;
	   }
	return 0;
}
398 
/*
 * Decide whether the text in buf is a function definition.
 * buf may contain newlines and may end with one.
 * Result:
 *	0 - definitely not a function definition;
 *	1 - definitely a function definition;
 *	2 - definitely a function prototype (NOT USED);
 *	-1 - possibly the beginning of a function definition:
 *		the caller should append another line and ask again.
 * Prototypes are deliberately not converted, because Ghostscript's
 * declaration-generating macros look too much like prototypes
 * and confuse the algorithms.
 */
int
test1(buf)
    char *buf;
{	/* Keywords that could be followed by a left parenthesis */
	/* (which, unfortunately, is most of them). */
	static char *keywords[] =
	   {	"asm", "auto", "case", "char", "const", "double",
		"extern", "float", "for", "if", "int", "long",
		"register", "return", "short", "signed", "sizeof",
		"static", "switch", "typedef", "unsigned",
		"void", "volatile", "while", 0
	   };
	register char *scan = buf;
	char *last;		/* last significant character of buf */
	char *name_end;		/* just past the apparent function name */
	char *arg;		/* start of the first parameter */
	char **kw;
	unsigned name_len;
	int arg_len;
	int verdict;
	int depth;

	/* A definition must begin with a name at the left margin. */
	if ( !isidfirstchar(*scan) )
	  return 0;

	/* Classify by the last character that is not blank or comment. */
	last = skipspace(buf + strlen(buf) - 1, -1);
	if ( *last == '{' || *last == '}' )
	  return 0;		/* not a function */
	if ( *last == ';' )
	  verdict = 0;		/* would be 2 if prototypes were handled */
	else if ( *last == ')' )
	  verdict = 1;
	else
	  verdict = -1;

	/* The name must be followed by '(' and a non-empty list. */
	while ( isidchar(*scan) )
	  scan++;
	name_end = scan;
	scan = skipspace(scan, 1);
	if ( *scan != '(' )
	  return 0;		/* not a function */
	scan = skipspace(scan + 1, 1);
	if ( *scan == ')' )
	  return 0;		/* no parameters */

	/* Reject an apparent function name that is really a keyword. */
	name_len = name_end - buf;
	for ( kw = keywords; *kw != 0; kw++ )
	  if ( strlen(*kw) == name_len && strncmp(*kw, buf, name_len) == 0 )
	    return 0;		/* name is a keyword */

	/*
	 * Reject identifier1(identifier2) unless identifier2 is "void",
	 * and reject identifier1(identifier2, xxxx) outright.
	 */
	arg = scan;
	while ( isidchar(*scan) )
	  scan++;
	arg_len = scan - arg;
	scan = skipspace(scan, 1);
	if ( *scan == ',' )
	  return 0;		/* not a function */
	if ( *scan == ')' && !(arg_len == 4 && strncmp(arg, "void", 4) == 0) )
	  return 0;		/* not a function */

	/*
	 * When the text ends with ')', count parentheses over the whole
	 * text: the ')' might only close a formal parameter that is a
	 * procedure, in which case more lines are needed.
	 */
	if ( verdict > 0 )
	   {	depth = 0;
		scan = skipspace(buf, 1);
		while ( *scan )
		   {	if ( *scan == '(' )
			  depth++;
			else if ( *scan == ')' )
			  depth--;
			scan = skipspace(scan + 1, 1);
		   }
		if ( depth > 0 )
		  verdict = -1;
	   }
	return verdict;
}
492 
/*
 * Convert a recognized function definition or header to K&R syntax.
 *
 * buf holds the text of the header, beginning with the function name
 * and containing the parenthesized parameter list; it is modified in
 * place (pieces are blanked out and terminators overwritten).
 * The converted text is written to out.
 * If header is true, only "name();" plus the end-of-line characters of
 * the remaining text is written (a declaration).  Otherwise the K&R
 * form is written: the parameter names inside the parentheses, followed
 * by the parameter declarations, each terminated by ';'.
 * If convert_varargs is true, a "..." parameter is turned into
 * va_alist / va_dcl; otherwise it is blanked out.
 *
 * Returns 0 on success.  If the break table cannot be allocated, a
 * message is printed on stderr, buf is copied to out unchanged and
 * -1 is returned.
 */
int
convert1(buf, out, header, convert_varargs)
    char *buf;
    FILE *out;
    int header;			/* Boolean */
    int convert_varargs;	/* Boolean */
{	char *endfn;		/* just past the first '(' in buf */
	register char *p;
	/*
	 * The breaks table holds two pointers per argument: the start of
	 * the argument's text, and the start of the name it declares.
	 * One extra entry after the last pair points just past the
	 * character that terminated the argument list.
	 */
	char **breaks;
	unsigned num_breaks = 2;	/* for testing */
	char **btop;		/* filling must stop before this slot */
	char **bp;		/* next free slot in breaks */
	char **ap;		/* walks breaks when writing the output */
	char *vararg = 0;	/* start of "..." when converted, else 0 */

	/* Pre-ANSI implementations don't agree on whether strchr */
	/* is called strchr or index, so we open-code it here. */
	for ( endfn = buf; *(endfn++) != '('; )
	  ;
	/* We come back here with a doubled num_breaks whenever the */
	/* table turns out to be too small. */
top:	p = endfn;
	breaks = (char **)malloc(sizeof(char *) * num_breaks * 2);
	if ( breaks == NULL )
	   {	/* Couldn't allocate break table, give up */
		fprintf(stderr, "Unable to allocate break table!\n");
		fputs(buf, out);
		return -1;
	   }
	btop = breaks + num_breaks * 2 - 2;
	bp = breaks;
	/* Parse the argument list */
	do
	   {	int level = 0;		/* parenthesis nesting in this arg */
		char *lp = NULL;	/* first '(' at nesting level 0 */
		char *rp = NULL;	/* last ')' seen inside the arg */
		char *end = NULL;	/* the ',' or ')' ending the arg */

		if ( bp >= btop )
		   {	/* Filled up break table. */
			/* Allocate a bigger one and start over. */
			free((char *)breaks);
			num_breaks <<= 1;
			goto top;
		   }
		*bp++ = p;		/* start of this argument's text */
		/* Find the end of the argument */
		for ( ; end == NULL; p++ )
		   {	switch(*p)
			   {
			   case ',':
				if ( !level ) end = p;
				break;
			   case '(':
				if ( !level ) lp = p;
				level++;
				break;
			   case ')':
				if ( --level < 0 ) end = p;
				else rp = p;
				break;
			   case '/':
				/* Step over a comment; the -1 makes up */
				/* for the p++ of the loop. */
				if (p[1] == '*')
				    p = skipspace(p, 1) - 1;
				break;
			   case '"':
			       /* Step over a quoted string likewise. */
			       p = scanstring(p, 1) - 1;
			       break;
			   default:
				;
			   }
		   }
		/* Erase any embedded prototype parameters. */
		if ( lp && rp )
		  writeblanks(lp + 1, rp);
		p--;			/* back up over terminator */
		/* Find the name being declared. */
		/* This is complicated because of procedure and */
		/* array modifiers. */
		for ( ; ; )
		   {	p = skipspace(p - 1, -1);
			switch ( *p )
			   {
			   case ']':	/* skip array dimension(s) */
			   case ')':	/* skip procedure args OR name */
			   {	int level = 1;
				/* Scan backward to the matching */
				/* opening bracket. */
				while ( level )
				 switch ( *--p )
				   {
				   case ']': case ')':
				       level++;
				       break;
				   case '[': case '(':
				       level--;
				       break;
				   case '/':
				       /* Step backward over a comment. */
				       if (p > buf && p[-1] == '*')
					   p = skipspace(p, -1) + 1;
				       break;
				   case '"':
				       /* Step backward over a string. */
				       p = scanstring(p, -1) + 1;
				       break;
				   default: ;
				   }
			   }
				/* A group that opens with "(*" holds the */
				/* declared name: advance to its first */
				/* identifier character. */
				if ( *p == '(' && *skipspace(p + 1, 1) == '*' )
				   {	/* We found the name being declared */
					while ( !isidfirstchar(*p) )
					  p = skipspace(p, 1) + 1;
					goto found;
				   }
				break;
			   default:
				goto found;
			   }
		   }
		/* Here p points into the declared name, or at the last */
		/* dot of a "..." parameter. */
found:		if ( *p == '.' && p[-1] == '.' && p[-2] == '.' )
		  {	if ( convert_varargs )
			  {	/* The name to list is va_alist; remember */
				/* where the dots start so that the text */
				/* can be cut there later. */
				*bp++ = "va_alist";
				vararg = p-2;
			  }
			else
			  {	/* Drop the parameter: blank it out and */
				/* release its slot in the table. */
				p++;
				if ( bp == breaks + 1 )	/* sole argument */
				  writeblanks(breaks[0], p);
				else
				  writeblanks(bp[-1] - 1, p);
				bp--;
			  }
		   }
		else
		   {	/* Back up to the first character of the name. */
			while ( isidchar(*p) ) p--;
			*bp++ = p+1;
		   }
		p = end;
	   }
	while ( *p++ == ',' );	/* a ',' means another argument follows */
	*bp = p;		/* just past the terminating character */
	/* Make a special check for 'void' arglist */
	if ( bp == breaks+2 )
	   {	p = skipspace(breaks[0], 1);
		if ( !strncmp(p, "void", 4) )
		   {	p = skipspace(p+4, 1);
			/* breaks[2] - 1 is the character that ended */
			/* the argument list. */
			if ( p == breaks[2] - 1 )
			   {	bp = breaks;	/* yup, pretend arglist is empty */
				writeblanks(breaks[0], p + 1);
			   }
		   }
	   }
	/* Put out the function name and left parenthesis. */
	p = buf;
	while ( p != endfn ) putc(*p, out), p++;
	/* Put out the declaration. */
	if ( header )
	  {	fputs(");", out);
		/* Copy only the end-of-line characters of the rest of */
		/* the text, which keeps the number of lines the same. */
		for ( p = breaks[0]; *p; p++ )
		  if ( *p == '\r' || *p == '\n' )
		    putc(*p, out);
	  }
	else
	  {	/* List the declared names, separated by ", ". */
		for ( ap = breaks+1; ap < bp; ap += 2 )
		  {	p = *ap;
			while ( isidchar(*p) )
			  putc(*p, out), p++;
			if ( ap < bp - 1 )
			  fputs(", ", out);
		  }
		fputs(")  ", out);
		/* Put out the argument declarations */
		/* Each argument's terminating character (the one just */
		/* before the next start pointer) becomes a ';'. */
		for ( ap = breaks+2; ap <= bp; ap += 2 )
		  (*ap)[-1] = ';';
		if ( vararg != 0 )
		  {	/* Cut the text where the "..." began. */
			*vararg = 0;
			fputs(breaks[0], out);		/* any prior args */
			fputs("va_dcl", out);		/* the final arg */
			fputs(bp[0], out);
		  }
		else
		  fputs(breaks[0], out);
	  }
	free((char *)breaks);
	return 0;
}
679