1// re2c $INPUT -o $OUTPUT -s
2/* re2c lesson 002_strip_comments, strip_002, (c) M. Boerger 2006 - 2007 */
3/*!ignore:re2c
4
5- complexity
  . When a comment is preceded by a new line and followed by whitespace and a
    new line then we can drop the trailing whitespace and new line.
  . In addition to what we strip out already, what about two consecutive comment
    blocks? When two comments are only separated by whitespace we want to drop
    both. In other words when detecting the end of a comment block we need to
    check whether it is followed by only whitespace and then a new comment in
    which case we continue ignoring the input. If it is followed only by white
    space and a new line we strip out the new white space and new line. In any
    other case we start outputting all that follows.
    But we cannot simply use the following two rules:
	  "*" "/" WS* "/" "*" { continue; }
	  "*" "/" WS* NL      { continue; }
	The main problem is that WS* can get bigger than our buffer, so we need a
	new scanner.
20  . Meanwhile our scanner gets a bit more complex and we have to add two more
21    things. First the scanner code now uses a YYMARKER to store backtracking
22    information.
23
24- backtracking information
  . When the scanner has two rules that can have the same beginning but a
    different ending then it needs to store the position that identifies the
    common part. This is called backtracking. As mentioned above re2c expects
    you to provide the compiler define YYMARKER and a pointer variable.
29  . When shifting buffer contents as done in our fill function the marker needs
30    to be corrected, too.
31
32*/
33
34#include <stdlib.h>
35#include <stdio.h>
36#include <string.h>
37
/* The directive below is processed by re2c; the check that follows relies on
 * it to make YYMAXFILL available. */
/*!max:re2c */
/* Size in bytes of the scanner's input buffer (Scanner.buffer). */
#define	BSIZE	128

/* The buffer has to be able to hold at least YYMAXFILL bytes; a buffer of
 * exactly YYMAXFILL bytes passes this check. */
#if BSIZE < YYMAXFILL
# error BSIZE must be at least YYMAXFILL
#endif

/*
 * Scanner interface used by the re2c generated code. The macros refer to the
 * locals `s` (a Scanner) and `res` (an int) of the function that contains the
 * re2c blocks.
 */
#define	YYCTYPE		unsigned char
#define	YYCURSOR	s.cur
#define	YYLIMIT		s.lim
#define YYMARKER	s.mrk
/* fill() returns 0 when the input is exhausted and -1 otherwise, so `>= 0`
 * means "no more data": the result is kept in `res` and the enclosing loop is
 * left with `break`.
 * NOTE(review): `break` only leaves that loop if the generated code does not
 * wrap the call in a switch - presumably the reason for building with -s;
 * confirm against the re2c manual. */
#define	YYFILL(n)	{ if ((res = fill(&s, n)) >= 0) break; }
50
51typedef struct Scanner
52{
53	FILE			*fp;
54	unsigned char	*cur, *tok, *lim, *eof, *mrk;
55	unsigned char 	buffer[BSIZE];
56} Scanner;
57
58int fill(Scanner *s, int len)
59{
60	if (!len)
61	{
62		s->cur = s->tok = s->lim = s->mrk = s->buffer;
63		s->eof = 0;
64	}
65	if (!s->eof)
66	{
67		int got, cnt = s->tok - s->buffer;
68
69		if (cnt > 0)
70		{
71			memcpy(s->buffer, s->tok, s->lim - s->tok);
72			s->tok -= cnt;
73			s->cur -= cnt;
74			s->lim -= cnt;
75			s->mrk -= cnt;
76		}
77		cnt = BSIZE - cnt;
78		if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt)
79		{
80			s->eof = &s->lim[got];
81		}
82		s->lim += got;
83	}
84	else if (s->cur + len > s->eof)
85	{
86		return 0; /* not enough input data */
87	}
88	return -1;
89}
90
91void echo(Scanner *s)
92{
93	fwrite(s->tok, 1, s->cur - s->tok, stdout);
94}
95
96int scan(FILE *fp)
97{
98	int  res = 0;
99    Scanner s;
100
101	if (!fp)
102	{
103		return 1; /* no file was opened */
104	}
105
106    s.fp = fp;
107
108	fill(&s, 0);
109
110	for(;;)
111	{
112		s.tok = s.cur;
113/*!re2c
114	re2c:indent:top = 2;
115
116	NL			= "\r"? "\n" ;
117	WS			= [\r\n\t ] ;
118	ANY			= [^] ;
119
120	"/" "/"		{ goto cppcomment; }
121	"/" "*"		{ goto comment; }
122	ANY			{ fputc(*s.tok, stdout); continue; }
123*/
124comment:
125		s.tok = s.cur;
126/*!re2c
127	"*" "/"		{ goto commentws; }
128	ANY			{ goto comment; }
129*/
130commentws:
131		s.tok = s.cur;
132/*!re2c
133	NL			{ echo(&s); continue; }
134	WS			{ goto commentws; }
135	ANY			{ echo(&s); continue; }
136*/
137cppcomment:
138		s.tok = s.cur;
139/*!re2c
140	NL			{ echo(&s); continue; }
141	ANY			{ goto cppcomment; }
142*/
143	}
144
145	if (fp != stdin)
146	{
147		fclose(fp); /* close only if not stdin */
148	}
149	return res; /* return result */
150}
151
/**
 * Program entry point.
 *
 * Expects one argument: the name of the file to process, or "-" to read from
 * stdin. Without an argument a usage line is written to stderr.
 *
 * @return the result of scan(), or 1 if no argument was given or the file
 *         could not be opened
 */
int main(int argc, char **argv)
{
	FILE *fp;

	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 1;
	}

	fp = strcmp(argv[1], "-") == 0 ? stdin : fopen(argv[1], "r");
	if (!fp)
	{
		/* Say why nothing happens instead of exiting silently; the exit
		 * status stays the one scan() returns for a null stream. */
		perror(argv[1]);
		return 1;
	}
	return scan(fp);
}
164