1// re2c $INPUT -o $OUTPUT -b
2/* re2c lesson 002_strip_comments, strip_003, (c) M. Boerger 2006 - 2007 */
3/*!ignore:re2c
4
5- more complexity
6  . Right now we strip out trailing white space and new lines after a comment
7    block. This can be a problem when the comment block was not preceded by
8    a new line.
9  . The solution is to use trailing contexts.
10
11-  trailing contexts
12  . Re2c allows you to check for a portion of input and only recognize it when
13    it is followed by another portion. This is called a trailing context.
14  . The trailing context is not part of the identified input. That means that
15    it follows exactly at the cursor. A consequence is that the scanner has
16    already read more input and on the next run you need to restore the
17    beginning of input, in our case s.tok, from the cursor, here s.cur, rather
18    than restoring to the beginning of the buffer. This way the scanner can
19    reuse the portion it has already read.
20  . The position of the trailing context is stored in YYCTXMARKER for which
21    a pointer variable needs to be provided.
22  . As with YYMARKER the corresponding variable needs to be corrected if we
23    shift the contents of the buffer.
24  . Still this is not all we need to solve the problem. What is left is that
25    the information whether a trailing context was detected has to be stored
26    somewhere. This is done by the new variable nlcomment.
27
28- formatting
29  . Until now we only used single line expression code and we always had the
30    opening { on the same line as the rule itself. If we have multiline rule
31    code and care about formatting we can no longer rely on re2c. Now we have
32    to indent the rule code ourselves. Also we need to take care of the opening
33    {. If we keep it on the same line as the rule then re2c will indent it
34    correctly and the emitted #line information will be correct. If we place
35    it on the next line then the #line directive will also point to that line
36    and not to the rule.
37*/
38
39#include <stdlib.h>
40#include <stdio.h>
41#include <string.h>
42
/*!max:re2c */
/* The directive above is where re2c emits the YYMAXFILL definition. */

/* Capacity, in bytes, of the scanner's input buffer. */
#define BSIZE 128

/* Refuse to build when the buffer is smaller than YYMAXFILL. */
#if YYMAXFILL > BSIZE
# error BSIZE must be greater YYMAXFILL
#endif

/*
 * Interface between the re2c-generated code and the Scanner struct.
 * All of these expand to members of a variable named `s`, and YYFILL also
 * assigns to a variable named `res`, so both must exist in the scope where
 * the generated code is placed.
 */
#define YYCTYPE      unsigned char
#define YYCURSOR     s.cur
#define YYLIMIT      s.lim
#define YYMARKER     s.mrk
#define YYCTXMARKER  s.ctx

/*
 * Refill the buffer; when fill() returns a value >= 0 the enclosing loop is
 * left via `break`. The body is deliberately a bare block and not a
 * do/while(0) wrapper, because the `break` has to reach the caller's loop.
 */
#define YYFILL(n)    { if ((res = fill(&s, n)) >= 0) break; }
56
57typedef struct Scanner
58{
59	FILE			*fp;
60	unsigned char	*cur, *tok, *lim, *eof, *ctx, *mrk;
61	unsigned char 	buffer[BSIZE];
62} Scanner;
63
64int fill(Scanner *s, int len)
65{
66	if (!len)
67	{
68		s->cur = s->tok = s->lim = s->mrk = s->buffer;
69		s->eof = 0;
70	}
71	if (!s->eof)
72	{
73		int got, cnt = s->tok - s->buffer;
74
75		if (cnt > 0)
76		{
77			memcpy(s->buffer, s->tok, s->lim - s->tok);
78			s->tok -= cnt;
79			s->cur -= cnt;
80			s->lim -= cnt;
81			s->mrk -= cnt;
82			s->ctx -= cnt;
83		}
84		cnt = BSIZE - cnt;
85		if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt)
86		{
87			s->eof = &s->lim[got];
88		}
89		s->lim += got;
90	}
91	else if (s->cur + len > s->eof)
92	{
93		return 0; /* not enough input data */
94	}
95	return -1;
96}
97
98void echo(Scanner *s)
99{
100	fwrite(s->tok, 1, s->cur - s->tok, stdout);
101}
102
103int scan(FILE *fp)
104{
105	int  res = 0;
106	int  nlcomment = 0;
107    Scanner s;
108
109	if (!fp)
110	{
111		return 1; /* no file was opened */
112	}
113
114    s.fp = fp;
115
116	fill(&s, 0);
117
118	for(;;)
119	{
120		s.tok = s.cur;
121/*!re2c
122	re2c:indent:top = 2;
123
124	NL			= "\r"? "\n" ;
125	WS			= [\r\n\t ] ;
126	ANY			= [^] ;
127
128	"/" "/"		{ goto cppcomment; }
129	NL / "/""*"	{ echo(&s); nlcomment = 1; continue; }
130	"/" "*"		{ goto comment; }
131	ANY			{ fputc(*s.tok, stdout); continue; }
132*/
133comment:
134		s.tok = s.cur;
135/*!re2c
136	"*" "/"		{ goto commentws; }
137	ANY			{ goto comment; }
138*/
139commentws:
140		s.tok = s.cur;
141/*!re2c
142	NL? "/" "*"	{ goto comment; }
143	NL			{
144					if (!nlcomment)
145					{
146						echo(&s);
147					}
148					nlcomment = 0;
149					continue;
150				}
151	WS			{ goto commentws; }
152	ANY			{ echo(&s); nlcomment = 0; continue; }
153*/
154cppcomment:
155		s.tok = s.cur;
156/*!re2c
157	NL			{ echo(&s); continue; }
158	ANY			{ goto cppcomment; }
159*/
160	}
161
162	if (fp != stdin)
163	{
164		fclose(fp); /* close only if not stdin */
165	}
166	return res; /* return result */
167}
168
/*
 * Entry point. Expects one argument: the name of the file to process, or
 * "-" to read from stdin. The result of scan() becomes the exit status.
 * Returns 1 when the argument is missing or the file cannot be opened.
 */
int main(int argc, char **argv)
{
	FILE *fp;

	if (argc < 2)
	{
		fprintf(stderr, "%s <file|->\n", argv[0]);
		return 1;
	}

	if (!strcmp(argv[1], "-"))
	{
		fp = stdin;
	}
	else
	{
		fp = fopen(argv[1], "r");
		if (!fp)
		{
			/* Say why nothing was processed instead of failing silently. */
			perror(argv[1]);
			return 1;
		}
	}
	return scan(fp);
}
181