1/*
2 * Lexes Ragel input files.
3 */
4
5#include <iostream>
6#include <stdlib.h>
7#include <stdio.h>
8#include <string.h>
9
10using namespace std;
11
/*
 * Write a NUL-terminated string to standard output, replacing the XML
 * markup characters '<', '>' and '&' with their entity references.
 * Every other character is written unchanged.
 */
void escapeXML( char *data )
{
	for ( char *cur = data; *cur != 0; ++cur ) {
		const char ch = *cur;
		if ( ch == '<' )
			std::cout << "&lt;";
		else if ( ch == '>' )
			std::cout << "&gt;";
		else if ( ch == '&' )
			std::cout << "&amp;";
		else
			std::cout << ch;
	}
}
24
/*
 * Write a single character to standard output, substituting the XML entity
 * reference when the character is '<', '>' or '&'.
 */
void escapeXML( char c )
{
	/* Stays null when the character needs no escaping. */
	const char *entity = 0;
	if ( c == '<' )
		entity = "&lt;";
	else if ( c == '>' )
		entity = "&gt;";
	else if ( c == '&' )
		entity = "&amp;";

	if ( entity != 0 )
		std::cout << entity;
	else
		std::cout << c;
}
34
/*
 * Write exactly len characters starting at data to standard output,
 * replacing '<', '>' and '&' with their XML entity references. The range is
 * length-delimited, so a NUL character inside it is written like any other
 * character rather than ending the output.
 */
void escapeXML( char *data, int len )
{
	char *const stop = data + len;
	while ( data != stop ) {
		const char ch = *data;
		if ( ch == '<' )
			std::cout << "&lt;";
		else if ( ch == '>' )
			std::cout << "&gt;";
		else if ( ch == '&' )
			std::cout << "&amp;";
		else
			std::cout << ch;
		++data;
	}
}
46
/* Send a NUL-terminated string to standard output verbatim (no escaping). */
inline void write( const char *data )
{
	std::cout << data;
}
51
/* Send one character to standard output verbatim (no escaping). */
inline void write( char c )
{
	std::cout << c;
}
56
/*
 * Send exactly len characters starting at data to standard output verbatim
 * (no escaping). The range is length-delimited, not NUL-terminated.
 */
inline void write( char *data, int len )
{
	std::cout.write( data, static_cast<std::streamsize>( len ) );
}
61
62
%%{
	# Scanner specification. The machine name prefixes the generated
	# symbols; main() tests the generated RagelScan_error state.
	machine RagelScan;

	# Identifier: a letter or underscore, then letters, digits or underscores.
	word = [a-zA-Z_][a-zA-Z_0-9]*;
	# One or more decimal digits.
	integer = [0-9]+;
	# '0x' followed by at least one hexadecimal digit.
	hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;

	# Any character other than NUL.
	default = ^0;
	# The NUL character; the outside-code scanner consumes it silently.
	EOF = 0;

	# Handles comments in outside code and inline blocks.
	# Entered with fcall after the scanner has already written the opening
	# '/*'. Every character consumed, including the closing '*/', is
	# written XML-escaped; completing '*/' returns to the calling scanner.
	c_comment :=
		( default* :>> '*/' )
		${ escapeXML( fc ); }
		@{ fret; };

	# Writes the text of the current token (ts up to te), XML-escaped.
	action emit {
		escapeXML( ts, te-ts );
	}

	#
	# Inline action code
	#

	# Copies the host code of an inline { ... } block to the output,
	# XML-escaped, while inline_depth counts brace nesting so that the
	# block ends only at the brace matching the one that opened it.
	ilscan := |*

		# Character and string literals are matched as single tokens, so
		# braces or comment openers inside them are copied as plain text.
		"'" ( [^'\\] | /\\./ )* "'" => emit;
		'"' ( [^"\\] | /\\./ )* '"' => emit;
		# Block comment: write the opener, then let c_comment copy the rest.
		'/*' {
			write( "/*" );
			fcall c_comment;
		};
		# Line comment, up to and including its newline.
		'//' [^\n]* '\n' => emit;

		# Nested open brace: one level deeper.
		'{' {
			write( '{' );
			inline_depth += 1;
		};

		# Close brace: one level up, possibly ending the inline block.
		'}' {
			write( '}' );
			/* If dropping down to the last } then return
			 * to ragel code. */
			if ( --inline_depth == 0 ) {
				write( "</inline>\n" );
				fgoto rlscan;
			}
		};

		# Any other character is copied on its own, escaped.
		default => { escapeXML( *ts ); };
	*|;

	#
	# Ragel Tokens
	#

	# Tokenizes the contents of a Ragel section, writing one XML element
	# per recognized token.
	rlscan := |*
		# Closes a multi-line section and returns to outside code. Inside
		# a single-line section the token is consumed with no output.
		'}%%' {
			if ( !single_line ) {
				write( "</section>\n" );
				fgoto main;
			}
		};

		# A newline closes a single-line section; in a multi-line section
		# it is consumed with no output.
		'\n' {
			if ( single_line ) {
				write( "</section>\n" );
				fgoto main;
			}
		};

		# Word
		word {
			write( "<word>" );
			write( ts, te-ts );
			write( "</word>\n" );
		};

		# Decimal integer.
		integer {
			write( "<int>" );
			write( ts, te-ts );
			write( "</int>\n" );
		};

		# Hexadecimal integer.
		hex {
			write( "<hex>" );
			write( ts, te-ts );
			write( "</hex>\n" );
		};

		# Consume comments.
		'#' [^\n]* '\n';

		# Single literal string.
		"'" ( [^'\\] | /\\./ )* "'" {
			write( "<single_lit>" );
			escapeXML( ts, te-ts );
			write( "</single_lit>\n" );
		};

		# Double literal string.
		'"' ( [^"\\] | /\\./ )* '"' {
			write( "<double_lit>" );
			escapeXML( ts, te-ts );
			write( "</double_lit>\n" );
		};

		# Or literal.
		'[' ( [^\]\\] | /\\./ )* ']' {
			write( "<or_lit>" );
			escapeXML( ts, te-ts );
			write( "</or_lit>\n" );
		};

		# Regex Literal.
		'/' ( [^/\\] | /\\./ ) * '/' {
			write( "<re_lit>" );
			escapeXML( ts, te-ts );
			write( "</re_lit>\n" );
		};

		# Open an inline block
		'{' {
			inline_depth = 1;
			write( "<inline>{" );
			fgoto ilscan;
		};

		# A punctuation character not consumed by a longer pattern above
		# is written as a symbol.
		punct {
			write( "<symbol>" );
			escapeXML( fc );
			write( "</symbol>\n" );
		};

		# Any other character is consumed with no output.
		default;
	*|;

	#
	# Outside code.
	#

	# Copies host-language text to the output, XML-escaped, until a
	# section marker switches to the Ragel token scanner.
	main := |*

		# Character and string literals are matched as single tokens, so
		# section markers or comment openers inside them are copied as text.
		"'" ( [^'\\] | /\\./ )* "'" => emit;
		'"' ( [^"\\] | /\\./ )* '"' => emit;

		# Block comment: write the opener, then let c_comment copy the rest.
		'/*' {
			escapeXML( ts, te-ts );
			fcall c_comment;
		};

		# Line comment, up to and including its newline.
		'//' [^\n]* '\n' => emit;

		# Start of a multi-line section, closed later by '}%%'.
		'%%{' {
			write( "<section>\n" );
			single_line = false;
			fgoto rlscan;
		};

		# Start of a single-line section, closed by the next newline.
		'%%' {
			write( "<section>\n" );
			single_line = true;
			fgoto rlscan;
		};

		# Any other character is copied on its own, escaped.
		default {
			escapeXML( *ts );
		};

		# EOF.
		EOF;
	*|;
}%%
238
/* Ragel directive: emit the machine's static data here. The nofinal option
 * leaves out the first-final state constant, which this program never
 * consults. */
%% write data nofinal;

/* Size in bytes of main()'s input buffer. A token must fit in the buffer
 * with room to spare; main() aborts with "TOKEN TOO BIG" otherwise. */
#define BUFSIZE 2048
242
/*
 * Reads Ragel source from standard input and writes the scanner's XML
 * rendering of it to standard output. Input is consumed in blocks through a
 * fixed-size buffer; a token left unfinished at the end of a block is moved
 * to the front of the buffer so the next read can complete it.
 *
 * Exits with status 1 after printing "TOKEN TOO BIG" to standard error when
 * the retained token prefix leaves no room in the buffer, or "PARSE ERROR"
 * when the machine reaches its error state. Returns 0 once end of input has
 * been processed.
 */
int main()
{
	std::ios::sync_with_stdio(false);

	/* Variables the Ragel-generated code refers to by name. cs is the current
	 * state; ts and te delimit the token being matched; act is required by
	 * the generated scanner code. */
	int cs, act;
	char *ts, *te;
	/* Call stack for fcall/fret. One slot is enough: the scanners call only
	 * c_comment, and c_comment makes no further calls. */
	int stack[1], top;

	static char inbuf[BUFSIZE];
	/* State read and written by the scanner actions: whether the current
	 * section was opened by a single-line marker, and the brace nesting
	 * depth inside an inline block. */
	bool single_line = false;
	int inline_depth = 0;

	/* Ragel directive: emit the code that initializes the machine state. */
	%% write init;

	bool done = false;
	/* Number of bytes at the front of inbuf carried over from the last pass. */
	int have = 0;
	while ( !done ) {
		/* How much space is in the buffer? */
		int space = BUFSIZE - have;
		if ( space == 0 ) {
			/* Buffer is full. */
			/* NOTE(review): ts appears to remain set while the machine is
			 * inside c_comment (entered via fcall from a token action), so a
			 * comment longer than the buffer may also end up here - confirm
			 * against the generated code. */
			cerr << "TOKEN TOO BIG" << endl;
			exit(1);
		}

		/* Read in a block. */
		/* New data is appended after the carried-over prefix; p and pe bound
		 * the fresh bytes for the generated code. */
		char *p = inbuf + have;
		cin.read( p, space );
		int len = cin.gcount();
		char *pe = p + len;
		char *eof = 0;

		/* Check for EOF. */
		/* A read that yields nothing marks the end of input: eof is set for
		 * the generated code and this becomes the final pass. */
		if ( len == 0 ) {
			eof = pe;
			done = true;
		}

		/* Ragel directive: emit the code that runs the machine over [p, pe). */
		%% write exec;

		if ( cs == RagelScan_error ) {
			/* Machine failed before finding a token. */
			cerr << "PARSE ERROR" << endl;
			exit(1);
		}

		/* A null ts means no token is in progress, so nothing is kept. */
		if ( ts == 0 )
			have = 0;
		else {
			/* There is a prefix to preserve, shift it over. */
			have = pe - ts;
			memmove( inbuf, ts, have );
			/* Rebase the token pointers onto the moved data. */
			te = inbuf + (te-ts);
			ts = inbuf;
		}
	}
	return 0;
}
301