1/*
2 * Lexes Ragel input files.
3 *
4 * @LANG: c++
5 *
6 * Test works with split code gen.
7 */
8
9#include <iostream>
10#include <stdlib.h>
11#include <stdio.h>
12#include <string.h>
13
14using namespace std;
15
/*
 * XML-escaping writers for standard output.
 *
 * All three overloads share one escape table, defined once in the
 * single-character overload: '<' becomes "&lt;", '>' becomes "&gt;",
 * '&' becomes "&amp;", and every other character is written unchanged.
 */

/* Write one character to cout, XML-escaped. */
void escapeXML( char c )
{
	switch ( c ) {
		case '<': std::cout << "&lt;"; break;
		case '>': std::cout << "&gt;"; break;
		case '&': std::cout << "&amp;"; break;
		default: std::cout << c; break;
	}
}

/*
 * Write a NUL-terminated string to cout, XML-escaped.
 * A null pointer is treated as an empty string.
 */
void escapeXML( const char *data )
{
	if ( data == 0 )
		return;

	for ( ; *data != 0; data++ )
		escapeXML( *data );
}

/*
 * Write exactly len characters starting at data to cout, XML-escaped.
 * Nothing is written when data is null or len is not positive; without
 * that guard a negative len would walk the pointer past the buffer, since
 * the loop only stops when data reaches data + len.
 */
void escapeXML( const char *data, int len )
{
	if ( data == 0 || len <= 0 )
		return;

	for ( const char *end = data + len; data != end; data++ )
		escapeXML( *data );
}
50
/* Send a NUL-terminated string to standard output without any escaping. */
inline void write( const char *data )
{
	std::operator<<( std::cout, data );
}
55
/* Send a single character to standard output without any escaping. */
inline void write( char c )
{
	std::ostream &out = std::cout;
	out << c;
}
60
/* Send exactly len characters starting at data to standard output, unescaped. */
inline void write( const char *data, int len )
{
	const std::streamsize count = len;
	std::cout.write( data, count );
}
65
66
%%{
	# Name of the machine; the driver below refers to the generated
	# RagelScan_error constant.
	machine RagelScan;

	# Basic token shapes used by the ragel-section scanner: identifiers,
	# decimal integers, and 0x-prefixed hex integers with at least one digit.
	word = [a-zA-Z_][a-zA-Z_0-9]*;
	integer = [0-9]+;
	hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*;

	# default is any character other than NUL; EOF is the NUL character.
	default = ^0;
	EOF = 0;
76
	# Handles comments in outside code and inline blocks.
	# Entered with fcall once the opening '/*' has been matched by a scanner.
	# Every character consumed here, including the closing '*/', is echoed
	# XML-escaped by the $ action; when the terminator completes, the @ action
	# returns to the calling scanner with fret.
	c_comment :=
		( default* :>> '*/' )
		${ escapeXML( fc ); }
		@{ fret; };
82
	# Echo the text of the current token, the te-ts characters starting at
	# ts, to the output XML-escaped.
	action emit {
		escapeXML( ts, te-ts );
	}
86
	#
	# Inline action code
	#
	# Scanner for the host code inside a { ... } block of a ragel section.
	# Text is passed through XML-escaped; only brace nesting is tracked, so
	# that the block's closing brace can be recognised.
	#

	ilscan := |*

		# String literals, character literals and line comments are echoed
		# as whole tokens, so braces inside them do not affect the depth.
		"'" ( [^'\\] | /\\./ )* "'" => emit;
		'"' ( [^"\\] | /\\./ )* '"' => emit;
		'/*' {
			/* Echo the opener, then let c_comment echo the rest. */
			write( "/*" );
			fcall c_comment;
		};
		'//' [^\n]* '\n' => emit;

		# Nested open brace: echo it and go one level deeper.
		'{' {
			write( '{' );
			inline_depth += 1;
		};

		# Close brace: echo it and come up one level.
		'}' {
			write( '}' );
			/* If dropping down to the last } then return
			 * to ragel code. */
			if ( --inline_depth == 0 ) {
				write( "</inline>\n" );
				fgoto rlscan;
			}
		};

		# Any other single character is echoed XML-escaped.
		default => { escapeXML( *ts ); };
	*|;
118
	#
	# Ragel Tokens
	#
	# Scanner for the contents of a ragel section. Each recognised token is
	# written as one XML element on its own line; literal text is escaped.
	#

	rlscan := |*
		# Section terminator. Ends a multi-line section; inside a
		# single-line section the token is consumed without output.
		'}%%' {
			if ( !single_line ) {
				write( "</section>\n" );
				fgoto main;
			}
		};

		# Newline. Ends a single-line section; inside a multi-line section
		# it is consumed without output.
		'\n' {
			if ( single_line ) {
				write( "</section>\n" );
				fgoto main;
			}
		};

		# Word
		word {
			write( "<word>" );
			write( ts, te-ts );
			write( "</word>\n" );
		};

		# Decimal integer.
		integer {
			write( "<int>" );
			write( ts, te-ts );
			write( "</int>\n" );
		};

		# Hexadecimal integer.
		hex {
			write( "<hex>" );
			write( ts, te-ts );
			write( "</hex>\n" );
		};

		# Consume comments.
		'#' [^\n]* '\n';

		# Single literal string.
		"'" ( [^'\\] | /\\./ )* "'" {
			write( "<single_lit>" );
			escapeXML( ts, te-ts );
			write( "</single_lit>\n" );
		};

		# Double literal string.
		'"' ( [^"\\] | /\\./ )* '"' {
			write( "<double_lit>" );
			escapeXML( ts, te-ts );
			write( "</double_lit>\n" );
		};

		# Or literal.
		'[' ( [^\]\\] | /\\./ )* ']' {
			write( "<or_lit>" );
			escapeXML( ts, te-ts );
			write( "</or_lit>\n" );
		};

		# Regex Literal.
		'/' ( [^/\\] | /\\./ ) * '/' {
			write( "<re_lit>" );
			escapeXML( ts, te-ts );
			write( "</re_lit>\n" );
		};

		# Open an inline block
		'{' {
			/* The depth starts at one for this brace; ilscan returns
			 * here when the depth drops back to zero. */
			inline_depth = 1;
			write( "<inline>{" );
			fgoto ilscan;
		};

		# Any punctuation character not matched by a pattern above.
		punct {
			write( "<symbol>" );
			escapeXML( fc );
			write( "</symbol>\n" );
		};

		# Anything else (whitespace, for instance) is consumed silently.
		default;
	*|;
205
	#
	# Outside code.
	#
	# Scanner for host code outside any ragel section, and the machine's
	# entry point. Host text is echoed XML-escaped until a section opener
	# is seen.
	#

	main := |*

		# String literals, character literals and comments are echoed as
		# whole tokens, so a section opener inside them is not acted on.
		"'" ( [^'\\] | /\\./ )* "'" => emit;
		'"' ( [^"\\] | /\\./ )* '"' => emit;

		'/*' {
			/* Echo the opener, then let c_comment echo the rest. */
			escapeXML( ts, te-ts );
			fcall c_comment;
		};

		'//' [^\n]* '\n' => emit;

		# Multi-line section opener; the section runs until '}%%'.
		'%%{' {
			write( "<section>\n" );
			single_line = false;
			fgoto rlscan;
		};

		# Single-line section opener; the section runs until the newline.
		'%%' {
			write( "<section>\n" );
			single_line = true;
			fgoto rlscan;
		};

		# Any other single character is echoed XML-escaped.
		default {
			escapeXML( *ts );
		};

		# EOF.
		# A NUL character is consumed without output.
		EOF;
	*|;
}%%

/* Emit the static data for the RagelScan machine. Per the Ragel guide the
 * nofinal option leaves out the first-final state constant, which the driver
 * below does not use; the error state constant is still generated. */
%% write data nofinal;
244
/*
 * Runs the RagelScan machine over a NUL-terminated string and writes the
 * resulting markup to standard output.
 *
 * data: input text; its length is taken with strlen, so scanning stops at
 *       the first NUL.
 *
 * If the machine ends in its error state, "PARSE ERROR" is written to cerr
 * and the process exits with status 1.
 *
 * The local names below are the ones the scanner actions and the generated
 * init/exec code refer to, so they must not be renamed.
 */
void test( const char *data )
{
	std::ios::sync_with_stdio(false);

	/* Scanner state: current state, last-matched-pattern id, and the
	 * start and end of the current token. */
	int cs, act;
	const char *ts, *te;
	/* Call stack for fcall/fret. One slot is enough here: the only fcall
	 * target is c_comment, which makes no calls of its own. */
	int stack[1], top;

	/* Shared with the scanner actions: whether the current section was
	 * opened in its single-line form, and the brace depth inside an
	 * inline block. */
	bool single_line = false;
	int inline_depth = 0;

	%% write init;

	/* Read in a block. */
	/* The whole input is handed over as one block; eof == pe marks it as
	 * the last one. */
	const char *p = data;
	const char *pe = data + strlen( data );
	const char *eof = pe;
	%% write exec;

	if ( cs == RagelScan_error ) {
		/* Machine failed before finding a token. */
		cerr << "PARSE ERROR" << endl;
		exit(1);
	}
}
270
/* NOTE(review): BUFSIZE is not referenced by any code visible in this file;
 * presumably a leftover from a buffered-input variant of this test. Confirm
 * before removing. */
#define BUFSIZE 2048
272
273int main()
274{
275	std::ios::sync_with_stdio(false);
276
277	test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n");
278
279	return 0;
280}
281