1/*
2 * @LANG: c++
3 *
4 * Test works with split code gen.
5 */
6
7#include "cppscan1.h"
8
9%%{
10	machine Scanner;
11	access fsm->;
12
13	action pass { fsm->pass(fc); }
14	action buf { fsm->buf(fc); }
15
16	action emit_slit { fsm->token( TK_Slit ); }
17	action emit_dlit { fsm->token( TK_Dlit ); }
18	action emit_id { fsm->token( TK_Id ); }
19	action emit_integer_decimal { fsm->token( TK_IntegerDecimal ); }
20	action emit_integer_octal { fsm->token( TK_IntegerOctal ); }
21	action emit_integer_hex { fsm->token( TK_IntegerHex ); }
22	action emit_float { fsm->token( TK_Float ); }
23	action emit_symbol { fsm->token( fsm->tokBuf.data[0] ); }
24	action tokst { fsm->tokStart = fsm->col; }
25
26	# Single-quoted and double-quoted literals, each with an optional 'L' prefix.
27	slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit;
28	dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit;
29
30	# Identifiers
31	id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id;
32
33	# Floating literals.
34	fract_const = digit* '.' digit+ | digit+ '.';
35	exponent = [eE] [+\-]? digit+;
36	float_suffix = [flFL];
37	float =
38		( fract_const exponent? float_suffix? |
39		digit+ exponent float_suffix? ) >tokst $buf %emit_float;
40
41	# Integer decimal. Leading part buffered by float.
42	integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal;
43
44	# Integer octal. Leading part buffered by float.
45	integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal;
46
47	# Integer hex. Leading 0 buffered by float.
48	integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex;
49
50	# Only buffer the second item, first buffered by symbol.
51	namesep = '::' @buf %{fsm->token( TK_NameSep );};
52	deqs = '==' @buf %{fsm->token( TK_EqualsEquals );};
53	neqs = '!=' @buf %{fsm->token( TK_NotEquals );};
54	and_and = '&&' @buf %{fsm->token( TK_AndAnd );};
55	or_or = '||' @buf %{fsm->token( TK_OrOr );};
56	mult_assign = '*=' @buf %{fsm->token( TK_MultAssign );};
57	percent_assign = '%=' @buf %{fsm->token( TK_PercentAssign );};
58	plus_assign = '+=' @buf %{fsm->token( TK_PlusAssign );};
59	minus_assign = '-=' @buf %{fsm->token( TK_MinusAssign );};
60	amp_assign = '&=' @buf %{fsm->token( TK_AmpAssign );};
61	caret_assign = '^=' @buf %{fsm->token( TK_CaretAssign );};
62	bar_assign = '|=' @buf %{fsm->token( TK_BarAssign );};
63	plus_plus = '++' @buf %{fsm->token( TK_PlusPlus );};
64	minus_minus = '--' @buf %{fsm->token( TK_MinusMinus );};
65	arrow = '->' @buf %{fsm->token( TK_Arrow );};
66	arrow_star = '->*' @buf %{fsm->token( TK_ArrowStar );};
67	dot_star = '.*' @buf %{fsm->token( TK_DotStar );};
68
69	# Buffer both items.
70	div_assign = '/=' @{fsm->buf('/');fsm->buf(fc);} %{fsm->token( TK_DivAssign );};
71
72	# Double dot is sent as two dots.
73	dot_dot = '..' %{fsm->token('.'); fsm->buf('.'); fsm->token('.');};
74
75	# Three char compounds, first item already buffered.
76	dot_dot_dot = '...' %{fsm->buf('.'); fsm->buf('.'); fsm->token( TK_DotDotDot );};
77
78	# All compounds.
79	compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
80			div_assign | percent_assign | plus_assign | minus_assign |
81			amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
82			arrow | arrow_star | dot_star | dot_dot | dot_dot_dot;
83
84	# Single char symbols.
85	symbol =
86		( punct - [./_"'] ) >tokst $buf %emit_symbol |
87		# Do not immediately buffer slash, may be start of comment.
88		'/' >tokst %{ fsm->buf('/'); fsm->token( '/' ); } |
89		# Dot covered by float.
90		'.' %emit_symbol;
91
92	# Comments and whitespace.
93	commc = '/*' @{fsm->pass('/'); fsm->pass('*');} ( any* $0 '*/' @1 ) $pass;
94	commcc = '//' @{fsm->pass('/'); fsm->pass('/');} ( any* $0 '\n' @1 ) $pass;
95	whitespace = ( any - ( 0 | 33..126 ) )+ $pass;
96
97	action onEOFChar {
98		/* On EOF char, write out the non token buffer. */
99		fsm->nonTokBuf.append(0);
100		cout << fsm->nonTokBuf.data;
101		fsm->nonTokBuf.clear();
102	}
103
104	# Using 0 as eof. As a result all null characters get ignored.
105	EOF = 0 @onEOFChar;
106
107	# All outside code tokens.
108	tokens = (
109		id | slit | dlit | float | integer_decimal |
110		integer_octal | integer_hex | compound | symbol );
111	nontok = ( commc | commcc | whitespace | EOF );
112
113	position = (
114		'\n' @{ fsm->line += 1; fsm->col = 1; } |
115		[^\n] @{ fsm->col += 1; } )*;
116
117	main := ( ( tokens | nontok )** ) & position;
118}%%
119
120%% write data;
121
122void Scanner::init( )
123{
124	Scanner *fsm = this;
125	/* A count of the number of characters in
126	 * a token. Used for % sequences. */
127	count = 0;
128	line = 1;
129	col = 1;
130
131	%% write init;
132}
133
134int Scanner::execute( const char *data, int len )
135{
136	Scanner *fsm = this;
137	const char *p = data;
138	const char *pe = data + len;
139	const char *eof = pe;
140
141	%% write exec;
142	if ( cs == Scanner_error )
143		return -1;
144	if ( cs >= Scanner_first_final )
145		return 1;
146	return 0;
147}
148
149int Scanner::finish( )
150{
151	if ( cs == Scanner_error )
152		return -1;
153	if ( cs >= Scanner_first_final )
154		return 1;
155	return 0;
156}
157
158void Scanner::token( int id )
159{
160	/* Leader. */
161	if ( nonTokBuf.length > 0 ) {
162		nonTokBuf.append(0);
163		cout << nonTokBuf.data;
164		nonTokBuf.clear();
165	}
166
167	/* Token data. */
168	tokBuf.append(0);
169	cout << '<' << id << '>' << tokBuf.data;
170	tokBuf.clear();
171}
172
173void Buffer::empty()
174{
175	if ( data != 0 ) {
176		free( data );
177
178		data = 0;
179		length = 0;
180		allocated = 0;
181	}
182}
183
184void Buffer::upAllocate( int len )
185{
186	if ( data == 0 )
187		data = (char*) malloc( len );
188	else
189		data = (char*) realloc( data, len );
190	allocated = len;
191}
192
193void test( const char *buf )
194{
195	Scanner scanner(cout);
196	scanner.init();
197	scanner.execute( buf, strlen(buf) );
198
199	/* The last token is ignored (because there is no next token). Send
200	 * trailing null to force the last token into whitespace. */
201	char eof = 0;
202	if ( scanner.execute( &eof, 1 ) <= 0 ) {
203		cerr << "cppscan: scan failed" << endl;
204		return;
205	}
206	cout.flush();
207}
208
209int main()
210{
211	test(
212		"/*\n"
213		" *  Copyright \n"
214		" */\n"
215		"\n"
216		"/* Construct an fsmmachine from a graph. */\n"
217		"RedFsmAp::RedFsmAp( FsmAp *graph, bool complete )\n"
218		":\n"
219		"	graph(graph),\n"
220		"{\n"
221		"	assert( sizeof(RedTransAp) <= sizeof(TransAp) );\n"
222		"\n"
223		"	reduceMachine();\n"
224		"}\n"
225		"\n"
226		"{\n"
227		"	/* Get the transition that we want to extend. */\n"
228		"	RedTransAp *extendTrans = list[pos].value;\n"
229		"\n"
230		"	/* Look ahead in the transition list. */\n"
231		"	for ( int next = pos + 1; next < list.length(); pos++, next++ ) {\n"
232		"		if ( ! keyOps->eq( list[pos].highKey, nextKey ) )\n"
233		"			break;\n"
234		"	}\n"
235		"	return false;\n"
236		"}\n"
237		"\n" );
238
239	test(
240		"->*\n"
241		".*\n"
242		"/*\"*/\n"
243		"\"/*\"\n"
244		"L'\"'\n"
245		"L\"'\"\n" );
246
247	return 0;
248}
249
250#ifdef _____OUTPUT_____
251/*
252 *  Copyright
253 */
254
255/* Construct an fsmmachine from a graph. */
256<195>RedFsmAp<197>::<195>RedFsmAp<40>( <195>FsmAp <42>*<195>graph<44>, <195>bool <195>complete <41>)
257<58>:
258	<195>graph<40>(<195>graph<41>)<44>,
259<123>{
260	<195>assert<40>( <195>sizeof<40>(<195>RedTransAp<41>) <60><<61>= <195>sizeof<40>(<195>TransAp<41>) <41>)<59>;
261
262	<195>reduceMachine<40>(<41>)<59>;
263<125>}
264
265<123>{
266	/* Get the transition that we want to extend. */
267	<195>RedTransAp <42>*<195>extendTrans <61>= <195>list<91>[<195>pos<93>]<46>.<195>value<59>;
268
269	/* Look ahead in the transition list. */
270	<195>for <40>( <195>int <195>next <61>= <195>pos <43>+ <218>1<59>; <195>next <60>< <195>list<46>.<195>length<40>(<41>)<59>; <195>pos<212>++<44>, <195>next<212>++ <41>) <123>{
271		<195>if <40>( <33>! <195>keyOps<211>-><195>eq<40>( <195>list<91>[<195>pos<93>]<46>.<195>highKey<44>, <195>nextKey <41>) <41>)
272			<195>break<59>;
273	<125>}
274	<195>return <195>false<59>;
275<125>}
276
277<214>->*
278<215>.*
279/*"*/
280<192>"/*"
281<193>L'"'
282<192>L"'"
283#endif
284