1/*
2 * @LANG: c++
3 */
4
5#include <iostream>
6#include <string.h>
7using namespace std;
8
9extern char buf[];
10
struct Scanner
{
	// Integer state used by the Ragel-generated code. cs is the current
	// machine state (compared against Scanner_error and
	// Scanner_first_final elsewhere in this file); act is presumably the
	// id of the last matched scanner pattern, per Ragel convention.
	int cs;
	int act;

	// Token start and token end pointers kept by the scanner. The trace
	// actions print ts as an offset into buf, or -1 while it is null.
	char *ts;
	char *te;

	// Initialize the machine by running the generated init code. Has no
	// return value.
	void init( );

	// Execute the machine on a block of data of the given length. The
	// definition in this file always returns 0; callers examine cs to
	// learn what state the machine ended up in.
	int execute( char *data, int len );

	// Report how the machine finished: -1 when cs is the error state, 1
	// when cs is a final (accepting) state, and 0 for any other state.
	int finish( );
};
33
%%{
	# Scanner specification for a small C/C++ style tokenizer. Every pattern
	# carries two tracing actions so that the order in which to-state and
	# from-state actions run, and the value of ts at each point, is printed.
	machine Scanner;

	# Trace action embedded with the $~ operator (to-state action on all
	# states). Prints the current character fc, or its numeric code when it
	# is a single quote, followed by the offset of ts within buf. A null ts
	# is reported as -1.
	action to_act {
		cout << "to:   fc = ";
		if ( fc == '\'' )
			cout << (int)fc;
		else
			cout << fc;
		cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl;
	}
	# Trace action embedded with the $* operator (from-state action on all
	# states). Same output format as to_act, with a different label.
	action from_act {
		cout << "from: fc = ";
		if ( fc == '\'' )
			cout << (int)fc;
		else
			cout << fc;
		cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl;
	}

	# Comment bodies are consumed by separate machines entered with fgoto
	# from the main scanner. Each one reads any characters up to its
	# terminator and then jumps back to main. The $0 and @1 priorities give
	# the transition that completes the terminator precedence over the
	# any* loop.
	c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act;
	cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act;

	# Longest-match scanner. Every alternative has the same pair of trace
	# actions attached.
	main := |*

	# Single and double literals.
	( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act;
	( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act;

	# Identifiers
	( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act;

	# Floating literals.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];

	( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) $~ to_act $* from_act;

	# Integer decimal. Leading part buffered by float.
	( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act;

	# Integer octal. Leading part buffered by float.
	( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act;

	# Integer hex. Leading 0 buffered by float.
	( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act;

	# Three char compounds, first item already buffered.
	( '...' ) $~ to_act $* from_act;

	# Single char symbols.
	( punct - [_"'] ) $~ to_act $* from_act;

	# Comments and whitespace.
	# A comment opener hands control to the matching comment machine.
	( '/*' ) $~ to_act $* from_act { fgoto c_comm; };
	( '//' ) $~ to_act $* from_act { fgoto cxx_comm; };

	# Runs of characters outside the range 33..126, which covers spaces
	# and newlines.
	( any - 33..126 )+ $~ to_act $* from_act;

	*|;
}%%
97
// Ragel directive: emits the static data for the Scanner machine. The
// constants Scanner_error and Scanner_first_final used further down are
// expected to come from this generated data.
%% write data;
99
// Initialize the scanner by expanding the Ragel init code in place. The
// generated code is expected to set the machine state members (cs, and for
// a scanner ts, te and act) to their starting values.
void Scanner::init( )
{
	%% write init;
}
104
// Run the generated machine over the len characters starting at data.
// Always returns 0; the outcome is left in cs, which test() compares
// against Scanner_error after the call.
int Scanner::execute( char *data, int len )
{
	// p, pe and eof are the names the generated exec code works with
	// (Ragel convention): the read position and the end of the buffer.
	char *p = data;
	char *pe = data + len;
	// eof equals pe, so the end of this buffer is also treated as the end
	// of all input.
	char *eof = pe;

	%% write exec;

	return 0;
}
115
116int Scanner::finish( )
117{
118	if ( cs == Scanner_error )
119		return -1;
120	if ( cs >= Scanner_first_final )
121		return 1;
122	return 0;
123}
124
125void test( )
126{
127	int len = strlen( buf );
128	Scanner scanner;
129
130	scanner.init();
131	scanner.execute( buf, len );
132	if ( scanner.cs == Scanner_error ) {
133		/* Machine failed before finding a token. */
134		cout << "PARSE ERROR" << endl;
135	}
136	scanner.finish();
137}
138
// Input buffer shared by main(), test() and the trace actions, which print
// ts as an offset from the start of this array.
char buf[4096];
140
141int main()
142{
143	strcpy( buf,
144		"a b 0.98 /*\n"
145		"9 */'\\''//hi\n"
146		"there\n"
147	);
148	test();
149	return 0;
150}
151
152#ifdef _____OUTPUT_____
153from: fc = a ts = 0
154to:   fc = a ts = 0
155from: fc =   ts = 0
156to:   fc = a ts = -1
157from: fc =   ts = 1
158to:   fc =   ts = 1
159from: fc = b ts = 1
160to:   fc =   ts = -1
161from: fc = b ts = 2
162to:   fc = b ts = 2
163from: fc =   ts = 2
164to:   fc = b ts = -1
165from: fc =   ts = 3
166to:   fc =   ts = 3
167from: fc = 0 ts = 3
168to:   fc =   ts = -1
169from: fc = 0 ts = 4
170to:   fc = 0 ts = 4
171from: fc = . ts = 4
172to:   fc = . ts = 4
173from: fc = 9 ts = 4
174to:   fc = 9 ts = 4
175from: fc = 8 ts = 4
176to:   fc = 8 ts = 4
177from: fc =   ts = 4
178to:   fc = 8 ts = -1
179from: fc =   ts = 8
180to:   fc =   ts = 8
181from: fc = / ts = 8
182to:   fc =   ts = -1
183from: fc = / ts = 9
184to:   fc = / ts = 9
185from: fc = * ts = 9
186to:   fc = * ts = -1
187from: fc =
188 ts = -1
189to:   fc =
190 ts = -1
191from: fc = 9 ts = -1
192to:   fc = 9 ts = -1
193from: fc =   ts = -1
194to:   fc =   ts = -1
195from: fc = * ts = -1
196to:   fc = * ts = -1
197from: fc = / ts = -1
198to:   fc = / ts = -1
199from: fc = 39 ts = 16
200to:   fc = 39 ts = 16
201from: fc = \ ts = 16
202to:   fc = \ ts = 16
203from: fc = 39 ts = 16
204to:   fc = 39 ts = 16
205from: fc = 39 ts = 16
206to:   fc = 39 ts = -1
207from: fc = / ts = 20
208to:   fc = / ts = 20
209from: fc = / ts = 20
210to:   fc = / ts = -1
211from: fc = h ts = -1
212to:   fc = h ts = -1
213from: fc = i ts = -1
214to:   fc = i ts = -1
215from: fc =
216 ts = -1
217to:   fc =
218 ts = -1
219from: fc = t ts = 25
220to:   fc = t ts = 25
221from: fc = h ts = 25
222to:   fc = h ts = 25
223from: fc = e ts = 25
224to:   fc = e ts = 25
225from: fc = r ts = 25
226to:   fc = r ts = 25
227from: fc = e ts = 25
228to:   fc = e ts = 25
229from: fc =
230 ts = 25
231to:   fc = e ts = -1
232from: fc =
233 ts = 30
234to:   fc =
235 ts = 30
236to:   fc =
237 ts = -1
238#endif
239