1/* 2 * @LANG: c++ 3 */ 4 5#include <iostream> 6#include <string.h> 7using namespace std; 8 9extern char buf[]; 10 11struct Scanner 12{ 13 int cs, act; 14 char *ts, *te; 15 16 // Initialize the machine. Invokes any init statement blocks. Returns 0 17 // if the machine begins in a non-accepting state and 1 if the machine 18 // begins in an accepting state. 19 void init( ); 20 21 // Execute the machine on a block of data. Returns -1 if after processing 22 // the data, the machine is in the error state and can never accept, 0 if 23 // the machine is in a non-accepting state and 1 if the machine is in an 24 // accepting state. 25 int execute( char *data, int len ); 26 27 // Indicate that there is no more data. Returns -1 if the machine finishes 28 // in the error state and does not accept, 0 if the machine finishes 29 // in any other non-accepting state and 1 if the machine finishes in an 30 // accepting state. 31 int finish( ); 32}; 33 34%%{ 35 machine Scanner; 36 37 action to_act { 38 cout << "to: fc = "; 39 if ( fc == '\'' ) 40 cout << (int)fc; 41 else 42 cout << fc; 43 cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl; 44 } 45 action from_act { 46 cout << "from: fc = "; 47 if ( fc == '\'' ) 48 cout << (int)fc; 49 else 50 cout << fc; 51 cout << " ts = " << ( ts == 0 ? -1 : ts-buf ) << endl; 52 } 53 54 c_comm := ( any* $0 '*/' @1 @{ fgoto main; } ) $~to_act $*from_act; 55 cxx_comm := ( any* $0 '\n' @1 @{ fgoto main; } ) $~to_act $*from_act; 56 57 main := |* 58 59 # Single and double literals. 60 ( 'L'? "'" ( [^'\\\n] | /\\./ )* "'" ) $~ to_act $* from_act; 61 ( 'L'? '"' ( [^"\\\n] | /\\./ )* '"' ) $~ to_act $* from_act; 62 63 # Identifiers 64 ( [a-zA-Z_] [a-zA-Z0-9_]* ) $~ to_act $* from_act; 65 66 # Floating literals. 67 fract_const = digit* '.' digit+ | digit+ '.'; 68 exponent = [eE] [+\-]? digit+; 69 float_suffix = [flFL]; 70 71 ( fract_const exponent? float_suffix? | 72 digit+ exponent float_suffix? ) $~ to_act $* from_act; 73 74 # Integer decimal. Leading part buffered by float. 75 ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} ) $~ to_act $* from_act; 76 77 # Integer octal. Leading part buffered by float. 78 ( '0' [0-9]+ [ulUL]{0,2} ) $~ to_act $* from_act; 79 80 # Integer hex. Leading 0 buffered by float. 81 ( '0x' [0-9a-fA-F]+ [ulUL]{0,2} ) $~ to_act $* from_act; 82 83 # Three char compounds, first item already buffered. */ 84 ( '...' ) $~ to_act $* from_act; 85 86 # Single char symbols. 87 ( punct - [_"'] ) $~ to_act $* from_act; 88 89 # Comments and whitespace. 90 ( '/*' ) $~ to_act $* from_act { fgoto c_comm; }; 91 ( '//' ) $~ to_act $* from_act { fgoto cxx_comm; }; 92 93 ( any - 33..126 )+ $~ to_act $* from_act; 94 95 *|; 96}%% 97 98%% write data; 99 100void Scanner::init( ) 101{ 102 %% write init; 103} 104 105int Scanner::execute( char *data, int len ) 106{ 107 char *p = data; 108 char *pe = data + len; 109 char *eof = pe; 110 111 %% write exec; 112 113 return 0; 114} 115 116int Scanner::finish( ) 117{ 118 if ( cs == Scanner_error ) 119 return -1; 120 if ( cs >= Scanner_first_final ) 121 return 1; 122 return 0; 123} 124 125void test( ) 126{ 127 int len = strlen( buf ); 128 Scanner scanner; 129 130 scanner.init(); 131 scanner.execute( buf, len ); 132 if ( scanner.cs == Scanner_error ) { 133 /* Machine failed before finding a token. */ 134 cout << "PARSE ERROR" << endl; 135 } 136 scanner.finish(); 137} 138 139char buf[4096]; 140 141int main() 142{ 143 strcpy( buf, 144 "a b 0.98 /*\n" 145 "9 */'\\''//hi\n" 146 "there\n" 147 ); 148 test(); 149 return 0; 150} 151 152#ifdef _____OUTPUT_____ 153from: fc = a ts = 0 154to: fc = a ts = 0 155from: fc = ts = 0 156to: fc = a ts = -1 157from: fc = ts = 1 158to: fc = ts = 1 159from: fc = b ts = 1 160to: fc = ts = -1 161from: fc = b ts = 2 162to: fc = b ts = 2 163from: fc = ts = 2 164to: fc = b ts = -1 165from: fc = ts = 3 166to: fc = ts = 3 167from: fc = 0 ts = 3 168to: fc = ts = -1 169from: fc = 0 ts = 4 170to: fc = 0 ts = 4 171from: fc = . ts = 4 172to: fc = . ts = 4 173from: fc = 9 ts = 4 174to: fc = 9 ts = 4 175from: fc = 8 ts = 4 176to: fc = 8 ts = 4 177from: fc = ts = 4 178to: fc = 8 ts = -1 179from: fc = ts = 8 180to: fc = ts = 8 181from: fc = / ts = 8 182to: fc = ts = -1 183from: fc = / ts = 9 184to: fc = / ts = 9 185from: fc = * ts = 9 186to: fc = * ts = -1 187from: fc = 188 ts = -1 189to: fc = 190 ts = -1 191from: fc = 9 ts = -1 192to: fc = 9 ts = -1 193from: fc = ts = -1 194to: fc = ts = -1 195from: fc = * ts = -1 196to: fc = * ts = -1 197from: fc = / ts = -1 198to: fc = / ts = -1 199from: fc = 39 ts = 16 200to: fc = 39 ts = 16 201from: fc = \ ts = 16 202to: fc = \ ts = 16 203from: fc = 39 ts = 16 204to: fc = 39 ts = 16 205from: fc = 39 ts = 16 206to: fc = 39 ts = -1 207from: fc = / ts = 20 208to: fc = / ts = 20 209from: fc = / ts = 20 210to: fc = / ts = -1 211from: fc = h ts = -1 212to: fc = h ts = -1 213from: fc = i ts = -1 214to: fc = i ts = -1 215from: fc = 216 ts = -1 217to: fc = 218 ts = -1 219from: fc = t ts = 25 220to: fc = t ts = 25 221from: fc = h ts = 25 222to: fc = h ts = 25 223from: fc = e ts = 25 224to: fc = e ts = 25 225from: fc = r ts = 25 226to: fc = r ts = 25 227from: fc = e ts = 25 228to: fc = e ts = 25 229from: fc = 230 ts = 25 231to: fc = e ts = -1 232from: fc = 233 ts = 30 234to: fc = 235 ts = 30 236to: fc = 237 ts = -1 238#endif 239