/*
 * @LANG: c++
 *
 * Test works with split code gen.
 */

/*
 * A small C++ tokenizer written as a Ragel state machine. Tokens are echoed
 * to cout as <id>text, where id is either a TK_ constant or the character
 * code of a single-character symbol. Comments and whitespace are echoed
 * unchanged. The text after #ifdef _____OUTPUT_____ at the bottom of the
 * file is the output expected from main().
 */

#include "cppscan1.h"

#include <cstdlib>
#include <cstring>
#include <iostream>

%%{
	machine Scanner;
	access fsm->;

	# pass collects non-token text (comments, whitespace), buf collects the
	# text of the token currently being scanned.
	action pass { fsm->pass(fc); }
	action buf { fsm->buf(fc); }

	action emit_slit { fsm->token( TK_Slit ); }
	action emit_dlit { fsm->token( TK_Dlit ); }
	action emit_id { fsm->token( TK_Id ); }
	action emit_integer_decimal { fsm->token( TK_IntegerDecimal ); }
	action emit_integer_octal { fsm->token( TK_IntegerOctal ); }
	action emit_integer_hex { fsm->token( TK_IntegerHex ); }
	action emit_float { fsm->token( TK_Float ); }
	# Single character symbols use their own character code as the token id.
	action emit_symbol { fsm->token( fsm->tokBuf.data[0] ); }
	action tokst { fsm->tokStart = fsm->col; }

	# Single and double literals.
	slit = ( 'L'? ( "'" ( [^'\\\n] | /\\./ )* "'" ) $buf ) >tokst %emit_slit;
	dlit = ( 'L'? ( '"' ( [^"\\\n] | /\\./ )* '"' ) $buf ) >tokst %emit_dlit;

	# Identifiers
	id = ( [a-zA-Z_] [a-zA-Z0-9_]* ) >tokst $buf %emit_id;

	# Floating literals.
	fract_const = digit* '.' digit+ | digit+ '.';
	exponent = [eE] [+\-]? digit+;
	float_suffix = [flFL];
	float =
		( fract_const exponent? float_suffix? |
		digit+ exponent float_suffix? ) >tokst $buf %emit_float;

	# Integer decimal. Leading part buffered by float.
	integer_decimal = ( ( '0' | [1-9] [0-9]* ) [ulUL]{0,3} $buf ) %emit_integer_decimal;

	# Integer octal. Leading part buffered by float.
	# Note the digit class is [0-9], so the digits 8 and 9 are accepted too.
	integer_octal = ( '0' [0-9]+ [ulUL]{0,2} $buf ) %emit_integer_octal;

	# Integer hex. Leading 0 buffered by float.
	integer_hex = ( '0' ( 'x' [0-9a-fA-F]+ [ulUL]{0,2} ) $buf ) %emit_integer_hex;

	# Only buffer the second item, first buffered by symbol.
	namesep = '::' @buf %{fsm->token( TK_NameSep );};
	deqs = '==' @buf %{fsm->token( TK_EqualsEquals );};
	neqs = '!=' @buf %{fsm->token( TK_NotEquals );};
	and_and = '&&' @buf %{fsm->token( TK_AndAnd );};
	or_or = '||' @buf %{fsm->token( TK_OrOr );};
	mult_assign = '*=' @buf %{fsm->token( TK_MultAssign );};
	percent_assign = '%=' @buf %{fsm->token( TK_PercentAssign );};
	plus_assign = '+=' @buf %{fsm->token( TK_PlusAssign );};
	minus_assign = '-=' @buf %{fsm->token( TK_MinusAssign );};
	amp_assign = '&=' @buf %{fsm->token( TK_AmpAssign );};
	caret_assign = '^=' @buf %{fsm->token( TK_CaretAssign );};
	bar_assign = '|=' @buf %{fsm->token( TK_BarAssign );};
	plus_plus = '++' @buf %{fsm->token( TK_PlusPlus );};
	minus_minus = '--' @buf %{fsm->token( TK_MinusMinus );};
	arrow = '->' @buf %{fsm->token( TK_Arrow );};
	arrow_star = '->*' @buf %{fsm->token( TK_ArrowStar );};
	dot_star = '.*' @buf %{fsm->token( TK_DotStar );};

	# Buffer both items.
	div_assign = '/=' @{fsm->buf('/');fsm->buf(fc);} %{fsm->token( TK_DivAssign );};

	# Double dot is sent as two dots.
	dot_dot = '..' %{fsm->token('.'); fsm->buf('.'); fsm->token('.');};

	# Three char compounds, first item already buffered.
	dot_dot_dot = '...' %{fsm->buf('.'); fsm->buf('.'); fsm->token( TK_DotDotDot );};

	# All compounds
	compound = namesep | deqs | neqs | and_and | or_or | mult_assign |
		div_assign | percent_assign | plus_assign | minus_assign |
		amp_assign | caret_assign | bar_assign | plus_plus | minus_minus |
		arrow | arrow_star | dot_star | dot_dot | dot_dot_dot;

	# Single char symbols.
	symbol =
		( punct - [./_"'] ) >tokst $buf %emit_symbol |
		# Do not immediately buffer slash, may be start of comment.
		'/' >tokst %{ fsm->buf('/'); fsm->token( '/' ); } |
		# Dot covered by float.
		'.' %emit_symbol;

	# Comments and whitespace.
	# The opening two characters are passed by hand once both have been seen.
	# The body gives every transition of any* priority 0 and the final
	# transition of the terminator priority 1, so the terminator wins.
	commc = '/*' @{fsm->pass('/'); fsm->pass('*');} ( any* $0 '*/' @1 ) $pass;
	commcc = '//' @{fsm->pass('/'); fsm->pass('/');} ( any* $0 '\n' @1 ) $pass;
	whitespace = ( any - ( 0 | 33..126 ) )+ $pass;

	action onEOFChar {
		/* On EOF char, write out the non token buffer. */
		fsm->nonTokBuf.append(0);
		cout << fsm->nonTokBuf.data;
		fsm->nonTokBuf.clear();
	}

	# Using 0 as eof. As a result all null characters get ignored.
	EOF = 0 @onEOFChar;

	# All outside code tokens.
	tokens = (
		id | slit | dlit | float | integer_decimal |
		integer_octal | integer_hex | compound | symbol );
	nontok = ( commc | commcc | whitespace | EOF );

	# Accepts any input and keeps line and col up to date.
	position = (
		'\n' @{ fsm->line += 1; fsm->col = 1; } |
		[^\n] @{ fsm->col += 1; } )*;

	# Longest-match repetition of tokens and non-tokens, intersected with
	# position so that the line and column counters run alongside the scan.
	main := ( ( tokens | nontok )** ) & position;
}%%

%% write data;

/* Reset the counters and put the machine into its start state. */
void Scanner::init( )
{
	Scanner *fsm = this;
	/* A count of the number of characters in
	 * a token. Used for % sequences. */
	count = 0;
	line = 1;
	col = 1;

	%% write init;
}

/*
 * Run the machine over len bytes starting at data.
 *
 * Returns the same status as finish(): -1 if the machine is in the error
 * state, 1 if it is in a final state and 0 otherwise.
 *
 * NOTE(review): eof is set to pe on every call, so the generated code treats
 * the end of every chunk as end of input, not only the end of the last
 * chunk. Confirm this is intended before feeding input that is split in the
 * middle of a token.
 */
int Scanner::execute( const char *data, int len )
{
	Scanner *fsm = this;
	const char *p = data;
	const char *pe = data + len;
	const char *eof = pe;

	%% write exec;

	/* The status check is the same one finish() performs. */
	return finish();
}

/*
 * Classify the current machine state: -1 for the error state, 1 for a final
 * state, 0 for any other state.
 */
int Scanner::finish( )
{
	if ( cs == Scanner_error )
		return -1;
	if ( cs >= Scanner_first_final )
		return 1;
	return 0;
}

/*
 * Emit one token: first any buffered non-token text, then <id> followed by
 * the buffered token text. Both buffers are cleared afterwards.
 *
 * NOTE(review): output goes to the global cout, while test() hands a stream
 * to the Scanner constructor. Confirm the two are meant to be the same.
 */
void Scanner::token( int id )
{
	/* Leader. */
	if ( nonTokBuf.length > 0 ) {
		nonTokBuf.append(0);
		cout << nonTokBuf.data;
		nonTokBuf.clear();
	}

	/* Token data. */
	tokBuf.append(0);
	cout << '<' << id << '>' << tokBuf.data;
	tokBuf.clear();
}

/* Release the storage, if any, and reset the buffer to its empty state. */
void Buffer::empty()
{
	if ( data != 0 ) {
		free( data );

		data = 0;
		length = 0;
		allocated = 0;
	}
}

/*
 * Resize the storage to len bytes and record the new capacity.
 *
 * The new block is taken through a temporary: assigning the result of
 * realloc() straight to data would lose the old block on failure and leave
 * data null for the next write. Running out of memory is reported on cerr
 * and ends the program.
 */
void Buffer::upAllocate( int len )
{
	char *grown = 0;
	if ( data == 0 )
		grown = static_cast<char*>( malloc( len ) );
	else
		grown = static_cast<char*>( realloc( data, len ) );

	/* A null result for a zero-sized request is not a failure. */
	if ( grown == 0 && len > 0 ) {
		cerr << "cppscan: out of memory" << endl;
		abort();
	}

	data = grown;
	allocated = len;
}

/*
 * Scan one null-terminated input and write the annotated result to cout.
 * A failed scan is reported on cerr.
 */
void test( const char *buf )
{
	Scanner scanner(cout);
	scanner.init();
	if ( scanner.execute( buf, static_cast<int>( strlen(buf) ) ) < 0 ) {
		cerr << "cppscan: scan failed" << endl;
		return;
	}

	/* The last token is ignored (because there is no next token). Send
	 * trailing null to force the last token into whitespace. */
	char eof = 0;
	if ( scanner.execute( &eof, 1 ) <= 0 ) {
		cerr << "cppscan: scan failed" << endl;
		return;
	}
	cout.flush();
}

int main()
{
	test(
		"/*\n"
		" * Copyright \n"
		" */\n"
		"\n"
		"/* Construct an fsmmachine from a graph. */\n"
		"RedFsmAp::RedFsmAp( FsmAp *graph, bool complete )\n"
		":\n"
		" graph(graph),\n"
		"{\n"
		" assert( sizeof(RedTransAp) <= sizeof(TransAp) );\n"
		"\n"
		" reduceMachine();\n"
		"}\n"
		"\n"
		"{\n"
		" /* Get the transition that we want to extend. */\n"
		" RedTransAp *extendTrans = list[pos].value;\n"
		"\n"
		" /* Look ahead in the transition list. */\n"
		" for ( int next = pos + 1; next < list.length(); pos++, next++ ) {\n"
		" if ( ! keyOps->eq( list[pos].highKey, nextKey ) )\n"
		" break;\n"
		" }\n"
		" return false;\n"
		"}\n"
		"\n" );

	test(
		"->*\n"
		".*\n"
		"/*\"*/\n"
		"\"/*\"\n"
		"L'\"'\n"
		"L\"'\"\n" );

	return 0;
}

#ifdef _____OUTPUT_____
/*
 * Copyright 
 */

/* Construct an fsmmachine from a graph. */
<195>RedFsmAp<197>::<195>RedFsmAp<40>( <195>FsmAp <42>*<195>graph<44>, <195>bool <195>complete <41>)
<58>:
 <195>graph<40>(<195>graph<41>)<44>,
<123>{
 <195>assert<40>( <195>sizeof<40>(<195>RedTransAp<41>) <60><<61>= <195>sizeof<40>(<195>TransAp<41>) <41>)<59>;

 <195>reduceMachine<40>(<41>)<59>;
<125>}

<123>{
 /* Get the transition that we want to extend. */
 <195>RedTransAp <42>*<195>extendTrans <61>= <195>list<91>[<195>pos<93>]<46>.<195>value<59>;

 /* Look ahead in the transition list. */
 <195>for <40>( <195>int <195>next <61>= <195>pos <43>+ <218>1<59>; <195>next <60>< <195>list<46>.<195>length<40>(<41>)<59>; <195>pos<212>++<44>, <195>next<212>++ <41>) <123>{
 <195>if <40>( <33>! <195>keyOps<211>-><195>eq<40>( <195>list<91>[<195>pos<93>]<46>.<195>highKey<44>, <195>nextKey <41>) <41>)
 <195>break<59>;
 <125>}
 <195>return <195>false<59>;
<125>}

<214>->*
<215>.*
/*"*/
<192>"/*"
<193>L'"'
<192>L"'"
#endif