1/* 2 * Lexes Ragel input files. 3 * 4 * @LANG: c++ 5 * 6 * Test works with split code gen. 7 */ 8 9#include <iostream> 10#include <stdlib.h> 11#include <stdio.h> 12#include <string.h> 13 14using namespace std; 15 16void escapeXML( const char *data ) 17{ 18 while ( *data != 0 ) { 19 switch ( *data ) { 20 case '<': cout << "<"; break; 21 case '>': cout << ">"; break; 22 case '&': cout << "&"; break; 23 default: cout << *data; break; 24 } 25 data += 1; 26 } 27} 28 29void escapeXML( char c ) 30{ 31 switch ( c ) { 32 case '<': cout << "<"; break; 33 case '>': cout << ">"; break; 34 case '&': cout << "&"; break; 35 default: cout << c; break; 36 } 37} 38 39void escapeXML( const char *data, int len ) 40{ 41 for ( const char *end = data + len; data != end; data++ ) { 42 switch ( *data ) { 43 case '<': cout << "<"; break; 44 case '>': cout << ">"; break; 45 case '&': cout << "&"; break; 46 default: cout << *data; break; 47 } 48 } 49} 50 51inline void write( const char *data ) 52{ 53 cout << data; 54} 55 56inline void write( char c ) 57{ 58 cout << c; 59} 60 61inline void write( const char *data, int len ) 62{ 63 cout.write( data, len ); 64} 65 66 67%%{ 68 machine RagelScan; 69 70 word = [a-zA-Z_][a-zA-Z_0-9]*; 71 integer = [0-9]+; 72 hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; 73 74 default = ^0; 75 EOF = 0; 76 77 # Handles comments in outside code and inline blocks. 78 c_comment := 79 ( default* :>> '*/' ) 80 ${ escapeXML( fc ); } 81 @{ fret; }; 82 83 action emit { 84 escapeXML( ts, te-ts ); 85 } 86 87 # 88 # Inline action code 89 # 90 91 ilscan := |* 92 93 "'" ( [^'\\] | /\\./ )* "'" => emit; 94 '"' ( [^"\\] | /\\./ )* '"' => emit; 95 '/*' { 96 write( "/*" ); 97 fcall c_comment; 98 }; 99 '//' [^\n]* '\n' => emit; 100 101 '{' { 102 write( '{' ); 103 inline_depth += 1; 104 }; 105 106 '}' { 107 write( '}' ); 108 /* If dropping down to the last } then return 109 * to ragel code. */ 110 if ( --inline_depth == 0 ) { 111 write( "</inline>\n" ); 112 fgoto rlscan; 113 } 114 }; 115 116 default => { escapeXML( *ts ); }; 117 *|; 118 119 # 120 # Ragel Tokens 121 # 122 123 rlscan := |* 124 '}%%' { 125 if ( !single_line ) { 126 write( "</section>\n" ); 127 fgoto main; 128 } 129 }; 130 131 '\n' { 132 if ( single_line ) { 133 write( "</section>\n" ); 134 fgoto main; 135 } 136 }; 137 138 # Word 139 word { 140 write( "<word>" ); 141 write( ts, te-ts ); 142 write( "</word>\n" ); 143 }; 144 145 # Decimal integer. 146 integer { 147 write( "<int>" ); 148 write( ts, te-ts ); 149 write( "</int>\n" ); 150 }; 151 152 # Hexidecimal integer. 153 hex { 154 write( "<hex>" ); 155 write( ts, te-ts ); 156 write( "</hex>\n" ); 157 }; 158 159 # Consume comments. 160 '#' [^\n]* '\n'; 161 162 # Single literal string. 163 "'" ( [^'\\] | /\\./ )* "'" { 164 write( "<single_lit>" ); 165 escapeXML( ts, te-ts ); 166 write( "</single_lit>\n" ); 167 }; 168 169 # Double literal string. 170 '"' ( [^"\\] | /\\./ )* '"' { 171 write( "<double_lit>" ); 172 escapeXML( ts, te-ts ); 173 write( "</double_lit>\n" ); 174 }; 175 176 # Or literal. 177 '[' ( [^\]\\] | /\\./ )* ']' { 178 write( "<or_lit>" ); 179 escapeXML( ts, te-ts ); 180 write( "</or_lit>\n" ); 181 }; 182 183 # Regex Literal. 184 '/' ( [^/\\] | /\\./ ) * '/' { 185 write( "<re_lit>" ); 186 escapeXML( ts, te-ts ); 187 write( "</re_lit>\n" ); 188 }; 189 190 # Open an inline block 191 '{' { 192 inline_depth = 1; 193 write( "<inline>{" ); 194 fgoto ilscan; 195 }; 196 197 punct { 198 write( "<symbol>" ); 199 escapeXML( fc ); 200 write( "</symbol>\n" ); 201 }; 202 203 default; 204 *|; 205 206 # 207 # Outside code. 208 # 209 210 main := |* 211 212 "'" ( [^'\\] | /\\./ )* "'" => emit; 213 '"' ( [^"\\] | /\\./ )* '"' => emit; 214 215 '/*' { 216 escapeXML( ts, te-ts ); 217 fcall c_comment; 218 }; 219 220 '//' [^\n]* '\n' => emit; 221 222 '%%{' { 223 write( "<section>\n" ); 224 single_line = false; 225 fgoto rlscan; 226 }; 227 228 '%%' { 229 write( "<section>\n" ); 230 single_line = true; 231 fgoto rlscan; 232 }; 233 234 default { 235 escapeXML( *ts ); 236 }; 237 238 # EOF. 239 EOF; 240 *|; 241}%% 242 243%% write data nofinal; 244 245void test( const char *data ) 246{ 247 std::ios::sync_with_stdio(false); 248 249 int cs, act; 250 const char *ts, *te; 251 int stack[1], top; 252 253 bool single_line = false; 254 int inline_depth = 0; 255 256 %% write init; 257 258 /* Read in a block. */ 259 const char *p = data; 260 const char *pe = data + strlen( data ); 261 const char *eof = pe; 262 %% write exec; 263 264 if ( cs == RagelScan_error ) { 265 /* Machine failed before finding a token. */ 266 cerr << "PARSE ERROR" << endl; 267 exit(1); 268 } 269} 270 271#define BUFSIZE 2048 272 273int main() 274{ 275 std::ios::sync_with_stdio(false); 276 277 test("hi %%{ /'}%%'/ { /*{*/ {} } + '\\'' }%%there\n"); 278 279 return 0; 280} 281