1/* 2 * Lexes Ragel input files. 3 */ 4 5#include <iostream> 6#include <stdlib.h> 7#include <stdio.h> 8#include <string.h> 9 10using namespace std; 11 12void escapeXML( char *data ) 13{ 14 while ( *data != 0 ) { 15 switch ( *data ) { 16 case '<': cout << "<"; break; 17 case '>': cout << ">"; break; 18 case '&': cout << "&"; break; 19 default: cout << *data; break; 20 } 21 data += 1; 22 } 23} 24 25void escapeXML( char c ) 26{ 27 switch ( c ) { 28 case '<': cout << "<"; break; 29 case '>': cout << ">"; break; 30 case '&': cout << "&"; break; 31 default: cout << c; break; 32 } 33} 34 35void escapeXML( char *data, int len ) 36{ 37 for ( char *end = data + len; data != end; data++ ) { 38 switch ( *data ) { 39 case '<': cout << "<"; break; 40 case '>': cout << ">"; break; 41 case '&': cout << "&"; break; 42 default: cout << *data; break; 43 } 44 } 45} 46 47inline void write( const char *data ) 48{ 49 cout << data; 50} 51 52inline void write( char c ) 53{ 54 cout << c; 55} 56 57inline void write( char *data, int len ) 58{ 59 cout.write( data, len ); 60} 61 62 63%%{ 64 machine RagelScan; 65 66 word = [a-zA-Z_][a-zA-Z_0-9]*; 67 integer = [0-9]+; 68 hex = '0x' [0-9a-fA-F] [0-9a-fA-F]*; 69 70 default = ^0; 71 EOF = 0; 72 73 # Handles comments in outside code and inline blocks. 74 c_comment := 75 ( default* :>> '*/' ) 76 ${ escapeXML( fc ); } 77 @{ fret; }; 78 79 action emit { 80 escapeXML( ts, te-ts ); 81 } 82 83 # 84 # Inline action code 85 # 86 87 ilscan := |* 88 89 "'" ( [^'\\] | /\\./ )* "'" => emit; 90 '"' ( [^"\\] | /\\./ )* '"' => emit; 91 '/*' { 92 write( "/*" ); 93 fcall c_comment; 94 }; 95 '//' [^\n]* '\n' => emit; 96 97 '{' { 98 write( '{' ); 99 inline_depth += 1; 100 }; 101 102 '}' { 103 write( '}' ); 104 /* If dropping down to the last } then return 105 * to ragel code. */ 106 if ( --inline_depth == 0 ) { 107 write( "</inline>\n" ); 108 fgoto rlscan; 109 } 110 }; 111 112 default => { escapeXML( *ts ); }; 113 *|; 114 115 # 116 # Ragel Tokens 117 # 118 119 rlscan := |* 120 '}%%' { 121 if ( !single_line ) { 122 write( "</section>\n" ); 123 fgoto main; 124 } 125 }; 126 127 '\n' { 128 if ( single_line ) { 129 write( "</section>\n" ); 130 fgoto main; 131 } 132 }; 133 134 # Word 135 word { 136 write( "<word>" ); 137 write( ts, te-ts ); 138 write( "</word>\n" ); 139 }; 140 141 # Decimal integer. 142 integer { 143 write( "<int>" ); 144 write( ts, te-ts ); 145 write( "</int>\n" ); 146 }; 147 148 # Hexidecimal integer. 149 hex { 150 write( "<hex>" ); 151 write( ts, te-ts ); 152 write( "</hex>\n" ); 153 }; 154 155 # Consume comments. 156 '#' [^\n]* '\n'; 157 158 # Single literal string. 159 "'" ( [^'\\] | /\\./ )* "'" { 160 write( "<single_lit>" ); 161 escapeXML( ts, te-ts ); 162 write( "</single_lit>\n" ); 163 }; 164 165 # Double literal string. 166 '"' ( [^"\\] | /\\./ )* '"' { 167 write( "<double_lit>" ); 168 escapeXML( ts, te-ts ); 169 write( "</double_lit>\n" ); 170 }; 171 172 # Or literal. 173 '[' ( [^\]\\] | /\\./ )* ']' { 174 write( "<or_lit>" ); 175 escapeXML( ts, te-ts ); 176 write( "</or_lit>\n" ); 177 }; 178 179 # Regex Literal. 180 '/' ( [^/\\] | /\\./ ) * '/' { 181 write( "<re_lit>" ); 182 escapeXML( ts, te-ts ); 183 write( "</re_lit>\n" ); 184 }; 185 186 # Open an inline block 187 '{' { 188 inline_depth = 1; 189 write( "<inline>{" ); 190 fgoto ilscan; 191 }; 192 193 punct { 194 write( "<symbol>" ); 195 escapeXML( fc ); 196 write( "</symbol>\n" ); 197 }; 198 199 default; 200 *|; 201 202 # 203 # Outside code. 204 # 205 206 main := |* 207 208 "'" ( [^'\\] | /\\./ )* "'" => emit; 209 '"' ( [^"\\] | /\\./ )* '"' => emit; 210 211 '/*' { 212 escapeXML( ts, te-ts ); 213 fcall c_comment; 214 }; 215 216 '//' [^\n]* '\n' => emit; 217 218 '%%{' { 219 write( "<section>\n" ); 220 single_line = false; 221 fgoto rlscan; 222 }; 223 224 '%%' { 225 write( "<section>\n" ); 226 single_line = true; 227 fgoto rlscan; 228 }; 229 230 default { 231 escapeXML( *ts ); 232 }; 233 234 # EOF. 235 EOF; 236 *|; 237}%% 238 239%% write data nofinal; 240 241#define BUFSIZE 2048 242 243int main() 244{ 245 std::ios::sync_with_stdio(false); 246 247 int cs, act; 248 char *ts, *te; 249 int stack[1], top; 250 251 static char inbuf[BUFSIZE]; 252 bool single_line = false; 253 int inline_depth = 0; 254 255 %% write init; 256 257 bool done = false; 258 int have = 0; 259 while ( !done ) { 260 /* How much space is in the buffer? */ 261 int space = BUFSIZE - have; 262 if ( space == 0 ) { 263 /* Buffer is full. */ 264 cerr << "TOKEN TOO BIG" << endl; 265 exit(1); 266 } 267 268 /* Read in a block. */ 269 char *p = inbuf + have; 270 cin.read( p, space ); 271 int len = cin.gcount(); 272 char *pe = p + len; 273 char *eof = 0; 274 275 /* Check for EOF. */ 276 if ( len == 0 ) { 277 eof = pe; 278 done = true; 279 } 280 281 %% write exec; 282 283 if ( cs == RagelScan_error ) { 284 /* Machine failed before finding a token. */ 285 cerr << "PARSE ERROR" << endl; 286 exit(1); 287 } 288 289 if ( ts == 0 ) 290 have = 0; 291 else { 292 /* There is a prefix to preserve, shift it over. */ 293 have = pe - ts; 294 memmove( inbuf, ts, have ); 295 te = inbuf + (te-ts); 296 ts = inbuf; 297 } 298 } 299 return 0; 300} 301