// re2c $INPUT -o $OUTPUT -s
/* re2c lesson 002_strip_comments, strip_002, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c

- complexity
  . When a comment is preceded by a new line and followed by whitespace and a
    new line then we can drop the trailing whitespace and new line.
  . In addition to what we strip out already, what about two consecutive
    comment blocks? When two comments are only separated by whitespace we want
    to drop both. In other words when detecting the end of a comment block we
    need to check whether it is followed by only whitespace and then a new
    comment, in which case we continue ignoring the input. If it is followed
    only by white space and a new line we strip out the new white space and
    new line. In any other case we start outputting all that follows.
    But we cannot simply use the following two rules:
      "*" "/" WS* "/" "*" { continue; }
      "*" "/" WS* NL      { continue; }
    The main problem is that WS* can get bigger than our buffer, so we need a
    new scanner.
  . Meanwhile our scanner gets a bit more complex and we have to add two more
    things. First the scanner code now uses a YYMARKER to store backtracking
    information.

- backtracking information
  . When the scanner has two rules that can have the same beginning but a
    different ending then it needs to store the position that identifies the
    common part. This is called backtracking. As mentioned above re2c expects
    you to provide compiler define YYMARKER and a pointer variable.
  . When shifting buffer contents as done in our fill function the marker needs
    to be corrected, too.

*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*!max:re2c */
#define BSIZE 128

#if BSIZE < YYMAXFILL
# error BSIZE must be greater YYMAXFILL
#endif

/* Glue between the re2c generated code and the local Scanner `s` in scan(). */
#define YYCTYPE   unsigned char
#define YYCURSOR  s.cur
#define YYLIMIT   s.lim
#define YYMARKER  s.mrk
/* Leave the scan loop as soon as fill() reports that the input is exhausted. */
#define YYFILL(n) { if ((res = fill(&s, n)) >= 0) break; }

/*
 * Scanner state.
 *   fp     - input stream
 *   cur    - next byte to be examined (YYCURSOR)
 *   tok    - start of the token currently being matched
 *   lim    - one past the last valid byte in buffer (YYLIMIT)
 *   eof    - NULL until a short read happened, then one past the last input byte
 *   mrk    - backtracking position (YYMARKER)
 *   buffer - sliding window over the input
 */
typedef struct Scanner
{
	FILE          *fp;
	unsigned char *cur, *tok, *lim, *eof, *mrk;
	unsigned char buffer[BSIZE];
} Scanner;

/*
 * Refill the scanner buffer.
 *
 * s   - scanner state to operate on
 * len - number of bytes the generated code asks for; 0 (re)initializes all
 *       pointers before the first read
 *
 * While no end of input has been recorded, everything left of s->tok is
 * discarded, the remaining bytes are moved to the start of the buffer and the
 * free space behind s->lim is filled from s->fp. A short read records the end
 * of input in s->eof.
 *
 * Returns 0 when end of input was recorded by an earlier call and fewer than
 * len bytes remain before s->eof (the caller stops scanning, so those
 * remaining bytes are not processed); returns -1 in every other case.
 */
int fill(Scanner *s, int len)
{
	if (!len)
	{
		s->cur = s->tok = s->lim = s->mrk = s->buffer;
		s->eof = 0;
	}
	if (!s->eof)
	{
		int    shift = (int)(s->tok - s->buffer);
		size_t room, got;

		if (shift > 0)
		{
			/* Source and destination lie in the same array and may overlap,
			   which memcpy does not permit. */
			memmove(s->buffer, s->tok, (size_t)(s->lim - s->tok));
			/* A marker left of tok belongs to an earlier token; pin it to the
			   buffer start instead of moving it in front of the array. */
			s->mrk = (s->mrk >= s->tok) ? s->mrk - shift : s->buffer;
			s->tok -= shift;
			s->cur -= shift;
			s->lim -= shift;
		}
		/* Read exactly as much as fits behind the data that is still buffered.
		   Sizing the read by the tok offset instead would request 0 bytes
		   when tok sits at the very end of a full buffer and could overrun
		   the buffer when tok sits near its start. */
		room = (size_t)(s->buffer + BSIZE - s->lim);
		got = fread(s->lim, 1, room, s->fp);
		if (got != room)
		{
			s->eof = &s->lim[got];
		}
		s->lim += got;
	}
	else if (s->cur + len > s->eof)
	{
		return 0; /* not enough input data */
	}
	return -1;
}

/* Write the bytes of the current token, [s->tok, s->cur), to stdout. */
void echo(Scanner *s)
{
	fwrite(s->tok, 1, s->cur - s->tok, stdout);
}

/*
 * Copy fp to stdout while skipping comments.
 *
 * Returns 1 when fp is NULL. Otherwise the loop runs until YYFILL breaks out
 * of it, and the value fill() returned at that point (0) is returned. The
 * stream is closed before returning unless it is stdin.
 */
int scan(FILE *fp)
{
	int  res = 0;
	Scanner s;

	if (!fp)
	{
		return 1; /* no file was opened */
	}

	s.fp = fp;

	fill(&s, 0);

	for(;;)
	{
		/* Default state: echo input until a comment opener is seen. */
		s.tok = s.cur;
/*!re2c
	re2c:indent:top = 2;

	NL			= "\r"? "\n" ;
	WS			= [\r\n\t ] ;
	ANY			= [^] ;

	"/" "/"		{ goto cppcomment; }
	"/" "*"		{ goto comment; }
	ANY			{ fputc(*s.tok, stdout); continue; }
*/
		/* Inside a block comment: swallow everything up to the terminator. */
comment:
		s.tok = s.cur;
/*!re2c
	"*" "/"		{ goto commentws; }
	ANY			{ goto comment; }
*/
		/* Behind a block comment: swallow whitespace one byte at a time, so
		   the run of whitespace is not limited by the buffer size. */
commentws:
		s.tok = s.cur;
/*!re2c
	NL			{ echo(&s); continue; }
	WS			{ goto commentws; }
	ANY			{ echo(&s); continue; }
*/
		/* Inside a line comment: swallow up to the new line, which is echoed. */
cppcomment:
		s.tok = s.cur;
/*!re2c
	NL			{ echo(&s); continue; }
	ANY			{ goto cppcomment; }
*/
	}

	if (fp != stdin)
	{
		fclose(fp); /* close only if not stdin */
	}
	return res; /* return result */
}

/*
 * Entry point: the single argument names the input file, "-" selects stdin.
 * Returns 1 on a missing argument or when the file cannot be opened,
 * otherwise the result of scan().
 */
int main(int argc, char **argv)
{
	FILE *fp;

	if (argc <= 1)
	{
		fprintf(stderr, "%s <expr>\n", argv[0]);
		return 1;
	}

	fp = !strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r");
	if (!fp)
	{
		perror(argv[1]); /* say why the input could not be opened */
		return 1;
	}
	return scan(fp);
}