// re2c $INPUT -o $OUTPUT -b
/* re2c lesson 002_strip_comments, strip_003, (c) M. Boerger 2006 - 2007 */
/*!ignore:re2c

- more complexity
  . Right now we strip out trailing white space and new lines after a comment
    block. This can be a problem when the comment block was not preceded by
    a new line.
  . The solution is to use trailing contexts.

- trailing contexts
  . Re2c allows checking for a portion of input and only recognizing it when
    it is followed by another portion. This is called a trailing context.
  . The trailing context is not part of the identified input. That means that
    it follows exactly at the cursor. A consequence is that the scanner has
    already read more input and on the next run you need to restore the
    beginning of input, in our case s.tok, from the cursor, here s.cur,
    rather than restoring to the beginning of the buffer. This way the
    scanner can reuse the portion it has already read.
  . The position of the trailing context is stored in YYCTXMARKER for which
    a pointer variable needs to be provided.
  . As with YYMARKER the corresponding variable needs to be corrected if we
    shift in some buffer.
  . Still this is not all we need to solve the problem. What is left is that
    the information whether a trailing context was detected has to be stored
    somewhere. This is done by the new variable nlcomment.

- formatting
  . Until now we only used single line expression code and we always had the
    opening { on the same line as the rule itself. If we have multiline rule
    code and care for formatting we can no longer rely on re2c. Now we have
    to indent the rule code ourselves. Also we need to take care of the
    opening {. If we keep it on the same line as the rule then re2c will
    indent it correctly and the emitted #line information will be correct.
    If we place it on the next line then the #line directive will also point
    to that line and not to the rule.
*/

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*!max:re2c */
#define BSIZE 128

#if BSIZE < YYMAXFILL
# error BSIZE must be greater YYMAXFILL
#endif

/* Scanner interface expected by the re2c generated code. All positions live
   in the local Scanner instance `s` of scan(). */
#define YYCTYPE     unsigned char
#define YYCURSOR    s.cur
#define YYLIMIT     s.lim
#define YYMARKER    s.mrk
#define YYCTXMARKER s.ctx
/* Refill the buffer; when fill() returns a value >= 0 the result is kept in
   `res` and the enclosing loop of scan() is left via break. */
#define YYFILL(n)   { if ((res = fill(&s, n)) >= 0) break; }

/* Scanner state: the input stream plus pointers into the fixed size buffer.
     cur - cursor (YYCURSOR)
     tok - start of the current token
     lim - end of the buffered input (YYLIMIT)
     eof - end of input inside the buffer, 0 until a short read occurred
     ctx - trailing context position (YYCTXMARKER)
     mrk - backtracking position (YYMARKER) */
typedef struct Scanner
{
    FILE          *fp;
    unsigned char *cur, *tok, *lim, *eof, *ctx, *mrk;
    unsigned char buffer[BSIZE];
} Scanner;

/* Provide more input to the scanner.
 *
 * len == 0 resets all buffer pointers (initial call). While the end of the
 * input has not been seen, everything from s->tok on is moved to the start
 * of the buffer, all pointers are corrected by the same distance and the
 * free space behind s->lim is filled from s->fp. A short read records the
 * end of input in s->eof.
 *
 * Returns 0 if the end of input was seen before and fewer than len bytes
 * are left behind the cursor, -1 otherwise.
 */
int fill(Scanner *s, int len)
{
    if (!len)
    {
        /* ctx is reset as well so that the pointer correction below never
           operates on an uninitialized pointer */
        s->cur = s->tok = s->lim = s->mrk = s->ctx = s->buffer;
        s->eof = 0;
    }
    if (!s->eof)
    {
        int got, cnt = s->tok - s->buffer;

        if (cnt > 0)
        {
            /* source and destination may overlap, hence memmove */
            memmove(s->buffer, s->tok, s->lim - s->tok);
            s->tok -= cnt;
            s->cur -= cnt;
            s->lim -= cnt;
            s->mrk -= cnt;
            s->ctx -= cnt;
        }
        /* read only as much as fits between lim and the end of the buffer */
        cnt = BSIZE - (int)(s->lim - s->buffer);
        if ((got = fread(s->lim, 1, cnt, s->fp)) != cnt)
        {
            s->eof = &s->lim[got];
        }
        s->lim += got;
    }
    else if (s->cur + len > s->eof)
    {
        return 0; /* not enough input data */
    }
    return -1;
}

/* Write the current token, i.e. the bytes from s->tok up to s->cur, to
   stdout. */
void echo(Scanner *s)
{
    fwrite(s->tok, 1, s->cur - s->tok, stdout);
}

/* Copy fp to stdout using the re2c rules below, which skip the input matched
 * as comments.
 *
 * nlcomment is set when a new line that is directly followed by the start of
 * a block comment has been echoed (trailing context rule); in that case the
 * new line following the comment is not echoed.
 *
 * fp is closed unless it is stdin. Returns 1 if fp is NULL, otherwise the
 * value stored in res by YYFILL when the scan loop is left.
 */
int scan(FILE *fp)
{
    int res = 0;
    int nlcomment = 0;
    Scanner s;

    if (!fp)
    {
        return 1; /* no file was opened */
    }

    s.fp = fp;

    fill(&s, 0);

    for(;;)
    {
        s.tok = s.cur;
/*!re2c
    re2c:indent:top = 2;

    NL          = "\r"? "\n" ;
    WS          = [\r\n\t ] ;
    ANY         = [^] ;

    "/" "/"     { goto cppcomment; }
    NL / "/""*" { echo(&s); nlcomment = 1; continue; }
    "/" "*"     { goto comment; }
    ANY         { fputc(*s.tok, stdout); continue; }
*/
comment:
        s.tok = s.cur;
/*!re2c
    "*" "/"     { goto commentws; }
    ANY         { goto comment; }
*/
commentws:
        s.tok = s.cur;
/*!re2c
    NL? "/" "*" { goto comment; }
    NL          {
                    if (!nlcomment)
                    {
                        echo(&s);
                    }
                    nlcomment = 0;
                    continue;
                }
    WS          { goto commentws; }
    ANY         { echo(&s); nlcomment = 0; continue; }
*/
cppcomment:
        s.tok = s.cur;
/*!re2c
    NL          { echo(&s); continue; }
    ANY         { goto cppcomment; }
*/
    }

    if (fp != stdin)
    {
        fclose(fp); /* close only if not stdin */
    }
    return res; /* return result */
}

/* Entry point: scans the file named by the first argument, or stdin when the
   argument is "-". Without an argument a usage line is printed to stderr and
   1 is returned. */
int main(int argc, char **argv)
{
    if (argc > 1)
    {
        return scan(!strcmp(argv[1], "-") ? stdin : fopen(argv[1], "r"));
    }
    else
    {
        fprintf(stderr, "%s <expr>\n", argv[0]);
        return 1;
    }
}