1 // COVTOOL -- test coverage analysis tool.
2 // Copyright (C) 2002, Lowell Boggs Jr.
3 // mailto:lowell.boggs@attbi.com
4
5 // This file contains free software. You can redistribute it
6 // and/or modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2, or
8 // (at your option) any later version.
9
10 // This source code is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14
15 // Write to the Free Software Foundation, 59 Temple Place - Suite 330,
16 // Boston, MA 02111-1307, USA for a copy of the GNU General Public License.
17 //
18
19
20 //
// This file defines a 'stream' of tokens.  Basically it is the Lex
// mechanism for covtool.c.  You create a stream with a given filename,
// then you parse the stream into C++ tokens by calling
// CovStream::parse_token().  A 'token' is a pair of CovStream iterators.
// This only works because a CovStream is read into memory in its entirety
// when you open it.  Thus all stream iterators point to memory which
// is guaranteed to still exist.
28 //
29
30 #include <covstream.h>
31 #include <ctype.h>
32 #include <char_sets.h>
33 #include <bomb.h>
34 #include <fstream>
35 #include <iostream>
36 #include <unistd.h>
37 #include <algorithm>
38 #include <string.h>
39
40 using namespace std;
41
42 #define chunk CovStream::chunk
43
44 typedef CovStream::token token;
45
46 CovStream::
CovStream(FILE * f)47 CovStream(FILE *f)
48 {
49 // read a file into a CovStream
50
51 int bytes_read;
52
53 chunks_ = new chunk;
54
55 // read the first chunk
56
57 bytes_read = fread(chunks_->data_, 1, sizeof(chunks_->data_), f);
58
59 chunks_->end_ = chunks_->data_ + bytes_read;
60
61 chunk *scan = chunks_;
62
63 // read the remaining chunks
64
65 while( bytes_read == sizeof(scan->data_) )
66 {
67 scan->next_ = new chunk;
68
69 scan->next_->offset_ = scan->offset_ + bytes_read;
70
71 scan = scan->next_;
72
73 bytes_read = fread(scan->data_, 1, sizeof(scan->data_), f);
74
75 scan->end_ = scan->data_ + bytes_read;
76
77 }
78
79 file_size_ = scan->offset_ + bytes_read;
80
81 }
82
83 CovStream::
~CovStream()84 ~CovStream()
85 {
86 chunk *scan = chunks_;
87
88 while(scan)
89 {
90 chunk *next = scan->next_;
91
92 delete scan;
93
94 scan = next;
95
96 }
97
98 }
99
// A quick way to check for operator characters: +, -, *, etc.
//
// operator_chars[c] is 1 when the character with code c can be part of
// an operator token and 0 otherwise.  The table is filled in once by
// load_table(), which runs during static initialization (see
// force_table_load below).

char operator_chars[256];

// The index goes through unsigned char on purpose: converting a plain
// char straight to unsigned sign-extends negative values (bytes >= 0x80
// where char is signed) into an index far outside the 256 entry table.

#define isoperator(c) operator_chars[static_cast<unsigned char>(c)]

// Clear operator_chars and mark every operator character.  Always
// returns 0; the return value only exists so that the call can be used
// as the initializer of a global.

static int load_table()
{
    static const char operators[] = "+-*/=^%|!&<>:?.";

    memset(operator_chars, 0, sizeof(operator_chars));

    for(const char *op = operators; *op != 0; ++op)
        operator_chars[static_cast<unsigned char>(*op)] = 1;

    return 0;
}

int force_table_load = load_table();  // static init loads operator_chars
133
134
135 token
136 CovStream::
parse_token(iterator start,iterator end)137 parse_token(iterator start, iterator end)
138 {
139 // skip whitespace and return 1 c++ token
140
141 if(start == end)
142 return token(end,end);
143
144 while(start != end)
145 {
146 char c = *start;
147
148 if( c == '\n' || !isspace(c))
149 break;
150
151 ++start;
152 }
153
154 if(start == end)
155 return token(end,end);
156
157 iterator begin = start;
158
159 switch(*start)
160 {
161 LETTERS
162 {
163 while(start != end && ( *start == '_' || isalnum(*start) ) ) ++start;
164 }
165 break;
166
167 case '.':
168 {
169 // handle these goofy syntaxes:
170 //
171 // .1E-99
172 // a .b
173
174 ++start; // accept the .
175
176 if( !isdigit(*start) && *start != '*' && *start != '.')
177 break; // if this isn't part of number, a member function pointer reference or an elipses
178 // call it a 1 character token
179
180 }
181 // drop through
182 NUMBERS
183 {
184 // we really aren't parsing the language so we don't care about
185 // getting tokens exactly right. expressions are not parsed
186 // into binary trees, they are just treated as sequences of
187 // tokens that are terminated with a semicolon -- so we don't care
188 // about exactness.
189
190 while(start != end && ( isalnum(*start) ||
191 *start == '.' ||
192 *start == '*' ||
193 *start == '_'
194 )
195 )
196 {
197 char c = *start;
198
199 // handle 1.17549435E-38
200
201 if(c == 'e' || c == 'E') // floatingpoint expontents
202 {
203 ++start;
204
205 if(start != end)
206 {
207 c = *start;
208
209 if(c == '-' || c == '+')
210 ++start;
211
212 }
213
214 }
215 else
216 ++start;
217 }
218 }
219 break;
220
221 case '\'':
222 {
223 ++start;
224
225 if(*start == '\\')
226 ++start;
227
228 if(start == end) bomb("unexpected end of file");
229
230 ++start;
231
232 if(start == end) bomb("unexpected end of file");
233
234 if(*start != '\'')
235 bomb("missing closing single quote");
236
237 if(start == end) bomb("unexpected end of file");
238
239 ++start;
240
241 }
242 break;
243
244 case '"':
245 {
246 ++start;
247
248 while(start != end && *start != '"')
249 {
250
251 if(*start == '\\')
252 {
253 ++start;
254 }
255
256 if(start == end)
257 break;
258
259 ++start;
260 }
261
262 if(start != end)
263 ++start;
264
265 }
266 break;
267
268 // we don't care about language constructs
269 // we are only interested in recognizing functions
270 // and statements, so we don't need to poperly match
271 // all operator characters to some real token they are
272 // part of. See the comment about 'NUMBERS' above
273
274 default:
275
276 if(isoperator(*start))
277 {
278 while(start != end && isoperator(*start))
279 ++start;
280 }
281 else
282 ++start;
283 break;
284
285 }
286
287 return token(begin,start);
288
289 }
290
291
// True when c is a directory separator.  Both '/' and '\' count.

#define isSlash(c) ( (c) == '/' || (c) == '\\' )

//
// Remove the last directory node from the name being built in
// [dirStart,dirname) and return the new end of the name.  The name is
// expected to end in a slash; that slash is stepped over first, then
// everything back to (but not including) the previous slash is
// dropped.  Nothing in front of dirStart is ever removed.
//
static char *dropLastDir(char *dirStart, char *dirname)
{
    if(dirname > dirStart)
    {
        --dirname;  // step back over the trailing slash

        while(dirname > dirStart && !isSlash(dirname[-1]))
            --dirname;
    }

    return dirname;
}

//
// This function removes /../ and /./ fragments from filenames
// then gives you the dirname part of the pathname and the
// nodename part.  The dirname will end in /.  Pathnames beginning
// with ~ are also supported.  That is, ~/dir/path is equivalent to
// $HOME/dir/path and ~user/path is equivalent to $HOME/../user/path
//
//   path      nul terminated name to normalize
//   dirname   receives the nul terminated directory part
//   basename  receives the nul terminated last node of the path
//             (empty when the path ends in a slash or in /..)
//
// How the directory part starts depends on the form of the path:
//
//   x:...     drive prefix, written as x:/
//   //x/...   unixified drive prefix, kept as //x/
//   /...      absolute path
//   ~...      $HOME relative; left alone when HOME is not set
//   other     relative to the current directory as reported by
//             getcwd(); when that fails (for example because the
//             name needs more than 256 bytes) nothing is prepended
//
// No bounds are checked.  dirname must have room for the prefix
// ($HOME plus a slash, or up to 256 bytes of current directory)
// plus strlen(path) + 2 bytes; basename needs strlen(path) + 1.
//
static void normalizePathname(char const *path,
                              char       *dirname,
                              char       *basename
                             )
{
    char *const dirBegin = dirname;  // very first byte of the output

    char *dirStart = dirname;        // ".." never removes anything in
                                     // front of this position

    if(path[0] != 0 && path[1] == ':')
    { // don't screw up the drive info

        *dirname++ = path[0];
        *dirname++ = ':';

        path += 2;

        *dirname++ = '/';

        dirStart = dirname;

    } // don't screw up the drive info
    else
    if(path[0] == '/' && path[1] == '/' && path[2] != 0 && path[3] == '/')
    { // handle unixifications of the dos drive info

        *dirname++ = '/';
        *dirname++ = '/';
        *dirname++ = path[2];
        *dirname++ = '/';

        path += 3;

        dirStart = dirname;

    } // handle unixifications of the dos drive info
    else
    if(isSlash(path[0]))
    { // handle full unix style pathnames

        *(dirname++) = '/';

        dirStart = dirname;

    } // handle full unix style pathnames
    else
    if(path[0] == '~')
    { // handle user id relative paths ($HOME relative actually)

        char *home = getenv("HOME");

        if(home)
        {
            ++path;  // remove the ~ from the input path

            while(*home)
            { // copy home dir to output directory

                *(dirname++) = *(home++);

            } // copy home dir to output directory

            if(isSlash(*path))
            { // current user id relative

                ++path;
                *(dirname++) = '/';

            } // current user id relative
            else
            { // some other user id relative path

                while(dirname > dirStart &&
                      !isSlash(dirname[-1])
                     )
                { // remove current user name from path
                    --dirname;
                } // remove current user name from path

            } // some other user id relative path
        }

    } // handle user id relative paths ($HOME relative actually)
    else
    { // no path specified, assume current directory

        if(getcwd(dirname, 256) == 0)
            *dirname = 0;  // unknown: the result stays a relative name

        dirname += strlen(dirname);

        if(dirname > dirBegin && !isSlash(dirname[-1]))
            *(dirname++) = '/';

    } // no path specified, assume current directory

    // A prefix taken from $HOME or the current directory that starts
    // with a slash is absolute.  Protect that slash so that climbing
    // with ".." stops at the root instead of producing a relative name.

    if(dirStart == dirBegin && dirname > dirBegin && isSlash(dirBegin[0]))
        dirStart = dirBegin + 1;

    // at this point, path points to one of the following strings:
    //
    //    name              base name only case
    //    /name/...         pathname case
    //    name/path         curdir relative path

    // at this point, dirname ends in a slash but no nul
    // (or is still empty when no prefix could be determined)

    if( !isSlash(*path) )
    {
        // the path starts with a node, not a slash: deal with leading
        // ./ and ../ and then with the first real node

        while(path[0] == '.')
        {
            if(isSlash(path[1]))
            {
                path += 2;  // ./ adds nothing
            }
            else
            if(path[1] == '.' && isSlash(path[2]) &&
               (dirname > dirStart || dirStart > dirBegin)
              )
            {
                // ../ removes the last node; at the root of an
                // absolute prefix it is simply dropped

                dirname = dropLastDir(dirStart, dirname);

                path += 3;
            }
            else
            {
                break;
            }
        }

        char const *scan = path;

        while(*scan != 0 && !isSlash(*scan))
            ++scan;

        if(*scan)
        { // this is a directory relative path

            while(path < scan)
            {
                *(dirname++) = *(path++);
            }

            *(dirname++) = '/';

        } // this is a directory relative path
        else
        { // plain vanilla simple name

            strcpy(basename, path);
            *dirname = 0;
            return;

        } // plain vanilla simple name
    }

    while(*path)
    { // parse directory nodes in the path

        // isSlash(*path) is true right here

        if(path[1] == '.' && isSlash(path[2]))
        { // ignore /./

            path += 2;

            continue;  // the next node may be special again

        } // ignore /./

        if(path[1] == '.' && path[2] == '.' && isSlash(path[3]))
        { // /../ means go back up to parent

            path += 3;

            dirname = dropLastDir(dirStart, dirname);

            continue;  // the next node may be special again

        } // /../ means go back up to parent

        char const *scan = path + 1;

        while(*scan != 0 && !isSlash(*scan))
            ++scan;

        if(*scan == 0)
        { // this is last node -- quit now

            if(path[1] == '.' && path[2] == '.' && path[3] == 0)
            { // handle trailing /..

                // the name denotes the parent directory itself, so
                // nothing is left over for the base name

                dirname = dropLastDir(dirStart, dirname);

                *dirname  = 0;
                *basename = 0;

                return;

            } // handle trailing /..

            *dirname = 0;

            strcpy(basename, path + 1);

            return;

        } // this is last node -- quit now

        ++path;  // skip the slash in front of the node

        while(path < scan)
            *(dirname++) = *(path++);

        *(dirname++) = '/';

    } // parse directory nodes in the path

    // safety net: make sure both results are terminated even if the
    // loop above was left without finding a last node

    *dirname  = 0;
    *basename = 0;
}
529
530
531 string
532 CovStream::
fullname(string const & s)533 fullname(string const &s)
534 {
535 char input_name[1025];
536
537 #ifdef DEBUGGING
538
539 if(s[0] != '"')
540 {
541 bomb("Whoa: missing \"'s in filename passed to CovStream::fullname");
542 }
543
544 #endif
545
546 if(s[0] != '"')
547 return s; // big no no, improper formatting
548
549 string::const_iterator first = s.begin();
550 string::const_iterator last = s.end();
551
552 *copy(++first, --last, input_name) = 0;
553
554 char dirname[1025];
555
556 char basename[1025];
557
558 normalizePathname(input_name, dirname, basename);
559
560 string rv = string("\"");
561
562 rv += dirname;
563 rv += basename;
564 rv += "\"";
565
566 return rv;
567
568 }
569