1 // COVTOOL -- test coverage analysis tool.
2 // Copyright (C) 2002, Lowell Boggs Jr.
3 // mailto:lowell.boggs@attbi.com
4 
5 // This file contains free software.  You can redistribute it
6 // and/or modify it under the terms of the GNU General Public License
7 // as published by the Free Software Foundation; either version 2, or
8 // (at your option) any later version.
9 
10 // This source code is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 // GNU General Public License for more details.
14 
15 // Write to the Free Software Foundation, 59 Temple Place - Suite 330,
16 // Boston, MA  02111-1307, USA for a copy of the GNU General Public License.
17 //
18 
19 
20 //
21 // This file defines a 'stream' of tokens.  Basically it is the Lex
22 // mechanism for covtool.c.  You create a stream with a given filename
23 // then you parse the stream into C++ tokens by calling the CovStream::
24 // parse_token().  A 'token' is a pair of CovStream iterators.  This
25 // only works because a CovStream is read into memory in its entirety
26 // when you open it.  Thus all stream iterators point to memory which
27 // is guaranteed to still exist.
28 //
29 
30 #include <covstream.h>
31 #include <ctype.h>
32 #include <char_sets.h>
33 #include <bomb.h>
34 #include <fstream>
35 #include <iostream>
36 #include <unistd.h>
37 #include <algorithm>
38 #include <string.h>
39 
40 using namespace std;
41 
42 #define chunk                CovStream::chunk
43 
44 typedef CovStream::token     token;
45 
46 CovStream::
CovStream(FILE * f)47 CovStream(FILE *f)
48 {
49   // read a file into a CovStream
50 
51   int  bytes_read;
52 
53   chunks_ = new chunk;
54 
55   // read the first chunk
56 
57   bytes_read = fread(chunks_->data_, 1, sizeof(chunks_->data_), f);
58 
59   chunks_->end_ = chunks_->data_ + bytes_read;
60 
61   chunk *scan = chunks_;
62 
63   // read the remaining chunks
64 
65   while( bytes_read == sizeof(scan->data_) )
66   {
67     scan->next_ = new chunk;
68 
69     scan->next_->offset_ = scan->offset_ + bytes_read;
70 
71     scan = scan->next_;
72 
73     bytes_read = fread(scan->data_, 1, sizeof(scan->data_), f);
74 
75     scan->end_ = scan->data_ + bytes_read;
76 
77   }
78 
79   file_size_ = scan->offset_ + bytes_read;
80 
81 }
82 
83 CovStream::
~CovStream()84 ~CovStream()
85 {
86   chunk *scan = chunks_;
87 
88   while(scan)
89   {
90     chunk *next = scan->next_;
91 
92     delete scan;
93 
94     scan = next;
95 
96   }
97 
98 }
99 
// define a quick way to check for operator characters:  +, -, *, etc
//
// operator_chars[ch] is 1 when ch is one of the operator characters
// listed in load_table() and 0 otherwise.  The table has one entry for
// every possible unsigned char value.

char operator_chars[256];

// The argument is converted through unsigned char before it is used as
// an index.  Converting a plain char straight to unsigned would turn a
// negative value (any byte >= 0x80 where char is signed) into a huge
// index and read far outside the table.

#define isoperator(c) operator_chars[static_cast<unsigned char>(c)]

static int load_table()
{
  // Clear the whole table, then flag each operator character.
  // Always returns 0; the return value only exists so the function can
  // be used as an initialiser (see force_table_load).

  static char const ops[] = "+-*/=^%|!&<>:?.";

  memset(operator_chars, 0, sizeof(operator_chars));

  for(char const *op = ops; *op != 0; ++op)
    operator_chars[static_cast<unsigned char>(*op)] = 1;

  return 0;
}

int force_table_load = load_table();  // static init loads operator_chars
133 
134 
135 token
136 CovStream::
parse_token(iterator start,iterator end)137 parse_token(iterator start, iterator end)
138 {
139   // skip whitespace and return 1 c++ token
140 
141   if(start == end)
142     return token(end,end);
143 
144   while(start != end)
145   {
146     char c = *start;
147 
148     if( c == '\n' || !isspace(c))
149       break;
150 
151     ++start;
152   }
153 
154   if(start == end)
155     return token(end,end);
156 
157   iterator begin = start;
158 
159   switch(*start)
160   {
161     LETTERS
162       {
163 	while(start != end && ( *start == '_' || isalnum(*start) ) ) ++start;
164       }
165       break;
166 
167     case '.':
168       {
169 	// handle these goofy syntaxes:
170 	//
171 	//   .1E-99
172 	//   a .b
173 
174 	++start;  // accept the .
175 
176 	if( !isdigit(*start) && *start != '*' && *start != '.')
177 	  break;  // if this isn't part of number, a member function pointer reference or an elipses
178 		  // call it a 1 character token
179 
180       }
181       // drop through
182     NUMBERS
183       {
184 	// we really aren't parsing the language so we don't care about
185 	// getting tokens exactly right.  expressions are not parsed
186 	// into binary trees, they are just treated as sequences of
187 	// tokens that are terminated with a semicolon -- so we don't care
188 	// about exactness.
189 
190 	while(start != end && ( isalnum(*start) ||
191 				*start == '.'   ||
192 				*start == '*'   ||
193 				*start == '_'
194 			      )
195 	     )
196 	{
197 	  char c = *start;
198 
199 	  // handle 1.17549435E-38
200 
201 	  if(c == 'e' || c == 'E')   // floatingpoint expontents
202 	  {
203 	    ++start;
204 
205 	    if(start != end)
206 	    {
207 	       c = *start;
208 
209 	       if(c == '-' || c == '+')
210 		 ++start;
211 
212 	    }
213 
214 	  }
215 	  else
216 	    ++start;
217 	}
218       }
219       break;
220 
221     case '\'':
222       {
223 	++start;
224 
225 	if(*start == '\\')
226 	  ++start;
227 
228 	if(start == end) bomb("unexpected end of file");
229 
230 	++start;
231 
232 	if(start == end) bomb("unexpected end of file");
233 
234 	if(*start != '\'')
235 	  bomb("missing closing single quote");
236 
237 	if(start == end) bomb("unexpected end of file");
238 
239 	++start;
240 
241       }
242       break;
243 
244     case '"':
245       {
246 	++start;
247 
248 	while(start != end && *start != '"')
249 	{
250 
251 	  if(*start == '\\')
252 	  {
253 	    ++start;
254 	  }
255 
256 	  if(start == end)
257 	    break;
258 
259 	  ++start;
260 	}
261 
262 	if(start != end)
263 	  ++start;
264 
265       }
266       break;
267 
268     // we don't care about language constructs
269     // we are only interested in recognizing functions
270     // and statements, so we don't need to poperly match
271     // all operator characters to some real token they are
272     // part of.  See the comment about 'NUMBERS' above
273 
274     default:
275 
276       if(isoperator(*start))
277       {
278 	while(start != end && isoperator(*start))
279 	  ++start;
280       }
281       else
282 	++start;
283       break;
284 
285   }
286 
287   return token(begin,start);
288 
289 }
290 
291 
#define isSlash(c)  ( (c) == '/' || (c) == '\\' )

static char *dropLastNode(char *dirname, char *dirStart)
    //
    //  Helper for normalizePathname().  'dirname' points just past the
    //  slash that ends the directory text built so far.  Back it up over
    //  the last node so that it points just past the previous slash, but
    //  never further back than 'dirStart'.  Returns the new position.
    //
{
    if(dirname > dirStart)
        {
            --dirname;  // step back over the trailing slash

            while(dirname > dirStart && !isSlash(dirname[-1]))
                {
                    --dirname;
                }
        }

    return dirname;
}

static void normalizePathname(char const *path,
                              char *dirname,
                              char *basename
                             )
    //
    //  This function removes /../ and /./ fragments from filenames
    //  then gives you the dirname part of the pathname and the
    //  nodename part.  The dirname will end in /.  Pathnames beginning
    //  with ~ are also supported.  That is, ~/dir/path is equivalent to
    //  $HOME/dir/path and ~user/path is equivalent to $HOME/../user/path
    //
    //  path      NUL terminated input pathname; / and \ both count as
    //            directory separators
    //  dirname   output:  NUL terminated directory part
    //  basename  output:  NUL terminated last node of the path.  It is
    //            empty when the path ends in a slash or in a /.. that
    //            could be resolved.
    //
    //  A path that does not start with a slash, a drive prefix or ~ is
    //  taken relative to the current directory.
    //
    //  Neither output buffer is bounds checked.  The caller must make
    //  them large enough for the input plus whatever prefix ends up in
    //  front of it ($HOME, or a current directory of up to 255
    //  characters).
    //
{
    char *dirStart = dirname;   // '..' never backs up beyond this point

    if(path[0] != 0 && path[1] == ':')
        { // don't screw up the drive info

            *dirname++ = path[0];
            *dirname++ = ':';

            path += 2;

            *dirname++ = '/';

            dirStart = dirname;

        } // don't screw up the drive info
    else
    if(path[0] == '/' && path[1] == '/' && path[2] != 0 && path[3] == '/')
        { // handle unixifications of the dos drive info

            // (path[2] is tested first so that path[3] is never read
            //  beyond the end of a two character path)

            *dirname++ = '/';
            *dirname++ = '/';
            *dirname++ = path[2];
            *dirname++ = '/';

            path += 3;

            dirStart = dirname;

        } // handle unixifications of the dos drive info
    else
    if(isSlash(path[0]))
        { // handle full unix style pathnames

            *(dirname++) = '/';

            dirStart = dirname;

        } // handle full unix style pathnames
    else
    if( path[0] == '~' )
        { // handle user id relative paths ($HOME relative actually)

            char *home = getenv("HOME");

            if(home)
                {
                    ++path; // remove the ~ from the input path

                    while(*home)
                        { // copy home dir to output directory

                            *(dirname++) = *(home++);

                        } // copy home dir to output directory

                    if(*path == 0 || isSlash(*path))
                        { // current user id relative (a bare ~ is $HOME)

                            if(*path)
                                ++path;

                            *(dirname++) = '/';

                        } // current user id relative
                    else
                        { // some other user id relative path

                            while(dirname > dirStart &&
                                  !isSlash(dirname[-1])
                                 )
                                { // remove current user name from path
                                    --dirname;
                                } // remove current user name from path

                        } // some other user id relative path

                }

            // when HOME is not set the ~ is left in the path and ends up
            // being treated as an ordinary node name

        } // handle user id relative paths ($HOME relative actually)
    else
        { // no path specified, assume current directory

            if(getcwd(dirname, 256) != 0 && dirname[0] != 0)
                {
                    // keep '..' from erasing the root slash

                    if(isSlash(dirname[0]))
                        dirStart = dirname + 1;

                    dirname += strlen(dirname);

                    if( !isSlash(dirname[-1]) )
                        *(dirname++) = '/';
                }
            else
                {
                    // the current directory is unknown (getcwd failed,
                    // e.g. because the name is too long):  leave the
                    // result relative and keep any '..' in it

                    *(dirname++) = '.';
                    *(dirname++) = '/';

                    dirStart = dirname;
                }

        } // no path specified, assume current directory

    // at this point, path points to one of the following strings:
    //
    //      name        base name only case
    //      /name/...   pathname case
    //      name/path   curdir relative path


    // at this point, dirname ends in a slash but no nul

    if( ! isSlash(*path) )
        {
            // the path starts with a node name rather than a slash

            while(path[0] == '.')
                { // strip leading ./ and ../

                    if(isSlash(path[1]))
                        {
                            path += 2;
                        }
                    else
                    if( path[1] == '.' && isSlash(path[2]) && dirname > dirStart)
                        {
                            dirname = dropLastNode(dirname, dirStart);

                            path += 3;
                        }
                    else
                        {
                            break;
                        }

                } // strip leading ./ and ../

            char const *scan = path;

            while(*scan != 0 && !isSlash(*scan)) ++scan;

            if(*scan)
                { // this is a directory relative path

                    while(path < scan)
                        {
                            *(dirname++) = *(path++);
                        }

                    *(dirname++) = '/';

                } // this is a directory relative path
            else
                { // plain vanilla simple name

                    strcpy(basename, path);
                    *dirname = 0;
                    return;

                } // plain vanilla simple name
        }

    while(*path)
        { // parse directory nodes in the path

            // assumption:  isSlash(*path) is true right here
            // assumption:  dirname points just past a slash

            if(path[1] == '.' && isSlash(path[2]))
                { // ignore /./

                    path += 2;

                    continue;   // the next node may be another . or ..

                } // ignore /./

            if(path[1] == '.' && path[2] == '.' && isSlash(path[3]))
                { // /../ means go back up to parent

                    path += 3;

                    dirname = dropLastNode(dirname, dirStart);

                    continue;   // the next node may be another . or ..

                } // /../ means go back up to parent

            char const *scan = path + 1;

            while(*scan && !isSlash(*scan)) ++scan;

            if(*scan == 0)
                { // this is last node -- quit now

                    if(path[1] == '.' && path[2] == '.' && path[3] == 0 &&
                       dirname > dirStart
                      )
                        { // handle trailing /..

                            // the parent directory is the result:  do
                            // not also hand back '..' as the base name,
                            // that would go up twice

                            dirname = dropLastNode(dirname, dirStart);

                            *dirname  = 0;
                            *basename = 0;

                            return;

                        } // handle trailing /..

                    *dirname = 0;

                    strcpy(basename, path+1);

                    return;

                } // this is last node -- quit now

            ++path;

            while(path < scan) *(dirname++) = *(path++);

            *(dirname++) = '/';

        } // parse directory nodes in the path

    // not expected to be reached (every path is finished by one of the
    // returns above), but never leave the outputs unterminated

    *dirname  = 0;
    *basename = 0;
}
529 
530 
531 string
532 CovStream::
fullname(string const & s)533 fullname(string const &s)
534 {
535    char input_name[1025];
536 
537    #ifdef DEBUGGING
538 
539      if(s[0] != '"')
540      {
541        bomb("Whoa:  missing \"'s in filename passed to CovStream::fullname");
542      }
543 
544    #endif
545 
546    if(s[0] != '"')
547      return s;  // big no no, improper formatting
548 
549    string::const_iterator first = s.begin();
550    string::const_iterator last = s.end();
551 
552    *copy(++first, --last, input_name) = 0;
553 
554    char dirname[1025];
555 
556    char basename[1025];
557 
558    normalizePathname(input_name, dirname, basename);
559 
560    string rv = string("\"");
561 
562    rv += dirname;
563    rv += basename;
564    rv += "\"";
565 
566    return rv;
567 
568 }
569