1 #include "tfilter.h"
2 
3 #include "maptok.h"
4 #include <ctype.h>
5 #include <string.h>
6 
/*
 * This file contains routines to map names from one string to another.
 * The mechanism is fairly general, and is implemented as an OutStream,
 * allowing it to be inserted almost anywhere.
 *
 * The most convenient way to generate the mapping table is by
 * reading a file; for this we use the command file-reading object.
 * Should probably remove the OutStream version, or unify the code.
 */
16 
17 /* Local info for replacement data */
18 typedef struct {
19     char *repname;
20     char *url;
21     } MapData;
22 
Setup(int in_maxlen)23 void OutStreamMap::Setup( int in_maxlen )
24 {
25     int i;
26 
27     squote	  = 0;
28     equote	  = 0;
29     maxlen	  = in_maxlen;
30     activetok	  = new char[maxlen];
31     activetok[0]  = 0;
32     lctok         = new char[maxlen];  // Used for a lower case version of activetok
33     curlen	  = 0;
34     position	  = activetok;
35     maptable	  = new SrList();
36     print_matched = 0;  // For debugging
37     ignore_case   = 0;
38 
39     for (i=0; i<255; i++) {
40 	breaktable[i] = BREAK_OTHER;
41 	if (isascii(i)) {
42 	    if      (isalpha(i)) breaktable[i] = BREAK_ALPHA;
43 	    else if (isdigit(i)) breaktable[i] = BREAK_ALPHA;
44 	    }
45 	}
46     breaktable['_'] = BREAK_ALPHA;
47 }
48 
OutStreamMap(OutStream * outs,int in_maxlen)49 OutStreamMap::OutStreamMap( OutStream *outs, int in_maxlen )
50 {
51     Setup( in_maxlen );
52     next      = outs;
53 }
54 
OutStreamMap(OutStream * outs)55 OutStreamMap::OutStreamMap( OutStream *outs )
56 {
57     Setup( 1024 );
58     next = outs;
59 }
OutStreamMap(int in_maxlen)60 OutStreamMap::OutStreamMap( int in_maxlen )
61 {
62     Setup( in_maxlen );
63     next = 0;
64 }
OutStreamMap(OutStream * outs,int in_maxlen,int pflag)65 OutStreamMap::OutStreamMap( OutStream *outs, int in_maxlen, int pflag )
66 {
67   if (in_maxlen <= 0) in_maxlen = 1024;
68   Setup( in_maxlen );
69   next = outs;
70   print_matched = pflag;
71 }
FlushTokBuf(void)72 void OutStreamMap::FlushTokBuf( void )
73 {
74   if (curlen) {
75     SrEntry *entry;
76     // Add an option to only compare lower case version?  In that case,
77     // also insert entries only in lower case.
78 
79     strcpy(lctok,activetok);
80     if (ignore_case) {
81 	toLower(lctok);
82     }
83     if (maptable->Lookup( lctok, &entry ) == 0) {
84       PutLink( activetok, entry );
85     }
86     else {
87       next->PutToken( 0, activetok );
88     }
89     curlen = 0;
90   }
91 }
92 
toLower(char * s)93 void OutStreamMap::toLower(char *s)
94 {
95     char c;
96     while (*s) {
97 	c = *s;
98 	if (isascii(c) && isupper(c)) *s = tolower(c);
99 	s++;
100     }
101 }
102 
103 /*
104  * This routine works by putting a token into the internal buffer.
105  * Once it knows that it can (found a delimiter), it tries to look it
106  * up and replace it.
107  */
PutToken(int nsp,const char * token)108 int OutStreamMap::PutToken( int nsp, const char *token )
109 {
110     SrEntry *entry;
111 
112     /*
113        First, tokenize the output and check each token.  Special case:
114        if we reach the end of the string without a "break" character, we must
115        delay processing until we get the break character, since the token may
116        be incomplete (for example, MPI_Send may be delivered to this
117        routine as MPI, then _, then Send.
118      */
119 
120     /* Spaces are a delimiter.  Try to flush activetok, and rerun PutToken */
121     if (nsp) {
122       FlushTokBuf();
123       next->PutToken( nsp, (char *)0 );
124       return PutToken( 0, token );
125     }
126 
127     /* Copy token to activetok while the breaktable entries are the same.
128        Careful of the BREAK_OTHER and BREAK_SPACE case.
129        We ONLY lookup tokens with break values of 2 (BREAK_ALPHA) */
130     if (!token) return 0;
131     while (*token) {
132 	/* This really needs to tokenize the string */
133 	if (breaktable[(int)*token] == BREAK_ALPHA) {
134 	    /* The character is a "alphanum" */
135 	    while (*token && breaktable[(int)*token] == BREAK_ALPHA) {
136 		activetok[curlen++] = *token++;
137 		if (curlen >= maxlen) {
138 		    activetok[maxlen-1] = 0;
139 		    fprintf( stderr, "Token too long (%s)=n", activetok );
140 		    return 1;
141 		    }
142 		}
143 	    activetok[curlen] = 0;
144 	    /* If we ended at a alnum-like token, don't output yet. */
145 	    if (*token == 0) break;
146 	    }
147     else {
148 	/* If there is already something in the token buffer, drain it first
149 	 */
150 	if (curlen == 0) {
151 	    /* Skip over the non-alphanum characters */
152 	    while (*token && breaktable[(int)*token] != BREAK_ALPHA) {
153 		activetok[curlen++] = *token++;
154 		if (curlen >= maxlen) {
155 		    activetok[maxlen-1] = 0;
156 		    fprintf( stderr, "Token too long (%s)=n", activetok );
157 		    return 1;
158 		}
159 	    }
160 	}
161 	activetok[curlen] = 0;
162     }
163 	strcpy(lctok,activetok);
164 	if (ignore_case) {
165 	    toLower(lctok);
166 	}
167     if (maptable->Lookup( lctok, &entry ) == 0) {
168       // Allow debugging output of all matched tokens
169       // Eventually, we should instead have a "token stream" operation;
170       // then this filter is simply built on top of that token stream
171       if (print_matched) {
172 	printf( "%s\n", activetok );
173       }
174       PutLink( activetok, entry );
175 	}
176     else {
177 	next->PutToken( 0, activetok );
178 	}
179     /* We've flushed the token */
180     curlen = 0;
181     }
182     return 0;
183 }
184 
PutQuoted(int nsp,const char * token)185 int OutStreamMap::PutQuoted( int nsp, const char *token )
186 {
187   //  if (debug_flag && token && *token)
188   //    printf( "OutStreamMap::PutQuoted %s\n", token );
189   FlushTokBuf();
190   return next->PutQuoted( nsp, token );
191 }
192 
PutChar(const char c)193 int OutStreamMap::PutChar( const char c )
194 {
195     char cc[2];
196     cc[0] = c;
197     cc[1] = 0;
198     return PutToken( 0, cc );
199 }
200 
201 #define MAX_NAME_LEN 128
202 #define MAX_REPNAME_LEN 128
203 #define MAX_URL_LEN 512
204 /*
205    Read a map file from the instream
206  */
ReadMap(InStream * ins,int ignore_case,int ignoreRepl)207 int OutStreamMap::ReadMap( InStream *ins, int ignore_case, int ignoreRepl )
208 {
209     char    *name, *reptext, *url, *p, *lcname;
210     char ch, sepchar;
211     SrEntry *entry;
212     MapData *info;
213     int     ln;
214 
215     name    = new char[MAX_NAME_LEN];
216     lcname  = new char[MAX_NAME_LEN];
217     reptext = new char[MAX_REPNAME_LEN];
218     url     = new char[MAX_URL_LEN];
219 
220     while (!ins->GetChar( &ch )) {
221 	/* Skip comments */
222 	if (ch == '#') {
223 	    ins->SkipLine();
224 	    continue;
225 	    }
226 	/* Read file line.  One format is
227 	   tagtype:%cname%c%crepname%c%c%c%cskip%cURL
228 
229 	   where %c is any character, but the same character must be used
230 	   in all places (on a line-bu-line basis).
231 
232 	   repname may be empty (null).
233          */
234 	while (!ins->GetChar( &ch ) && ch != ':' && ch != '\n') ;
235 	ins->GetChar( &sepchar );
236 	p = name; ln = 0;
237 	while (!ins->GetChar( &ch ) && ch != sepchar && ++ln < MAX_NAME_LEN)
238 	  *p++ = ch;
239 	if (ch != sepchar) {
240 	  fprintf( stderr, "Name too long in map\n" );
241 	  return 1;
242 	}
243 	ins->GetChar( &ch );
244 	p = reptext; ln = 0;
245 	while (!ins->GetChar( &ch ) && ch != sepchar &&
246 	       ++ln < MAX_REPNAME_LEN) *p++ = ch;
247 	if (ch != sepchar) {
248 	  fprintf( stderr, "Replacement name too long in map\n" );
249 	  return 1;
250 	}
251 	while (!ins->GetChar( &ch ) && ch == sepchar) ;
252 	while (!ins->GetChar( &ch ) && ch != sepchar) ;
253 	p = url; ln = 0;
254 	while (!ins->GetChar( &ch ) && ch != '\n' && ++ln < MAX_URL_LEN)
255 	  *p++ = ch;
256 	if (ch != '\n') {
257 	  fprintf( stderr, "URL too long in map\n" );
258 	  return 1;
259 	}
260 	/* Install this name */
261 	strcpy(lcname,name);
262 	if (ignore_case) toLower(lcname);
263 	//if (debug) printf( "Inserting :%s:\n", lcname );
264 	maptable->Insert( lcname, &entry );
265 	info          = new MapData;
266 	if (ignoreRepl) {
267 	    // Ignore the replacement - just null out the string.
268 	    // if (debug) printf( "Ignoring the replacement text\n" );
269 	    reptext[0] = 0;
270 	}
271 	info->repname = new char[strlen(reptext)+1];
272 	info->url     = new char[strlen(url)+1];
273 	strcpy( info->repname, reptext );
274 	strcpy( info->url, url );
275 	entry->extra_data = (void *)info;
276 	}
277 
278     this->ignore_case = ignore_case;
279     delete[] name;
280     delete[] lcname;
281     delete[] reptext;
282     delete[] url;
283 
284     return 0;
285 }
ReadMap(InStream * ins,int ignore_case)286 int OutStreamMap::ReadMap( InStream *ins, int ignore_case )
287 {
288     return ReadMap( ins, ignore_case, 0 );
289 }
ReadMap(InStream * ins)290 int OutStreamMap::ReadMap( InStream *ins )
291 {
292     return ReadMap( ins, 0, 0 );
293 }
294 
295 // This should really use a PutOp( "link", url, repname ) call to
296 // the appropriate textout handler.
PutLink(const char * name,SrEntry * entry)297 int OutStreamMap::PutLink( const char *name, SrEntry *entry )
298 {
299     MapData *info = (MapData *)entry->extra_data;
300     next->PutToken( 0, "<a href=\"" );
301     next->PutToken( 0, info->url );
302     next->PutToken( 0, "\">" );
303     // If there is no replacement name, use the original name
304     if (info->repname && info->repname[0] != 0) {
305 	//if (debug) printf( "OutStreamMap: repname nonnull = :%s:\n", info->repname );
306 	next->PutToken( 0, info->repname );
307     }
308     else {
309 	//if (debug) printf( "repname null, using name = :%s:\n", name );
310 	next->PutToken( 0, name );
311     }
312     next->PutToken( 0, "</a>" );
313     return 0;
314 }
~OutStreamMap()315 OutStreamMap::~OutStreamMap()
316 {
317     delete activetok;
318     delete maptable;
319     if (next)
320 	delete next;
321 }
322 
323 //
324 // Textout version
325 //
Setup(int in_maxlen)326 void TextOutMap::Setup( int in_maxlen )
327 {
328     int i;
329 
330     squote    = 0;
331     equote    = 0;
332     maxlen    = in_maxlen;
333     activetok = new char[maxlen];
334     activetok[0] = 0;
335     lctok      = new char[maxlen];  // Used for a lower case version of activetok
336     curlen    = 0;
337     position  = activetok;
338     maptable  = new SrList();
339 
340     for (i=0; i<255; i++) {
341 	breaktable[i] = BREAK_OTHER;
342 	if (isascii(i)) {
343 	    if      (isalpha(i)) breaktable[i] = BREAK_ALPHA;
344 	    else if (isdigit(i)) breaktable[i] = BREAK_ALPHA;
345 	    }
346 	}
347     breaktable['_'] = BREAK_ALPHA;
348 
349     //
350     err		  = new ErrHandMsg();
351     lfont	  = 0;
352     nl		  = 0;
353     last_was_nl	  = 1;
354     last_was_par  = 1;
355     debug_flag	  = 0;
356     next	  = 0;
357     userops	  = 0;
358     print_matched = 0;
359     ignore_case   = 0;
360 
361     debug	  = 0;
362 }
363 
364 // Allow the user to apply the same control to the tokenization in the
365 // map output as they may have set for the input
SetBreakChar(char c,int kind)366 int TextOutMap::SetBreakChar( char c, int kind )
367 {
368     breaktable[c] = kind;
369 	return 0;
370 }
371 
TextOutMap(TextOut * textout)372 TextOutMap::TextOutMap( TextOut *textout )
373 {
374   Setup( 1024 );
375   if (debug_flag) textout->Debug( debug_flag );
376   next = textout;
377 }
378 
TextOutMap()379 TextOutMap::TextOutMap( )
380 {
381   Setup( 1024 );
382   next = 0;
383 }
384 
TextOutMap(TextOut * textout,int pflag)385 TextOutMap::TextOutMap( TextOut *textout, int pflag )
386 {
387   Setup( 1024 );
388   if (debug_flag) textout->Debug( debug_flag );
389   next = textout;
390   print_matched = pflag;
391 }
392 
PutChar(const char c)393 int TextOutMap::PutChar( const char c )
394 {
395     char cc[2];
396     cc[0] = c;
397     cc[1] = 0;
398     return PutToken( 0, cc );
399 }
400 
PutNewline(void)401 int TextOutMap::PutNewline( void )
402 {
403   int rc;
404   UpdateNL( 1 );
405   if (*newline_onoutput)
406     rc = PutToken( 0, newline_onoutput );
407   else
408     rc = PutToken( 0, "\n" );
409   return rc;
410 }
411 
FlushTokBuf(void)412 void TextOutMap::FlushTokBuf( void )
413 {
414   if (curlen) {
415     SrEntry *entry;
416     strcpy(lctok,activetok);
417     if (ignore_case) {
418 	toLower(lctok);
419     }
420     if (debug) printf( "Looking up :%s:\n", lctok );
421     if (maptable->Lookup( lctok, &entry ) == 0) {
422       PutLink( activetok, entry );
423     }
424     else {
425       next->PutToken( 0, activetok );
426     }
427     curlen = 0;
428   }
429 }
PutToken(int nsp,const char * token)430 int TextOutMap::PutToken( int nsp, const char *token )
431 {
432     SrEntry *entry;
433 
434     if (debug && token) printf( "Mapping token |%s|\n", token );
435     /*
436        First, tokenize the output and check each token.  Special case:
437        if we reach the end of the string without a "break" character, we must
438        delay processing until we get the break character, since the token may
439        be incomplete (for example, MPI_Send may be delivered to this
440        routine as MPI, then _, then Send.
441      */
442 
443     /* Spaces are a delimiter.  Try to flush activetok, and rerun PutToken */
444     if (nsp) {
445       FlushTokBuf();
446       next->PutToken( nsp, (char *)0 );
447       return PutToken( 0, token );
448     }
449 
450     /* Copy token to activetok while the breaktable entries are the same.
451        Careful of the BREAK_OTHER and BREAK_SPACE case.
452        We ONLY lookup tokens with break values of 2 (BREAK_ALPHA) */
453     if (!token) return 0;
454     // We can flush by sending a null token.
455     if (token[0] == 0) {
456       if (debug) {
457         printf( "Looking up %s ...", activetok );
458       }
459       if (activetok[0] == 0) return 0;
460       strcpy(lctok,activetok);
461       if (ignore_case) {
462 	  toLower(lctok);
463       }
464       if (debug) printf( "Looking up :%s:\n", lctok );
465       if (maptable->Lookup( lctok, &entry ) == 0) {
466         if (debug) printf( "Found entry\n" );
467         PutLink( activetok, entry );
468       }
469       else {
470 	if (debug) printf( "Did not find entry\n" );
471 	next->PutToken( 0, activetok );
472       }
473       /* We've flushed the token */
474       curlen = 0;
475       activetok[0] = 0;
476     }
477 
478     while (*token) {
479 	/* This really needs to tokenize the string */
480 	if (breaktable[(int)*token] == BREAK_ALPHA) {
481 	    /* The character is a "alphanum" */
482 	    while (*token && breaktable[(int)*token] == BREAK_ALPHA) {
483 		activetok[curlen++] = *token++;
484 		if (curlen >= maxlen) {
485 		    activetok[maxlen-1] = 0;
486 		    fprintf( stderr, "Token too long (%s)=n", activetok );
487 		    return 1;
488 		    }
489 		}
490 	    activetok[curlen] = 0;
491 	    /* If we ended at a alnum-like token, don't output yet. */
492 	    if (*token == 0) break;
493 	    }
494 	else {
495 	/* If there is already something in the token buffer, drain it first
496 	 */
497 	  if (curlen == 0) {
498 	    /* Skip over the non-alphanum characters */
499 	    while (*token && breaktable[(int)*token] != BREAK_ALPHA) {
500 		activetok[curlen++] = *token++;
501 		if (curlen >= maxlen) {
502 		    activetok[maxlen-1] = 0;
503 		    fprintf( stderr, "Token too long (%s)=n", activetok );
504 		    return 1;
505 		    }
506 		}
507 	    }
508 	activetok[curlen] = 0;
509 	}
510     if (debug) {
511         printf( "Looking up %s ...", activetok );
512 	}
513     strcpy(lctok,activetok);
514     if (ignore_case) {
515 	toLower(lctok);
516     }
517     if (debug) printf( "Looking up :%s:\n", lctok );
518     if (maptable->Lookup( lctok, &entry ) == 0) {
519         if (debug) printf( "Found entry\n" );
520 	else if (print_matched) printf( "%s\n", activetok );
521         PutLink( activetok, entry );
522 	}
523     else {
524         if (debug) printf( "Did not find entry\n" );
525 	next->PutToken( 0, activetok );
526 	}
527     /* We've flushed the token */
528     curlen = 0;
529     activetok[0] = 0;
530     }
531     return 0;
532 }
ReadMap(InStream * ins,int ignore_case,int ignoreRepl)533 int TextOutMap::ReadMap( InStream *ins, int ignore_case, int ignoreRepl )
534 {
535     char *name, *reptext, *url, *lcname, *p;
536     char ch, sepchar;
537     SrEntry *entry;
538     MapData *info;
539     int     ln;
540 
541     name    = new char[MAX_NAME_LEN];
542     lcname  = new char[MAX_NAME_LEN];
543     reptext = new char[MAX_REPNAME_LEN];
544     url     = new char[MAX_URL_LEN];
545 
546 
547     if (debug) printf( "Reading mappings\n" );
548     while (!ins->GetChar( &ch )) {
549 	/* Skip comments */
550 	if (ch == '#') {
551 	    ins->SkipLine();
552 	    continue;
553 	    }
554 	/* Read file line.  One format is
555 	   tagtype:%cname%c%crepname%c%c%c%cskip%cURL
556 
557 	   where %c is any character, but the same character must be used
558 	   in all places (on a line-bu-line basis).
559          */
560 	while (!ins->GetChar( &ch ) && ch != ':' && ch != '\n') ;
561 	ins->GetChar( &sepchar );
562 	p = name; ln = 0;
563 	while (!ins->GetChar( &ch ) && ch != sepchar && ++ln < MAX_NAME_LEN)
564 	  *p++ = ch;
565 	if (ch != sepchar) {
566 	  fprintf( stderr, "Name too long in map\n" );
567 	  return 1;
568 	}
569 	*p = 0;
570 	ins->GetChar( &ch );
571 	p = reptext; ln = 0;
572 	while (!ins->GetChar( &ch ) && ch != sepchar &&
573 	       ++ln < MAX_REPNAME_LEN) *p++ = ch;
574 	if (ch != sepchar) {
575 	  fprintf( stderr, "Replacement name too long in map\n" );
576 	  return 1;
577 	}
578 	*p = 0;
579 	while (!ins->GetChar( &ch ) && ch == sepchar) ;
580 	while (!ins->GetChar( &ch ) && ch != sepchar) ;
581 	p = url; ln = 0;
582 	while (!ins->GetChar( &ch ) && ch != '\n' && ++ln < MAX_URL_LEN)
583 	  *p++ = ch;
584 	if (ch != '\n') {
585 	  fprintf( stderr, "URL too long in map\n" );
586 	  return 1;
587 	}
588 	*p = 0;
589 
590 	/* Install this name */
591 	strcpy(lcname,name);
592 	if (ignore_case) toLower(lcname);
593 	//if (debug) printf( "Inserting :%s:\n", lcname );
594 	maptable->Insert( lcname, &entry );
595 	info = new MapData;
596 	if (ignoreRepl) {
597 	    // Ignore the replacement - just null out the string.
598 	    if (debug) printf( "Ignoring the replacement text\n" );
599 	    reptext[0] = 0;
600 	}
601 	info->repname = new char[strlen(reptext)+1];
602 	info->url     = new char[strlen(url)+1];
603 	strcpy( info->repname, reptext );
604 	strcpy( info->url, url );
605 	entry->extra_data = (void *)info;
606 	if (debug) {
607 	    printf( "Installing %s with url=%s, text=%s\n",
608 		    name, url, reptext );
609 	}
610     }
611 
612     this->ignore_case = ignore_case;
613 
614     delete[] name;
615     delete[] lcname;
616     delete[] reptext;
617     delete[] url;
618 
619     return 0;
620 }
ReadMap(InStream * ins,int ignore_case)621 int TextOutMap::ReadMap( InStream *ins, int ignore_case )
622 {
623     return ReadMap( ins, ignore_case, 0 );
624 }
ReadMap(InStream * ins)625 int TextOutMap::ReadMap( InStream *ins )
626 {
627     return ReadMap( ins, 0, 0 );
628 }
629 
PutLink(const char * name,SrEntry * entry)630 int TextOutMap::PutLink( const char *name, SrEntry *entry )
631 {
632     MapData *info = (MapData *)entry->extra_data;
633     // We may need to suppress the output message when no link command
634     // is specified, since that is a common case
635     if (next->userops->Lookup( "link", 0 )) {
636       next->PutToken( 0, "<a href=\"" );
637       next->PutToken( 0, info->url );
638       next->PutToken( 0, "\">" );
639       // If there is no replacement name, use the original name
640       if (info->repname && info->repname[0] != 0) {
641 	  if (debug) printf( "TextOutMap:repname nonnull = :%s:\n", info->repname );
642 	  next->PutToken( 0, info->repname );
643       }
644       else {
645 	  if (debug) printf( "repname null, using name = :%s:\n", name );
646 	  next->PutToken( 0, name );
647       }
648       next->PutToken( 0, "</a>" );
649     }
650     else {
651       next->PutOp( "link", info->url, info->repname, 0 );
652     }
653     return 0;
654 }
PutQuoted(int nsp,const char * token)655 int TextOutMap::PutQuoted( int nsp, const char *token )
656 {
657   //  if (debug_flag && token && *token)
658   //    printf( "OutStreamMap::PutQuoted %s\n", token );
659   FlushTokBuf();
660   return next->PutQuoted( nsp, token );
661 }
662 
~TextOutMap()663 TextOutMap::~TextOutMap()
664 {
665     delete activetok;
666     delete maptable;
667     if (next)
668 	delete next;
669 }
SetRegisterValue(int regnum,const char * val)670 int TextOutMap::SetRegisterValue( int regnum, const char * val )
671 {
672   if (next)
673     next->SetRegisterValue( regnum, val );
674   return 0;
675 }
toLower(char * s)676 void TextOutMap::toLower(char *s)
677 {
678     char c;
679     while (*s) {
680 	c = *s;
681 	if (isascii(c) && isupper(c)) *s = tolower(c);
682 	s++;
683     }
684 }
685