1 #include "tfilter.h"
2
3 #include "maptok.h"
4 #include <ctype.h>
5 #include <string.h>
6
/*
 * This file contains routines to map names from one string to another.
 * The mechanism is fairly general, and is implemented as an OutStream,
 * allowing it to be inserted almost anywhere.
 *
 * The most convenient way to generate the mapping table is by
 * reading a file; for this we use the command file-reading object.
 * Should probably remove the OutStream version, or unify the code.
 */
16
/* Replacement data stored with each map-table entry (in extra_data) */
struct MapData {
    char *repname;   /* Text to show for the link; may be empty */
    char *url;       /* Target of the link */
};
22
Setup(int in_maxlen)23 void OutStreamMap::Setup( int in_maxlen )
24 {
25 int i;
26
27 squote = 0;
28 equote = 0;
29 maxlen = in_maxlen;
30 activetok = new char[maxlen];
31 activetok[0] = 0;
32 lctok = new char[maxlen]; // Used for a lower case version of activetok
33 curlen = 0;
34 position = activetok;
35 maptable = new SrList();
36 print_matched = 0; // For debugging
37 ignore_case = 0;
38
39 for (i=0; i<255; i++) {
40 breaktable[i] = BREAK_OTHER;
41 if (isascii(i)) {
42 if (isalpha(i)) breaktable[i] = BREAK_ALPHA;
43 else if (isdigit(i)) breaktable[i] = BREAK_ALPHA;
44 }
45 }
46 breaktable['_'] = BREAK_ALPHA;
47 }
48
OutStreamMap(OutStream * outs,int in_maxlen)49 OutStreamMap::OutStreamMap( OutStream *outs, int in_maxlen )
50 {
51 Setup( in_maxlen );
52 next = outs;
53 }
54
OutStreamMap(OutStream * outs)55 OutStreamMap::OutStreamMap( OutStream *outs )
56 {
57 Setup( 1024 );
58 next = outs;
59 }
OutStreamMap(int in_maxlen)60 OutStreamMap::OutStreamMap( int in_maxlen )
61 {
62 Setup( in_maxlen );
63 next = 0;
64 }
OutStreamMap(OutStream * outs,int in_maxlen,int pflag)65 OutStreamMap::OutStreamMap( OutStream *outs, int in_maxlen, int pflag )
66 {
67 if (in_maxlen <= 0) in_maxlen = 1024;
68 Setup( in_maxlen );
69 next = outs;
70 print_matched = pflag;
71 }
FlushTokBuf(void)72 void OutStreamMap::FlushTokBuf( void )
73 {
74 if (curlen) {
75 SrEntry *entry;
76 // Add an option to only compare lower case version? In that case,
77 // also insert entries only in lower case.
78
79 strcpy(lctok,activetok);
80 if (ignore_case) {
81 toLower(lctok);
82 }
83 if (maptable->Lookup( lctok, &entry ) == 0) {
84 PutLink( activetok, entry );
85 }
86 else {
87 next->PutToken( 0, activetok );
88 }
89 curlen = 0;
90 }
91 }
92
toLower(char * s)93 void OutStreamMap::toLower(char *s)
94 {
95 char c;
96 while (*s) {
97 c = *s;
98 if (isascii(c) && isupper(c)) *s = tolower(c);
99 s++;
100 }
101 }
102
103 /*
104 * This routine works by putting a token into the internal buffer.
105 * Once it knows that it can (found a delimiter), it tries to look it
106 * up and replace it.
107 */
PutToken(int nsp,const char * token)108 int OutStreamMap::PutToken( int nsp, const char *token )
109 {
110 SrEntry *entry;
111
112 /*
113 First, tokenize the output and check each token. Special case:
114 if we reach the end of the string without a "break" character, we must
115 delay processing until we get the break character, since the token may
116 be incomplete (for example, MPI_Send may be delivered to this
117 routine as MPI, then _, then Send.
118 */
119
120 /* Spaces are a delimiter. Try to flush activetok, and rerun PutToken */
121 if (nsp) {
122 FlushTokBuf();
123 next->PutToken( nsp, (char *)0 );
124 return PutToken( 0, token );
125 }
126
127 /* Copy token to activetok while the breaktable entries are the same.
128 Careful of the BREAK_OTHER and BREAK_SPACE case.
129 We ONLY lookup tokens with break values of 2 (BREAK_ALPHA) */
130 if (!token) return 0;
131 while (*token) {
132 /* This really needs to tokenize the string */
133 if (breaktable[(int)*token] == BREAK_ALPHA) {
134 /* The character is a "alphanum" */
135 while (*token && breaktable[(int)*token] == BREAK_ALPHA) {
136 activetok[curlen++] = *token++;
137 if (curlen >= maxlen) {
138 activetok[maxlen-1] = 0;
139 fprintf( stderr, "Token too long (%s)=n", activetok );
140 return 1;
141 }
142 }
143 activetok[curlen] = 0;
144 /* If we ended at a alnum-like token, don't output yet. */
145 if (*token == 0) break;
146 }
147 else {
148 /* If there is already something in the token buffer, drain it first
149 */
150 if (curlen == 0) {
151 /* Skip over the non-alphanum characters */
152 while (*token && breaktable[(int)*token] != BREAK_ALPHA) {
153 activetok[curlen++] = *token++;
154 if (curlen >= maxlen) {
155 activetok[maxlen-1] = 0;
156 fprintf( stderr, "Token too long (%s)=n", activetok );
157 return 1;
158 }
159 }
160 }
161 activetok[curlen] = 0;
162 }
163 strcpy(lctok,activetok);
164 if (ignore_case) {
165 toLower(lctok);
166 }
167 if (maptable->Lookup( lctok, &entry ) == 0) {
168 // Allow debugging output of all matched tokens
169 // Eventually, we should instead have a "token stream" operation;
170 // then this filter is simply built on top of that token stream
171 if (print_matched) {
172 printf( "%s\n", activetok );
173 }
174 PutLink( activetok, entry );
175 }
176 else {
177 next->PutToken( 0, activetok );
178 }
179 /* We've flushed the token */
180 curlen = 0;
181 }
182 return 0;
183 }
184
PutQuoted(int nsp,const char * token)185 int OutStreamMap::PutQuoted( int nsp, const char *token )
186 {
187 // if (debug_flag && token && *token)
188 // printf( "OutStreamMap::PutQuoted %s\n", token );
189 FlushTokBuf();
190 return next->PutQuoted( nsp, token );
191 }
192
PutChar(const char c)193 int OutStreamMap::PutChar( const char c )
194 {
195 char cc[2];
196 cc[0] = c;
197 cc[1] = 0;
198 return PutToken( 0, cc );
199 }
200
201 #define MAX_NAME_LEN 128
202 #define MAX_REPNAME_LEN 128
203 #define MAX_URL_LEN 512
204 /*
205 Read a map file from the instream
206 */
ReadMap(InStream * ins,int ignore_case,int ignoreRepl)207 int OutStreamMap::ReadMap( InStream *ins, int ignore_case, int ignoreRepl )
208 {
209 char *name, *reptext, *url, *p, *lcname;
210 char ch, sepchar;
211 SrEntry *entry;
212 MapData *info;
213 int ln;
214
215 name = new char[MAX_NAME_LEN];
216 lcname = new char[MAX_NAME_LEN];
217 reptext = new char[MAX_REPNAME_LEN];
218 url = new char[MAX_URL_LEN];
219
220 while (!ins->GetChar( &ch )) {
221 /* Skip comments */
222 if (ch == '#') {
223 ins->SkipLine();
224 continue;
225 }
226 /* Read file line. One format is
227 tagtype:%cname%c%crepname%c%c%c%cskip%cURL
228
229 where %c is any character, but the same character must be used
230 in all places (on a line-bu-line basis).
231
232 repname may be empty (null).
233 */
234 while (!ins->GetChar( &ch ) && ch != ':' && ch != '\n') ;
235 ins->GetChar( &sepchar );
236 p = name; ln = 0;
237 while (!ins->GetChar( &ch ) && ch != sepchar && ++ln < MAX_NAME_LEN)
238 *p++ = ch;
239 if (ch != sepchar) {
240 fprintf( stderr, "Name too long in map\n" );
241 return 1;
242 }
243 ins->GetChar( &ch );
244 p = reptext; ln = 0;
245 while (!ins->GetChar( &ch ) && ch != sepchar &&
246 ++ln < MAX_REPNAME_LEN) *p++ = ch;
247 if (ch != sepchar) {
248 fprintf( stderr, "Replacement name too long in map\n" );
249 return 1;
250 }
251 while (!ins->GetChar( &ch ) && ch == sepchar) ;
252 while (!ins->GetChar( &ch ) && ch != sepchar) ;
253 p = url; ln = 0;
254 while (!ins->GetChar( &ch ) && ch != '\n' && ++ln < MAX_URL_LEN)
255 *p++ = ch;
256 if (ch != '\n') {
257 fprintf( stderr, "URL too long in map\n" );
258 return 1;
259 }
260 /* Install this name */
261 strcpy(lcname,name);
262 if (ignore_case) toLower(lcname);
263 //if (debug) printf( "Inserting :%s:\n", lcname );
264 maptable->Insert( lcname, &entry );
265 info = new MapData;
266 if (ignoreRepl) {
267 // Ignore the replacement - just null out the string.
268 // if (debug) printf( "Ignoring the replacement text\n" );
269 reptext[0] = 0;
270 }
271 info->repname = new char[strlen(reptext)+1];
272 info->url = new char[strlen(url)+1];
273 strcpy( info->repname, reptext );
274 strcpy( info->url, url );
275 entry->extra_data = (void *)info;
276 }
277
278 this->ignore_case = ignore_case;
279 delete[] name;
280 delete[] lcname;
281 delete[] reptext;
282 delete[] url;
283
284 return 0;
285 }
ReadMap(InStream * ins,int ignore_case)286 int OutStreamMap::ReadMap( InStream *ins, int ignore_case )
287 {
288 return ReadMap( ins, ignore_case, 0 );
289 }
ReadMap(InStream * ins)290 int OutStreamMap::ReadMap( InStream *ins )
291 {
292 return ReadMap( ins, 0, 0 );
293 }
294
295 // This should really use a PutOp( "link", url, repname ) call to
296 // the appropriate textout handler.
PutLink(const char * name,SrEntry * entry)297 int OutStreamMap::PutLink( const char *name, SrEntry *entry )
298 {
299 MapData *info = (MapData *)entry->extra_data;
300 next->PutToken( 0, "<a href=\"" );
301 next->PutToken( 0, info->url );
302 next->PutToken( 0, "\">" );
303 // If there is no replacement name, use the original name
304 if (info->repname && info->repname[0] != 0) {
305 //if (debug) printf( "OutStreamMap: repname nonnull = :%s:\n", info->repname );
306 next->PutToken( 0, info->repname );
307 }
308 else {
309 //if (debug) printf( "repname null, using name = :%s:\n", name );
310 next->PutToken( 0, name );
311 }
312 next->PutToken( 0, "</a>" );
313 return 0;
314 }
~OutStreamMap()315 OutStreamMap::~OutStreamMap()
316 {
317 delete activetok;
318 delete maptable;
319 if (next)
320 delete next;
321 }
322
323 //
324 // Textout version
325 //
Setup(int in_maxlen)326 void TextOutMap::Setup( int in_maxlen )
327 {
328 int i;
329
330 squote = 0;
331 equote = 0;
332 maxlen = in_maxlen;
333 activetok = new char[maxlen];
334 activetok[0] = 0;
335 lctok = new char[maxlen]; // Used for a lower case version of activetok
336 curlen = 0;
337 position = activetok;
338 maptable = new SrList();
339
340 for (i=0; i<255; i++) {
341 breaktable[i] = BREAK_OTHER;
342 if (isascii(i)) {
343 if (isalpha(i)) breaktable[i] = BREAK_ALPHA;
344 else if (isdigit(i)) breaktable[i] = BREAK_ALPHA;
345 }
346 }
347 breaktable['_'] = BREAK_ALPHA;
348
349 //
350 err = new ErrHandMsg();
351 lfont = 0;
352 nl = 0;
353 last_was_nl = 1;
354 last_was_par = 1;
355 debug_flag = 0;
356 next = 0;
357 userops = 0;
358 print_matched = 0;
359 ignore_case = 0;
360
361 debug = 0;
362 }
363
364 // Allow the user to apply the same control to the tokenization in the
365 // map output as they may have set for the input
SetBreakChar(char c,int kind)366 int TextOutMap::SetBreakChar( char c, int kind )
367 {
368 breaktable[c] = kind;
369 return 0;
370 }
371
TextOutMap(TextOut * textout)372 TextOutMap::TextOutMap( TextOut *textout )
373 {
374 Setup( 1024 );
375 if (debug_flag) textout->Debug( debug_flag );
376 next = textout;
377 }
378
TextOutMap()379 TextOutMap::TextOutMap( )
380 {
381 Setup( 1024 );
382 next = 0;
383 }
384
TextOutMap(TextOut * textout,int pflag)385 TextOutMap::TextOutMap( TextOut *textout, int pflag )
386 {
387 Setup( 1024 );
388 if (debug_flag) textout->Debug( debug_flag );
389 next = textout;
390 print_matched = pflag;
391 }
392
PutChar(const char c)393 int TextOutMap::PutChar( const char c )
394 {
395 char cc[2];
396 cc[0] = c;
397 cc[1] = 0;
398 return PutToken( 0, cc );
399 }
400
PutNewline(void)401 int TextOutMap::PutNewline( void )
402 {
403 int rc;
404 UpdateNL( 1 );
405 if (*newline_onoutput)
406 rc = PutToken( 0, newline_onoutput );
407 else
408 rc = PutToken( 0, "\n" );
409 return rc;
410 }
411
FlushTokBuf(void)412 void TextOutMap::FlushTokBuf( void )
413 {
414 if (curlen) {
415 SrEntry *entry;
416 strcpy(lctok,activetok);
417 if (ignore_case) {
418 toLower(lctok);
419 }
420 if (debug) printf( "Looking up :%s:\n", lctok );
421 if (maptable->Lookup( lctok, &entry ) == 0) {
422 PutLink( activetok, entry );
423 }
424 else {
425 next->PutToken( 0, activetok );
426 }
427 curlen = 0;
428 }
429 }
PutToken(int nsp,const char * token)430 int TextOutMap::PutToken( int nsp, const char *token )
431 {
432 SrEntry *entry;
433
434 if (debug && token) printf( "Mapping token |%s|\n", token );
435 /*
436 First, tokenize the output and check each token. Special case:
437 if we reach the end of the string without a "break" character, we must
438 delay processing until we get the break character, since the token may
439 be incomplete (for example, MPI_Send may be delivered to this
440 routine as MPI, then _, then Send.
441 */
442
443 /* Spaces are a delimiter. Try to flush activetok, and rerun PutToken */
444 if (nsp) {
445 FlushTokBuf();
446 next->PutToken( nsp, (char *)0 );
447 return PutToken( 0, token );
448 }
449
450 /* Copy token to activetok while the breaktable entries are the same.
451 Careful of the BREAK_OTHER and BREAK_SPACE case.
452 We ONLY lookup tokens with break values of 2 (BREAK_ALPHA) */
453 if (!token) return 0;
454 // We can flush by sending a null token.
455 if (token[0] == 0) {
456 if (debug) {
457 printf( "Looking up %s ...", activetok );
458 }
459 if (activetok[0] == 0) return 0;
460 strcpy(lctok,activetok);
461 if (ignore_case) {
462 toLower(lctok);
463 }
464 if (debug) printf( "Looking up :%s:\n", lctok );
465 if (maptable->Lookup( lctok, &entry ) == 0) {
466 if (debug) printf( "Found entry\n" );
467 PutLink( activetok, entry );
468 }
469 else {
470 if (debug) printf( "Did not find entry\n" );
471 next->PutToken( 0, activetok );
472 }
473 /* We've flushed the token */
474 curlen = 0;
475 activetok[0] = 0;
476 }
477
478 while (*token) {
479 /* This really needs to tokenize the string */
480 if (breaktable[(int)*token] == BREAK_ALPHA) {
481 /* The character is a "alphanum" */
482 while (*token && breaktable[(int)*token] == BREAK_ALPHA) {
483 activetok[curlen++] = *token++;
484 if (curlen >= maxlen) {
485 activetok[maxlen-1] = 0;
486 fprintf( stderr, "Token too long (%s)=n", activetok );
487 return 1;
488 }
489 }
490 activetok[curlen] = 0;
491 /* If we ended at a alnum-like token, don't output yet. */
492 if (*token == 0) break;
493 }
494 else {
495 /* If there is already something in the token buffer, drain it first
496 */
497 if (curlen == 0) {
498 /* Skip over the non-alphanum characters */
499 while (*token && breaktable[(int)*token] != BREAK_ALPHA) {
500 activetok[curlen++] = *token++;
501 if (curlen >= maxlen) {
502 activetok[maxlen-1] = 0;
503 fprintf( stderr, "Token too long (%s)=n", activetok );
504 return 1;
505 }
506 }
507 }
508 activetok[curlen] = 0;
509 }
510 if (debug) {
511 printf( "Looking up %s ...", activetok );
512 }
513 strcpy(lctok,activetok);
514 if (ignore_case) {
515 toLower(lctok);
516 }
517 if (debug) printf( "Looking up :%s:\n", lctok );
518 if (maptable->Lookup( lctok, &entry ) == 0) {
519 if (debug) printf( "Found entry\n" );
520 else if (print_matched) printf( "%s\n", activetok );
521 PutLink( activetok, entry );
522 }
523 else {
524 if (debug) printf( "Did not find entry\n" );
525 next->PutToken( 0, activetok );
526 }
527 /* We've flushed the token */
528 curlen = 0;
529 activetok[0] = 0;
530 }
531 return 0;
532 }
ReadMap(InStream * ins,int ignore_case,int ignoreRepl)533 int TextOutMap::ReadMap( InStream *ins, int ignore_case, int ignoreRepl )
534 {
535 char *name, *reptext, *url, *lcname, *p;
536 char ch, sepchar;
537 SrEntry *entry;
538 MapData *info;
539 int ln;
540
541 name = new char[MAX_NAME_LEN];
542 lcname = new char[MAX_NAME_LEN];
543 reptext = new char[MAX_REPNAME_LEN];
544 url = new char[MAX_URL_LEN];
545
546
547 if (debug) printf( "Reading mappings\n" );
548 while (!ins->GetChar( &ch )) {
549 /* Skip comments */
550 if (ch == '#') {
551 ins->SkipLine();
552 continue;
553 }
554 /* Read file line. One format is
555 tagtype:%cname%c%crepname%c%c%c%cskip%cURL
556
557 where %c is any character, but the same character must be used
558 in all places (on a line-bu-line basis).
559 */
560 while (!ins->GetChar( &ch ) && ch != ':' && ch != '\n') ;
561 ins->GetChar( &sepchar );
562 p = name; ln = 0;
563 while (!ins->GetChar( &ch ) && ch != sepchar && ++ln < MAX_NAME_LEN)
564 *p++ = ch;
565 if (ch != sepchar) {
566 fprintf( stderr, "Name too long in map\n" );
567 return 1;
568 }
569 *p = 0;
570 ins->GetChar( &ch );
571 p = reptext; ln = 0;
572 while (!ins->GetChar( &ch ) && ch != sepchar &&
573 ++ln < MAX_REPNAME_LEN) *p++ = ch;
574 if (ch != sepchar) {
575 fprintf( stderr, "Replacement name too long in map\n" );
576 return 1;
577 }
578 *p = 0;
579 while (!ins->GetChar( &ch ) && ch == sepchar) ;
580 while (!ins->GetChar( &ch ) && ch != sepchar) ;
581 p = url; ln = 0;
582 while (!ins->GetChar( &ch ) && ch != '\n' && ++ln < MAX_URL_LEN)
583 *p++ = ch;
584 if (ch != '\n') {
585 fprintf( stderr, "URL too long in map\n" );
586 return 1;
587 }
588 *p = 0;
589
590 /* Install this name */
591 strcpy(lcname,name);
592 if (ignore_case) toLower(lcname);
593 //if (debug) printf( "Inserting :%s:\n", lcname );
594 maptable->Insert( lcname, &entry );
595 info = new MapData;
596 if (ignoreRepl) {
597 // Ignore the replacement - just null out the string.
598 if (debug) printf( "Ignoring the replacement text\n" );
599 reptext[0] = 0;
600 }
601 info->repname = new char[strlen(reptext)+1];
602 info->url = new char[strlen(url)+1];
603 strcpy( info->repname, reptext );
604 strcpy( info->url, url );
605 entry->extra_data = (void *)info;
606 if (debug) {
607 printf( "Installing %s with url=%s, text=%s\n",
608 name, url, reptext );
609 }
610 }
611
612 this->ignore_case = ignore_case;
613
614 delete[] name;
615 delete[] lcname;
616 delete[] reptext;
617 delete[] url;
618
619 return 0;
620 }
ReadMap(InStream * ins,int ignore_case)621 int TextOutMap::ReadMap( InStream *ins, int ignore_case )
622 {
623 return ReadMap( ins, ignore_case, 0 );
624 }
ReadMap(InStream * ins)625 int TextOutMap::ReadMap( InStream *ins )
626 {
627 return ReadMap( ins, 0, 0 );
628 }
629
PutLink(const char * name,SrEntry * entry)630 int TextOutMap::PutLink( const char *name, SrEntry *entry )
631 {
632 MapData *info = (MapData *)entry->extra_data;
633 // We may need to suppress the output message when no link command
634 // is specified, since that is a common case
635 if (next->userops->Lookup( "link", 0 )) {
636 next->PutToken( 0, "<a href=\"" );
637 next->PutToken( 0, info->url );
638 next->PutToken( 0, "\">" );
639 // If there is no replacement name, use the original name
640 if (info->repname && info->repname[0] != 0) {
641 if (debug) printf( "TextOutMap:repname nonnull = :%s:\n", info->repname );
642 next->PutToken( 0, info->repname );
643 }
644 else {
645 if (debug) printf( "repname null, using name = :%s:\n", name );
646 next->PutToken( 0, name );
647 }
648 next->PutToken( 0, "</a>" );
649 }
650 else {
651 next->PutOp( "link", info->url, info->repname, 0 );
652 }
653 return 0;
654 }
PutQuoted(int nsp,const char * token)655 int TextOutMap::PutQuoted( int nsp, const char *token )
656 {
657 // if (debug_flag && token && *token)
658 // printf( "OutStreamMap::PutQuoted %s\n", token );
659 FlushTokBuf();
660 return next->PutQuoted( nsp, token );
661 }
662
~TextOutMap()663 TextOutMap::~TextOutMap()
664 {
665 delete activetok;
666 delete maptable;
667 if (next)
668 delete next;
669 }
SetRegisterValue(int regnum,const char * val)670 int TextOutMap::SetRegisterValue( int regnum, const char * val )
671 {
672 if (next)
673 next->SetRegisterValue( regnum, val );
674 return 0;
675 }
toLower(char * s)676 void TextOutMap::toLower(char *s)
677 {
678 char c;
679 while (*s) {
680 c = *s;
681 if (isascii(c) && isupper(c)) *s = tolower(c);
682 s++;
683 }
684 }
685