1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : April 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* A Tokenize class, both for Tokens (Strings plus alpha) */
38 /* EST_TokenStream for strings, FILE *, files, pipes etc */
39 /* */
40 /*=======================================================================*/
41 #include <cstdio>
42 #include <iostream>
43 #include "EST_unix.h"
44 #include <cstdlib>
45 #include <climits>
46 #include <cstring>
47 #include "EST_math.h"
48 #include "EST_Token.h"
49 #include "EST_string_aux.h"
50 #include "EST_cutils.h"
51 #include "EST_error.h"
52
// Default character sets for tokenising.  In this file only the
// whitespace set is installed (by EST_TokenStream::default_values());
// the other three are presumably for callers that want the standard
// symbol and punctuation sets -- TODO confirm against the header.
const EST_String EST_Token_Default_WhiteSpaceChars = " \t\n\r";
const EST_String EST_Token_Default_SingleCharSymbols = "(){}[]";
const EST_String EST_Token_Default_PrePunctuationSymbols = "\"'`({[";
const EST_String EST_Token_Default_PunctuationSymbols = "\"'`.,:;!?]})";
// Labels stored in EST_TokenStream::Origin by the open() variants that
// take an already open FILE *, an istream, or an in-memory string.
const EST_String Token_Origin_FD = "existing file descriptor";
const EST_String Token_Origin_Stream = "existing istream";
const EST_String Token_Origin_String = "existing string";

// Matches one whitespace character; quote_string() uses it to decide
// whether a string has to be quoted.
static EST_Regex RXanywhitespace("[ \t\n\r]");
62
// Make sure index `pos` is usable in the heap buffer `str`, whose current
// capacity is `*max`.
//
// If pos < *max the buffer is returned untouched.  Otherwise a larger
// buffer is allocated (twice `pos` when pos is beyond the old capacity,
// else twice the old capacity), the old contents are copied across, the
// old buffer is freed, `*max` is updated and the new buffer is returned.
// The caller must replace its pointer with the return value.
//
// The copy is a raw byte copy of the old capacity: the buffer may hold
// embedded NUL bytes, and never more than `*max` bytes exist to be read,
// so neither a string copy nor a copy of `pos` bytes would be safe.
static inline char *check_extend_str_in(char *str, int pos, int *max)
{
    if (pos < *max)
        return str;                 // still room, nothing to do

    const int old_max = *max;
    int new_max = (pos > old_max) ? 2 * pos : 2 * old_max;
    if (new_max <= pos)             // only when pos and old_max are both 0
        new_max = pos + 1;

    char *newstuff = new char[new_max];
    if (old_max > 0)
        memcpy(newstuff, str, old_max);
    delete [] str;

    *max = new_max;
    return newstuff;
}
83
// Inline guard around check_extend_str_in(): the function is only called
// when POS has reached *MAX, otherwise STR is yielded unchanged.  The
// result must be assigned back to the buffer pointer.  POS and MAX are
// expanded more than once, so pass expressions without side effects.
#define check_extend_str(STR, POS, MAX) \
(((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR))
86
operator <<(ostream & s,const EST_Token & p)87 ostream& operator<<(ostream& s, const EST_Token &p)
88 {
89 s << "[TOKEN " << p.pname << "]";
90 return s;
91 }
92
93
// Copy assignment: takes over every field of `a` -- the four text parts
// (whitespace, pre-punctuation, name, punctuation), the quoted flag and
// the recorded position information.
EST_Token &EST_Token::operator = (const EST_Token &a)
{
    // the text of the token
    space   = a.space;
    prepunc = a.prepunc;
    pname   = a.pname;
    punc    = a.punc;

    // how and where it was read
    p_quoted  = a.p_quoted;
    p_filepos = a.p_filepos;
    linenum   = a.linenum;
    linepos   = a.linepos;

    return *this;
}
106
pos_description() const107 const EST_String EST_Token::pos_description() const
108 {
109 return "line "+itoString(linenum)+" char "+itoString(linepos);
110 }
111
operator =(const EST_String & a)112 EST_Token &EST_Token::operator = (const EST_String &a)
113 {
114 pname = a;
115 return *this;
116 }
117
EST_TokenStream()118 EST_TokenStream::EST_TokenStream()
119 {
120 tok_wspacelen = 64; // will grow if necessary
121 tok_wspace = new char[tok_wspacelen];
122 tok_stufflen = 512; // will grow if necessary
123 tok_stuff = new char[tok_stufflen];
124 tok_prepuncslen = 32; // will grow if necessary
125 tok_prepuncs = new char[tok_prepuncslen];
126
127 default_values();
128 }
129
EST_TokenStream(EST_TokenStream & s)130 EST_TokenStream::EST_TokenStream(EST_TokenStream &s)
131 {
132 (void)s;
133
134 cerr << "TokenStream: warning passing TokenStream not as reference"
135 << endl;
136
137 // You *really* shouldn't use this AT ALL unless you
138 // fully understand its consequences, you'll be copying open
139 // files and moving file pointers all over the place
140 // basically *DON'T* do this, pass the stream by reference
141
142 // Now there may be occasions when you do want to do this for example
143 // when you need to do far look ahead or check point as you read
144 // but they are obscure and I'm not sure how to do that for all
145 // the file forms supported by the TokenStream. If you do
146 // I can write a clone function that might do it.
147
148 }
149
default_values()150 void EST_TokenStream::default_values()
151 {
152 type = tst_none;
153 peeked_tokp = FALSE;
154 peeked_charp = FALSE;
155 eof_flag = FALSE;
156 quotes = FALSE;
157 p_filepos = 0;
158 linepos = 1;
159 WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
160 SingleCharSymbols = EST_String::Empty;
161 PrePunctuationSymbols = EST_String::Empty;
162 PunctuationSymbols = EST_String::Empty;
163 build_table();
164 close_at_end=TRUE;
165 }
166
~EST_TokenStream()167 EST_TokenStream::~EST_TokenStream()
168 {
169 if (type != tst_none)
170 close();
171 delete [] tok_wspace;
172 delete [] tok_stuff;
173 delete [] tok_prepuncs;
174
175 }
176
operator <<(ostream & s,EST_TokenStream & p)177 ostream& operator<<(ostream& s, EST_TokenStream &p)
178 {
179 s << "[TOKENSTREAM ";
180 switch (p.type)
181 {
182 case tst_none:
183 cerr << "UNSET"; break;
184 case tst_file:
185 cerr << "FILE"; break;
186 case tst_pipe:
187 cerr << "PIPE"; break;
188 case tst_istream:
189 cerr << "ISTREAM"; break;
190 case tst_string:
191 cerr << "STRING"; break;
192 default:
193 cerr << "UNKNOWN" << endl;
194 }
195 s << "]";
196
197 return s;
198 }
199
open(const EST_String & filename)200 int EST_TokenStream::open(const EST_String &filename)
201 {
202 if (type != tst_none)
203 close();
204 default_values();
205 fp = fopen(filename,"rb");
206 if (fp == NULL)
207 {
208 cerr << "Cannot open file " << filename << " as tokenstream"
209 << endl;
210 return -1;
211 }
212 Origin = filename;
213 type = tst_file;
214
215 return 0;
216 }
217
open(FILE * ofp,int close_when_finished)218 int EST_TokenStream::open(FILE *ofp, int close_when_finished)
219 {
220 // absorb already open stream
221 if (type != tst_none)
222 close();
223 default_values();
224 fp = ofp;
225 if (fp == NULL)
226 {
227 cerr << "Cannot absorb NULL filestream as tokenstream" << endl;
228 return -1;
229 }
230 Origin = Token_Origin_FD;
231 type = tst_file;
232
233 close_at_end = close_when_finished;
234
235 return 0;
236 }
237
open(istream & newis)238 int EST_TokenStream::open(istream &newis)
239 {
240 // absorb already open istream
241 if (type != tst_none)
242 close();
243 default_values();
244 is = &newis;
245 Origin = Token_Origin_Stream;
246 type = tst_istream;
247
248 return 0;
249 }
250
open_string(const EST_String & newbuffer)251 int EST_TokenStream::open_string(const EST_String &newbuffer)
252 {
253 // Make a tokenstream from an internal existing string/buffer
254 const char *buf;
255 if (type != tst_none)
256 close();
257 default_values();
258 buf = (const char *)newbuffer;
259 buffer_length = newbuffer.length();
260 buffer = new char[buffer_length+1];
261 memmove(buffer,buf,buffer_length+1);
262 pos = 0;
263 Origin = Token_Origin_String;
264 type = tst_string;
265
266 return 0;
267 }
268
seek_end()269 int EST_TokenStream::seek_end()
270 {
271 // This isn't actually useful but people expect it
272 peeked_charp = FALSE;
273 peeked_tokp = FALSE;
274
275 switch (type)
276 {
277 case tst_none:
278 cerr << "EST_TokenStream unset" << endl;
279 return -1;
280 break;
281 case tst_file:
282 fseek(fp,0,SEEK_END);
283 p_filepos = ftell(fp);
284 return p_filepos;
285 case tst_pipe:
286 cerr << "EST_TokenStream seek on pipe not supported" << endl;
287 return -1;
288 break;
289 case tst_istream:
290 cerr << "EST_TokenStream seek on istream not yet supported" << endl;
291 return -1;
292 break;
293 case tst_string:
294 pos = buffer_length;
295 return pos;
296 default:
297 cerr << "EST_TokenStream: unknown type" << endl;
298 return -1;
299 }
300
301 return -1; // can't get here
302 }
303
seek(int position)304 int EST_TokenStream::seek(int position)
305 {
306 peeked_charp = FALSE;
307 peeked_tokp = FALSE;
308
309 switch (type)
310 {
311 case tst_none:
312 cerr << "EST_TokenStream unset" << endl;
313 return -1;
314 break;
315 case tst_file:
316 p_filepos = position;
317 return fseek(fp,position,SEEK_SET);
318 case tst_pipe:
319 cerr << "EST_TokenStream seek on pipe not supported" << endl;
320 return -1;
321 break;
322 case tst_istream:
323 cerr << "EST_TokenStream seek on istream not yet supported" << endl;
324 return -1;
325 break;
326 case tst_string:
327 if (position >= pos)
328 {
329 pos = position;
330 return -1;
331 }
332 else
333 {
334 pos = position;
335 return 0;
336 }
337 break;
338 default:
339 cerr << "EST_TokenStream: unknown type" << endl;
340 return -1;
341 }
342
343 return -1; // can't get here
344
345 }
346
// File-scope trampoline to the C library fread().  Inside
// EST_TokenStream the name fread refers to the member function, so the
// member code calls this instead.  Returns the number of items read.
static int stdio_fread(void *buff,int size,int nitems,FILE *fp)
{
    const size_t items_done = fread(buff,size,nitems,fp);

    return items_done;
}
352
fread(void * buff,int size,int nitems)353 int EST_TokenStream::fread(void *buff, int size, int nitems)
354 {
355 // switching into binary mode for current position
356 int items_read;
357
358 // so we can continue to read afterwards
359 if (peeked_tokp)
360 {
361 cerr << "ERROR " << pos_description()
362 << " peeked into binary data" << endl;
363 return 0;
364 }
365
366 peeked_charp = FALSE;
367 peeked_tokp = FALSE;
368
369 switch (type)
370 {
371 case tst_none:
372 cerr << "EST_TokenStream unset" << endl;
373 return 0;
374 break;
375 case tst_file:
376 items_read = stdio_fread(buff,(size_t)size,(size_t)nitems,fp);
377 p_filepos += items_read*size;
378 return items_read;
379 case tst_pipe:
380 cerr << "EST_TokenStream fread pipe not yet supported" << endl;
381 return 0;
382 break;
383 case tst_istream:
384 cerr << "EST_TokenStream fread istream not yet supported" << endl;
385 return 0;
386 case tst_string:
387 if ((buffer_length-pos)/size < nitems)
388 items_read = (buffer_length-pos)/size;
389 else
390 items_read = nitems;
391 memcpy(buff,&buffer[pos],items_read*size);
392 pos += items_read*size;
393 return items_read;
394 default:
395 cerr << "EST_TokenStream: unknown type" << endl;
396 return EOF;
397 }
398
399 return 0; // can't get here
400
401 }
402
close(void)403 void EST_TokenStream::close(void)
404 {
405 // close any files (if they were used)
406
407 switch (type)
408 {
409 case tst_none:
410 break;
411 case tst_file:
412 if (close_at_end)
413 fclose(fp);
414 case tst_pipe:
415 // close(fd);
416 break;
417 case tst_istream:
418 break;
419 case tst_string:
420 delete [] buffer;
421 buffer = 0;
422 break;
423 default:
424 cerr << "EST_TokenStream: unknown type" << endl;
425 break;
426 }
427
428 type = tst_none;
429 peeked_charp = FALSE;
430 peeked_tokp = FALSE;
431
432 }
433
restart(void)434 int EST_TokenStream::restart(void)
435 {
436 // For paul, the only person I know who uses this
437
438 switch (type)
439 {
440 case tst_none:
441 break;
442 case tst_file:
443 fp = freopen(Origin,"rb",fp);
444 p_filepos = 0;
445 break;
446 case tst_pipe:
447 cerr << "EST_TokenStream: can't rewind pipe" << endl;
448 return -1;
449 break;
450 case tst_istream:
451 cerr << "EST_TokenStream: can't rewind istream" << endl;
452 break;
453 case tst_string:
454 pos = 0;
455 break;
456 default:
457 cerr << "EST_TokenStream: unknown type" << endl;
458 break;
459 }
460
461 linepos = 1;
462 peeked_charp = FALSE;
463 peeked_tokp = FALSE;
464 eof_flag = FALSE;
465
466 return 0;
467 }
468
operator >>(EST_Token & p)469 EST_TokenStream & EST_TokenStream::operator >>(EST_Token &p)
470 {
471 return get(p);
472 }
473
operator >>(EST_String & p)474 EST_TokenStream & EST_TokenStream::operator >>(EST_String &p)
475 {
476 EST_Token t;
477
478 get(t);
479 p = t.string();
480 return *this;
481 }
482
get(EST_Token & tok)483 EST_TokenStream &EST_TokenStream::get(EST_Token &tok)
484 {
485 tok = get();
486 return *this;
487 }
488
get_upto(const EST_String & s)489 EST_Token EST_TokenStream::get_upto(const EST_String &s)
490 {
491 // Returns a concatenated token form here to next symbol that matches s
492 // including s (though not adding s on the result)
493 // Not really for the purist but lots of times very handy
494 // Note this is not very efficient
495 EST_String result;
496 EST_Token t;
497
498 for (result=EST_String::Empty; (t=get()) != s; )
499 {
500 result += t.whitespace() + t.prepunctuation() +
501 t.string() + t.punctuation();
502 if (eof())
503 {
504 cerr << "EST_TokenStream: end of file when looking for \"" <<
505 s << "\"" << endl;
506 break;
507 }
508 }
509
510 return EST_Token(result);
511 }
512
get_upto_eoln(void)513 EST_Token EST_TokenStream::get_upto_eoln(void)
514 {
515 // Swallow the lot up to end of line
516 // assumes \n is a whitespace character
517
518 EST_String result(EST_String::Empty);
519
520 while (!eoln())
521 {
522 EST_Token &t=get();
523 result += t.whitespace() + t.prepunctuation();
524
525 if (quotes)
526 result += quote_string(t.string());
527 else
528 result += t.string();
529
530 result += t.punctuation();
531
532 if (eof())
533 {
534 // cerr << "EST_TokenStream: end of file when looking for end of line"
535 // << endl;
536 break;
537 }
538 }
539 // So that the next call works I have to step over the eoln condition
540 // That involves removing the whitespace upto and including the next
541 // \n in the peek token.
542
543 char *w = wstrdup(peek().whitespace());
544 int i;
545 for (i=0; w[i] != 0; i++)
546 if (w[i] == '\n') // maybe not portable
547 peek().set_whitespace(&w[i+1]);
548
549 wfree(w);
550
551 static EST_Token result_t;
552
553 result_t.set_token(result);
554
555 return result_t;
556 }
557
must_get(EST_String expected,bool * ok)558 EST_Token &EST_TokenStream::must_get(EST_String expected, bool *ok)
559 {
560 EST_Token &tok = get();
561
562 if (tok != expected)
563 {
564 if (ok != NULL)
565 {
566 *ok=FALSE;
567 return tok;
568 }
569 else
570 EST_error("Expected '%s' got '%s' at %s",
571 (const char *)expected,
572 (const char *)(EST_String)tok,
573 (const char *)pos_description());
574 }
575
576 if (ok != NULL)
577 *ok=TRUE;
578 return tok;
579 }
580
build_table()581 void EST_TokenStream::build_table()
582 {
583 int i;
584 const char *p;
585 unsigned char c;
586
587 for (i=0; i<256; ++i)
588 p_table[i]=0;
589
590 for (p=WhiteSpaceChars; *p; ++p)
591 if (p_table[c=(unsigned char)*p])
592 EST_warning("Character '%c' has two classes, '%c' and '%c'",
593 *p, c, ' ');
594 else
595 p_table[c] = ' ';
596
597 for (p=SingleCharSymbols; *p; ++p)
598 if (p_table[c=(unsigned char)*p])
599 EST_warning("Character '%c' has two classes, '%c' and '%c'",
600 *p, p_table[c], '!');
601 else
602 p_table[c] = '@';
603
604 for (p=PunctuationSymbols; *p; ++p)
605 if (p_table[c=(unsigned char)*p] == '@')
606 continue;
607 else if (p_table[c])
608 EST_warning("Character '%c' has two classes, '%c' and '%c'",
609 *p, p_table[c], '.');
610 else
611 p_table[c] = '.';
612
613 for(p=PrePunctuationSymbols; *p; ++p)
614 if (p_table[c=(unsigned char)*p] == '@')
615 continue;
616 else if (p_table[c] == '.')
617 p_table[c] = '"';
618 else if (p_table[c])
619 EST_warning("Character '%c' has two classes, '%c' and '%c'",
620 *p, p_table[c], '$');
621 else
622 p_table[c] = '$';
623
624 p_table_wrong=0;
625 }
626
getpeeked_internal(void)627 inline int EST_TokenStream::getpeeked_internal(void)
628 {
629 peeked_charp = FALSE;
630 return peeked_char;
631 }
632
633 inline
getch_internal()634 int EST_TokenStream::getch_internal()
635 {
636 // Return next character in stream
637 if (EST_TokenStream::peeked_charp)
638 {
639 return getpeeked_internal();
640 }
641
642 switch (type)
643 {
644 case tst_none:
645 cerr << "EST_TokenStream unset" << endl;
646 return EOF;
647 break;
648 case tst_file:
649 p_filepos++;
650 {
651 char lc;
652 if (stdio_fread(&lc,1,1,fp) == 0)
653 return EOF;
654 else
655 return (int)lc;
656 }
657 /* return getc(fp); */
658 case tst_pipe:
659 cerr << "EST_TokenStream pipe not yet supported" << endl;
660 return EOF;
661 break;
662 case tst_istream:
663 p_filepos++;
664 return is->get();
665 case tst_string:
666 if (pos < buffer_length)
667 {
668 p_filepos++;
669 return buffer[pos++];
670 }
671 else
672 return EOF;
673 default:
674 cerr << "EST_TokenStream: unknown type" << endl;
675 return EOF;
676 }
677
678 return EOF; // can't get here
679 }
680
getch(void)681 int EST_TokenStream::getch(void)
682 {
683 return getch_internal();
684 }
685
peekch_internal()686 inline int EST_TokenStream::peekch_internal()
687 {
688 // Return next character in stream (without reading it)
689
690 if (!peeked_charp)
691 peeked_char = getch_internal();
692 peeked_charp = TRUE;
693 return peeked_char;
694 }
695
696
peekch(void)697 int EST_TokenStream::peekch(void)
698 {
699 return peekch_internal();
700
701 }
702
// CLASS(C,CL) is true when character C has class code CL in p_table, the
// table filled in by build_table().  C is narrowed to unsigned char for
// the lookup, so EOF (-1) is looked up as entry 255; callers test for
// EOF separately.
#define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL))

// CLASS2(C,CL1,CL2) is true when C has either of two class codes.  C is
// expanded twice, so it must not have side effects.
#define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2))
706
get(void)707 EST_Token &EST_TokenStream::get(void)
708 {
709 if (peeked_tokp)
710 {
711 peeked_tokp = FALSE;
712 return current_tok;
713 }
714
715 if (p_table_wrong)
716 build_table();
717
718 char *word;
719 int c,i,j;
720
721 for (i=0; (CLASS(c=getch_internal(),' ') &&
722 ( c != EOF )); i++)
723 {
724 if (c == '\n') linepos++;
725 tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
726 tok_wspace[i] = c;
727 }
728 tok_wspace[i] = '\0';
729
730 current_tok.init();
731
732 if (c != EOF)
733 {
734 current_tok.set_filepos(p_filepos-1);
735
736 if ((quotes) && // quoted strings (with escapes) are allowed
737 (c == quote))
738 {
739 for (i=0;
740 ((c = getch_internal()) != EOF)
741 ;)
742 {
743 if (c == quote)
744 break;
745 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
746 if (c == escape)
747 c = getch_internal();
748 tok_stuff[i++] = c;
749 }
750 current_tok.set_quoted(TRUE);
751 }
752 else // standard whitespace separated tokens
753 {
754 for (i=0,tok_stuff[i++]=c;
755 (
756 !CLASS(c,'@') &&
757 !CLASS(c=peekch_internal(),' ') &&
758 !CLASS(c,'@') &&
759 ( c != EOF )) ;)
760 {
761 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
762 // note, we must have peeked to get here.
763 tok_stuff[i++] = getpeeked_internal();
764 }
765 }
766 tok_stuff[i] = '\0';
767 // Are there any punctuation symbols at the start?
768 for (j=0;
769 ((j < i) && CLASS2(tok_stuff[j], '$', '"'));
770 j++);
771 if ((j > 0) && (j < i)) // there are
772 {
773 tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
774 memmove(tok_prepuncs,tok_stuff,j);
775 tok_prepuncs[j] = '\0';
776 current_tok.set_prepunctuation(tok_prepuncs);
777 word=&tok_stuff[j];
778 i-=j; // reduce size by number of prepuncs
779 }
780 else
781 {
782 current_tok.set_prepunctuation(EST_String::Empty);
783 word = tok_stuff;
784 }
785 // Are there any punctuation symbols at the end
786 for (j=i-1;
787 ((j > 0) && CLASS2(word[j],'.','"'));
788 j--);
789 if (word[j+1] != '\0')
790 {
791 current_tok.set_punctuation(&word[j+1]);
792 word[j+1] = '\0';
793 }
794 else
795 current_tok.set_punctuation(EST_String::Empty);
796
797 current_tok.set_token(word);
798 if (tok_wspace[0] == '\0') // feature paths will have null whitespace
799 current_tok.set_whitespace(EST_String::Empty);
800 else
801 current_tok.set_whitespace(tok_wspace);
802 }
803 else
804 {
805 current_tok.set_token(EST_String::Empty);
806 current_tok.set_whitespace(tok_wspace);
807 current_tok.set_punctuation(EST_String::Empty);
808 current_tok.set_prepunctuation(EST_String::Empty);
809 eof_flag = TRUE;
810 }
811
812 return current_tok;
813 }
814
eoln(void)815 int EST_TokenStream::eoln(void)
816 {
817 // This doesn't really work if there are blank lines (and you want
818 // to know about them)
819
820 if ((peek().whitespace().contains("\n")) || eof())
821 return TRUE;
822 else
823 return FALSE;
824
825 }
826
// Return `s` wrapped in quote characters, with every quote or escape
// character inside it preceded by the escape character.
// The quoting is done when `force` is true, or when `s` is empty or
// contains whitespace, the quote string or the escape string; otherwise
// `s` is returned as it is.
// Note quote and escape are assumed to be strings of length 1.
EST_String quote_string(const EST_String &s,
                        const EST_String &quote,
                        const EST_String &escape,
                        int force)
{
    const bool needs_quotes = (force) ||
        (s.contains(quote)) ||
        (s.contains(escape)) ||
        (s.contains(RXanywhitespace)) ||
        (s.length() == 0);

    if (!needs_quotes)
        return s;

    // bigger than the quoted form could ever be
    char *quoted = new char[s.length()*(quote.length()+escape.length())+
                            1+quote.length()+quote.length()];
    char *out = quoted;

    *out++ = quote(0);
    for (int k=0; k < s.length(); k++)
    {
        const char ch = s(k);
        if ((ch == quote(0)) || (ch == escape(0)))
            *out++ = escape(0);
        *out++ = ch;
    }
    *out++ = quote(0);
    *out = '\0';

    EST_String quoted_form;
    quoted_form = quoted;
    delete [] quoted;

    return quoted_form;
}
864
pos_description()865 const EST_String EST_TokenStream::pos_description()
866 {
867 return Origin+":"+itoString(linepos);
868 }
869