1 #include <qpdf/QPDFTokenizer.hh>
2 
// DO NOT USE ctype -- it is locale-dependent for some things, and
// it's not worth the risk of including it in case it may accidentally
// be used.
6 
7 #include <qpdf/QTC.hh>
8 #include <qpdf/QPDFExc.hh>
9 #include <qpdf/QUtil.hh>
10 #include <qpdf/QPDFObjectHandle.hh>
11 #include <qpdf/QIntC.hh>
12 
13 #include <stdexcept>
14 #include <stdlib.h>
15 #include <string.h>
16 
// Return true if ch ends a bare token: space, tab, LF, VT, FF, CR,
// NUL, or one of ( ) < > [ ] { } / %.
//
// NUL is listed explicitly so that treating it as a delimiter is a
// deliberate choice rather than a side effect of searching a
// NUL-terminated string. No ctype (locale-dependent) call is used.
static bool is_delimiter(char ch)
{
    switch (ch)
    {
      case '\0':
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
      case '(':
      case ')':
      case '<':
      case '>':
      case '[':
      case ']':
      case '{':
      case '}':
      case '/':
      case '%':
        return true;

      default:
        return false;
    }
}
21 
22 class QPDFWordTokenFinder: public InputSource::Finder
23 {
24   public:
QPDFWordTokenFinder(PointerHolder<InputSource> is,std::string const & str)25     QPDFWordTokenFinder(PointerHolder<InputSource> is,
26                         std::string const& str) :
27         is(is),
28         str(str)
29     {
30     }
~QPDFWordTokenFinder()31     virtual ~QPDFWordTokenFinder()
32     {
33     }
34     virtual bool check();
35 
36   private:
37     PointerHolder<InputSource> is;
38     std::string str;
39 };
40 
41 bool
check()42 QPDFWordTokenFinder::check()
43 {
44     // Find a word token matching the given string, preceded by a
45     // delimiter, and followed by a delimiter or EOF.
46     QPDFTokenizer tokenizer;
47     QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
48     qpdf_offset_t pos = is->tell();
49     if (! (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str)))
50     {
51         QTC::TC("qpdf", "QPDFTokenizer finder found wrong word");
52         return false;
53     }
54     qpdf_offset_t token_start = is->getLastOffset();
55     char next;
56     bool next_okay = false;
57     if (is->read(&next, 1) == 0)
58     {
59         QTC::TC("qpdf", "QPDFTokenizer inline image at EOF");
60         next_okay = true;
61     }
62     else
63     {
64         next_okay = is_delimiter(next);
65     }
66     is->seek(pos, SEEK_SET);
67     if (! next_okay)
68     {
69         return false;
70     }
71     if (token_start == 0)
72     {
73         // Can't actually happen...we never start the search at the
74         // beginning of the input.
75         return false;
76     }
77     return true;
78 }
79 
Members()80 QPDFTokenizer::Members::Members() :
81     allow_eof(false),
82     include_ignorable(false)
83 {
84     reset();
85 }
86 
87 void
reset()88 QPDFTokenizer::Members::reset()
89 {
90     state = st_top;
91     type = tt_bad;
92     val = "";
93     raw_val = "";
94     error_message = "";
95     unread_char = false;
96     char_to_unread = '\0';
97     inline_image_bytes = 0;
98     string_depth = 0;
99     string_ignoring_newline = false;
100     last_char_was_bs = false;
101     last_char_was_cr = false;
102 }
103 
~Members()104 QPDFTokenizer::Members::~Members()
105 {
106 }
107 
Token(token_type_e type,std::string const & value)108 QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
109     type(type),
110     value(value),
111     raw_value(value)
112 {
113     if (type == tt_string)
114     {
115         raw_value = QPDFObjectHandle::newString(value).unparse();
116     }
117     else if (type == tt_name)
118     {
119         raw_value = QPDFObjectHandle::newName(value).unparse();
120     }
121 }
122 
123 
124 
QPDFTokenizer()125 QPDFTokenizer::QPDFTokenizer() :
126     m(new Members())
127 {
128 }
129 
130 void
allowEOF()131 QPDFTokenizer::allowEOF()
132 {
133     this->m->allow_eof = true;
134 }
135 
136 void
includeIgnorable()137 QPDFTokenizer::includeIgnorable()
138 {
139     this->m->include_ignorable = true;
140 }
141 
142 bool
isSpace(char ch)143 QPDFTokenizer::isSpace(char ch)
144 {
145     return ((ch == '\0') || QUtil::is_space(ch));
146 }
147 
148 bool
isDelimiter(char ch)149 QPDFTokenizer::isDelimiter(char ch)
150 {
151     return is_delimiter(ch);
152 }
153 
154 void
resolveLiteral()155 QPDFTokenizer::resolveLiteral()
156 {
157     if ((this->m->val.length() > 0) && (this->m->val.at(0) == '/'))
158     {
159         this->m->type = tt_name;
160         // Deal with # in name token.  Note: '/' by itself is a
161         // valid name, so don't strip leading /.  That way we
162         // don't have to deal with the empty string as a name.
163         std::string nval = "/";
164         size_t len = this->m->val.length();
165         for (size_t i = 1; i < len; ++i)
166         {
167             char ch = this->m->val.at(i);
168             if (ch == '#')
169             {
170                 if ((i + 2 < len) &&
171                     QUtil::is_hex_digit(this->m->val.at(i+1)) &&
172                     QUtil::is_hex_digit(this->m->val.at(i+2)))
173                 {
174                     char num[3];
175                     num[0] = this->m->val.at(i+1);
176                     num[1] = this->m->val.at(i+2);
177                     num[2] = '\0';
178                     char ch2 = static_cast<char>(strtol(num, 0, 16));
179                     if (ch2 == '\0')
180                     {
181                         this->m->type = tt_bad;
182                         QTC::TC("qpdf", "QPDFTokenizer null in name");
183                         this->m->error_message =
184                             "null character not allowed in name token";
185                         nval += "#00";
186                     }
187                     else
188                     {
189                         nval.append(1, ch2);
190                     }
191                     i += 2;
192                 }
193                 else
194                 {
195                     QTC::TC("qpdf", "QPDFTokenizer bad name");
196                     this->m->error_message =
197                         "name with stray # will not work with PDF >= 1.2";
198                     // Use null to encode a bad # -- this is reversed
199                     // in QPDF_Name::normalizeName.
200                     nval += '\0';
201                 }
202             }
203             else
204             {
205                 nval.append(1, ch);
206             }
207         }
208         this->m->val = nval;
209     }
210     else if (QUtil::is_number(this->m->val.c_str()))
211     {
212         if (this->m->val.find('.') != std::string::npos)
213         {
214             this->m->type = tt_real;
215         }
216         else
217         {
218             this->m->type = tt_integer;
219         }
220     }
221     else if ((this->m->val == "true") || (this->m->val == "false"))
222     {
223         this->m->type = tt_bool;
224     }
225     else if (this->m->val == "null")
226     {
227         this->m->type = tt_null;
228     }
229     else
230     {
231         // I don't really know what it is, so leave it as tt_word.
232         // Lots of cases ($, #, etc.) other than actual words fall
233         // into this category, but that's okay at least for now.
234         this->m->type = tt_word;
235     }
236 }
237 
238 void
presentCharacter(char ch)239 QPDFTokenizer::presentCharacter(char ch)
240 {
241     if (this->m->state == st_token_ready)
242     {
243 	throw std::logic_error(
244 	    "INTERNAL ERROR: QPDF tokenizer presented character "
245 	    "while token is waiting");
246     }
247 
248     char orig_ch = ch;
249 
250     // State machine is implemented such that some characters may be
251     // handled more than once.  This happens whenever you have to use
252     // the character that caused a state change in the new state.
253 
254     bool handled = true;
255     if (this->m->state == st_top)
256     {
257 	// Note: we specifically do not use ctype here.  It is
258 	// locale-dependent.
259 	if (isSpace(ch))
260 	{
261             if (this->m->include_ignorable)
262             {
263                 this->m->state = st_in_space;
264                 this->m->val += ch;
265             }
266 	}
267 	else if (ch == '%')
268 	{
269 	    this->m->state = st_in_comment;
270             if (this->m->include_ignorable)
271             {
272                 this->m->val += ch;
273             }
274 	}
275 	else if (ch == '(')
276 	{
277 	    this->m->string_depth = 1;
278 	    this->m->string_ignoring_newline = false;
279 	    memset(this->m->bs_num_register, '\0',
280                    sizeof(this->m->bs_num_register));
281 	    this->m->last_char_was_bs = false;
282 	    this->m->last_char_was_cr = false;
283 	    this->m->state = st_in_string;
284 	}
285 	else if (ch == '<')
286 	{
287 	    this->m->state = st_lt;
288 	}
289 	else if (ch == '>')
290 	{
291 	    this->m->state = st_gt;
292 	}
293 	else
294 	{
295 	    this->m->val += ch;
296 	    if (ch == ')')
297 	    {
298 		this->m->type = tt_bad;
299 		QTC::TC("qpdf", "QPDFTokenizer bad )");
300 		this->m->error_message = "unexpected )";
301 		this->m->state = st_token_ready;
302 	    }
303 	    else if (ch == '[')
304 	    {
305 		this->m->type = tt_array_open;
306 		this->m->state = st_token_ready;
307 	    }
308 	    else if (ch == ']')
309 	    {
310 		this->m->type = tt_array_close;
311 		this->m->state = st_token_ready;
312 	    }
313 	    else if (ch == '{')
314 	    {
315 		this->m->type = tt_brace_open;
316 		this->m->state = st_token_ready;
317 	    }
318 	    else if (ch == '}')
319 	    {
320 		this->m->type = tt_brace_close;
321 		this->m->state = st_token_ready;
322 	    }
323 	    else
324 	    {
325 		this->m->state = st_literal;
326 	    }
327 	}
328     }
329     else if (this->m->state == st_in_space)
330     {
331         // We only enter this state if include_ignorable is true.
332         if (! isSpace(ch))
333         {
334 	    this->m->type = tt_space;
335 	    this->m->unread_char = true;
336 	    this->m->char_to_unread = ch;
337 	    this->m->state = st_token_ready;
338         }
339         else
340         {
341             this->m->val += ch;
342         }
343     }
344     else if (this->m->state == st_in_comment)
345     {
346 	if ((ch == '\r') || (ch == '\n'))
347         {
348             if (this->m->include_ignorable)
349             {
350                 this->m->type = tt_comment;
351                 this->m->unread_char = true;
352                 this->m->char_to_unread = ch;
353                 this->m->state = st_token_ready;
354             }
355             else
356             {
357                 this->m->state = st_top;
358             }
359         }
360         else if (this->m->include_ignorable)
361         {
362             this->m->val += ch;
363         }
364     }
365     else if (this->m->state == st_lt)
366     {
367 	if (ch == '<')
368 	{
369 	    this->m->val = "<<";
370 	    this->m->type = tt_dict_open;
371 	    this->m->state = st_token_ready;
372 	}
373 	else
374 	{
375 	    handled = false;
376 	    this->m->state = st_in_hexstring;
377 	}
378     }
379     else if (this->m->state == st_gt)
380     {
381 	if (ch == '>')
382 	{
383 	    this->m->val = ">>";
384 	    this->m->type = tt_dict_close;
385 	    this->m->state = st_token_ready;
386 	}
387 	else
388 	{
389 	    this->m->val = ">";
390 	    this->m->type = tt_bad;
391 	    QTC::TC("qpdf", "QPDFTokenizer bad >");
392 	    this->m->error_message = "unexpected >";
393 	    this->m->unread_char = true;
394 	    this->m->char_to_unread = ch;
395 	    this->m->state = st_token_ready;
396 	}
397     }
398     else if (this->m->state == st_in_string)
399     {
400 	if (this->m->string_ignoring_newline && (ch != '\n'))
401 	{
402 	    this->m->string_ignoring_newline = false;
403 	}
404 
405 	size_t bs_num_count = strlen(this->m->bs_num_register);
406 	bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
407 	if ((bs_num_count == 3) || ((bs_num_count > 0) && (! ch_is_octal)))
408 	{
409 	    // We've accumulated \ddd.  PDF Spec says to ignore
410 	    // high-order overflow.
411 	    this->m->val += static_cast<char>(
412                 strtol(this->m->bs_num_register, 0, 8));
413 	    memset(this->m->bs_num_register, '\0',
414                    sizeof(this->m->bs_num_register));
415 	    bs_num_count = 0;
416 	}
417 
418 	if (this->m->string_ignoring_newline && (ch == '\n'))
419 	{
420 	    // ignore
421             this->m->string_ignoring_newline = false;
422 	}
423 	else if (ch_is_octal &&
424                  (this->m->last_char_was_bs || (bs_num_count > 0)))
425 	{
426 	    this->m->bs_num_register[bs_num_count++] = ch;
427 	}
428 	else if (this->m->last_char_was_bs)
429 	{
430 	    switch (ch)
431 	    {
432 	      case 'n':
433 		this->m->val += '\n';
434 		break;
435 
436 	      case 'r':
437 		this->m->val += '\r';
438 		break;
439 
440 	      case 't':
441 		this->m->val += '\t';
442 		break;
443 
444 	      case 'b':
445 		this->m->val += '\b';
446 		break;
447 
448 	      case 'f':
449 		this->m->val += '\f';
450 		break;
451 
452 	      case '\n':
453                 break;
454 
455 	      case '\r':
456 		this->m->string_ignoring_newline = true;
457 		break;
458 
459 	      default:
460 		// PDF spec says backslash is ignored before anything else
461 		this->m->val += ch;
462 		break;
463 	    }
464 	}
465 	else if (ch == '\\')
466 	{
467 	    // last_char_was_bs is set/cleared below as appropriate
468 	    if (bs_num_count)
469 	    {
470 		throw std::logic_error(
471 		    "INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
472 		    "when ch == '\\'");
473 	    }
474 	}
475 	else if (ch == '(')
476 	{
477 	    this->m->val += ch;
478 	    ++this->m->string_depth;
479 	}
480 	else if ((ch == ')') && (--this->m->string_depth == 0))
481 	{
482 	    this->m->type = tt_string;
483 	    this->m->state = st_token_ready;
484 	}
485         else if (ch == '\r')
486         {
487             // CR by itself is converted to LF
488             this->m->val += '\n';
489         }
490         else if (ch == '\n')
491         {
492             // CR LF is converted to LF
493             if (! this->m->last_char_was_cr)
494             {
495                 this->m->val += ch;
496             }
497         }
498 	else
499 	{
500 	    this->m->val += ch;
501 	}
502 
503         this->m->last_char_was_cr =
504             ((! this->m->string_ignoring_newline) && (ch == '\r'));
505 	this->m->last_char_was_bs =
506             ((! this->m->last_char_was_bs) && (ch == '\\'));
507     }
508     else if (this->m->state == st_literal)
509     {
510 	if (isDelimiter(ch))
511 	{
512 	    // A C-locale whitespace character or delimiter terminates
513 	    // token.  It is important to unread the whitespace
514 	    // character even though it is ignored since it may be the
515 	    // newline after a stream keyword.  Removing it here could
516 	    // make the stream-reading code break on some files,
517 	    // though not on any files in the test suite as of this
518 	    // writing.
519 
520 	    this->m->type = tt_word;
521 	    this->m->unread_char = true;
522 	    this->m->char_to_unread = ch;
523 	    this->m->state = st_token_ready;
524 	}
525 	else
526 	{
527 	    this->m->val += ch;
528 	}
529     }
530     else if (this->m->state == st_inline_image)
531     {
532         this->m->val += ch;
533         size_t len = this->m->val.length();
534         if (len == this->m->inline_image_bytes)
535         {
536             QTC::TC("qpdf", "QPDFTokenizer found EI by byte count");
537             this->m->type = tt_inline_image;
538             this->m->inline_image_bytes = 0;
539             this->m->state = st_token_ready;
540         }
541     }
542     else
543     {
544 	handled = false;
545     }
546 
547     if (handled)
548     {
549 	// okay
550     }
551     else if (this->m->state == st_in_hexstring)
552     {
553 	if (ch == '>')
554 	{
555 	    this->m->type = tt_string;
556 	    this->m->state = st_token_ready;
557 	    if (this->m->val.length() % 2)
558 	    {
559 		// PDF spec says odd hexstrings have implicit
560 		// trailing 0.
561 		this->m->val += '0';
562 	    }
563 	    char num[3];
564 	    num[2] = '\0';
565 	    std::string nval;
566 	    for (unsigned int i = 0; i < this->m->val.length(); i += 2)
567 	    {
568 		num[0] = this->m->val.at(i);
569 		num[1] = this->m->val.at(i+1);
570 		char nch = static_cast<char>(strtol(num, 0, 16));
571 		nval += nch;
572 	    }
573 	    this->m->val = nval;
574 	}
575 	else if (QUtil::is_hex_digit(ch))
576 	{
577 	    this->m->val += ch;
578 	}
579 	else if (isSpace(ch))
580 	{
581 	    // ignore
582 	}
583 	else
584 	{
585 	    this->m->type = tt_bad;
586 	    QTC::TC("qpdf", "QPDFTokenizer bad hexstring character");
587 	    this->m->error_message = std::string("invalid character (") +
588 		ch + ") in hexstring";
589 	    this->m->state = st_token_ready;
590 	}
591     }
592     else
593     {
594 	throw std::logic_error(
595 	    "INTERNAL ERROR: invalid state while reading token");
596     }
597 
598     if ((this->m->state == st_token_ready) && (this->m->type == tt_word))
599     {
600         resolveLiteral();
601     }
602 
603     if (! (betweenTokens() ||
604            ((this->m->state == st_token_ready) && this->m->unread_char)))
605     {
606 	this->m->raw_val += orig_ch;
607     }
608 }
609 
610 void
presentEOF()611 QPDFTokenizer::presentEOF()
612 {
613     if (this->m->state == st_literal)
614     {
615         QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
616         resolveLiteral();
617     }
618     else if ((this->m->include_ignorable) && (this->m->state == st_in_space))
619     {
620         this->m->type = tt_space;
621     }
622     else if ((this->m->include_ignorable) && (this->m->state == st_in_comment))
623     {
624         this->m->type = tt_comment;
625     }
626     else if (betweenTokens())
627     {
628         this->m->type = tt_eof;
629     }
630     else if (this->m->state != st_token_ready)
631     {
632         QTC::TC("qpdf", "QPDFTokenizer EOF reading token");
633         this->m->type = tt_bad;
634         this->m->error_message = "EOF while reading token";
635     }
636 
637     this->m->state = st_token_ready;
638 }
639 
640 void
expectInlineImage(PointerHolder<InputSource> input)641 QPDFTokenizer::expectInlineImage(PointerHolder<InputSource> input)
642 {
643     if (this->m->state != st_top)
644     {
645         throw std::logic_error("QPDFTokenizer::expectInlineImage called"
646                                " when tokenizer is in improper state");
647     }
648     findEI(input);
649     this->m->state = st_inline_image;
650 }
651 
652 void
findEI(PointerHolder<InputSource> input)653 QPDFTokenizer::findEI(PointerHolder<InputSource> input)
654 {
655     if (! input.getPointer())
656     {
657         return;
658     }
659 
660     qpdf_offset_t last_offset = input->getLastOffset();
661     qpdf_offset_t pos = input->tell();
662 
663     // Use QPDFWordTokenFinder to find EI surrounded by delimiters.
664     // Then read the next several tokens or up to EOF. If we find any
665     // suspicious-looking or tokens, this is probably still part of
666     // the image data, so keep looking for EI. Stop at the first EI
667     // that passes. If we get to the end without finding one, return
668     // the last EI we found. Store the number of bytes expected in the
669     // inline image including the EI and use that to break out of
670     // inline image, falling back to the old method if needed.
671 
672     bool okay = false;
673     bool first_try = true;
674     while (! okay)
675     {
676         QPDFWordTokenFinder f(input, "EI");
677         if (! input->findFirst("EI", input->tell(), 0, f))
678         {
679             break;
680         }
681         this->m->inline_image_bytes = QIntC::to_size(input->tell() - pos - 2);
682 
683         QPDFTokenizer check;
684         bool found_bad = false;
685         // Look at the next 10 tokens or up to EOF. The next inline
686         // image's image data would look like bad tokens, but there
687         // will always be at least 10 tokens between one inline
688         // image's EI and the next valid one's ID since width, height,
689         // bits per pixel, and color space are all required as well as
690         // a BI and ID. If we get 10 good tokens in a row or hit EOF,
691         // we can be pretty sure we've found the actual EI.
692         for (int i = 0; i < 10; ++i)
693         {
694             QPDFTokenizer::Token t =
695                 check.readToken(input, "checker", true);
696             token_type_e type = t.getType();
697             if (type == tt_eof)
698             {
699                 okay = true;
700             }
701             else if (type == tt_bad)
702             {
703                 found_bad = true;
704             }
705             else if (type == tt_word)
706             {
707                 // The qpdf tokenizer lumps alphabetic and otherwise
708                 // uncategorized characters into "words". We recognize
709                 // strings of alphabetic characters as potential valid
710                 // operators for purposes of telling whether we're in
711                 // valid content or not. It's not perfect, but it
712                 // should work more reliably than what we used to do,
713                 // which was already good enough for the vast majority
714                 // of files.
715                 bool found_alpha = false;
716                 bool found_non_printable = false;
717                 bool found_other = false;
718                 std::string value = t.getValue();
719                 for (std::string::iterator iter = value.begin();
720                      iter != value.end(); ++iter)
721                 {
722                     signed char ch = *iter;
723                     if (((ch >= 'a') && (ch <= 'z')) ||
724                         ((ch >= 'A') && (ch <= 'Z')) ||
725                         (ch == '*'))
726                     {
727                         // Treat '*' as alpha since there are valid
728                         // PDF operators that contain * along with
729                         // alphabetic characters.
730                         found_alpha = true;
731                     }
732                     else if ((ch < 32) && (! isSpace(ch)))
733                     {
734                         // ch is signed, so characters outside of
735                         // 7-bit will be < 0.
736                         found_non_printable = true;
737                         break;
738                     }
739                     else
740                     {
741                         found_other = true;
742                     }
743                 }
744                 if (found_non_printable || (found_alpha && found_other))
745                 {
746                     found_bad = true;
747                 }
748             }
749             if (okay || found_bad)
750             {
751                 break;
752             }
753         }
754         if (! found_bad)
755         {
756             okay = true;
757         }
758         if (! okay)
759         {
760             first_try = false;
761         }
762     }
763     if (okay && (! first_try))
764     {
765         QTC::TC("qpdf", "QPDFTokenizer found EI after more than one try");
766     }
767 
768     input->seek(pos, SEEK_SET);
769     input->setLastOffset(last_offset);
770 }
771 
772 bool
getToken(Token & token,bool & unread_char,char & ch)773 QPDFTokenizer::getToken(Token& token, bool& unread_char, char& ch)
774 {
775     bool ready = (this->m->state == st_token_ready);
776     unread_char = this->m->unread_char;
777     ch = this->m->char_to_unread;
778     if (ready)
779     {
780         if (this->m->type == tt_bad)
781         {
782             this->m->val = this->m->raw_val;
783         }
784 	token = Token(this->m->type, this->m->val,
785                       this->m->raw_val, this->m->error_message);
786 	this->m->reset();
787     }
788     return ready;
789 }
790 
791 bool
betweenTokens()792 QPDFTokenizer::betweenTokens()
793 {
794     return ((this->m->state == st_top) ||
795             ((! this->m->include_ignorable) &&
796              ((this->m->state == st_in_comment) ||
797               (this->m->state == st_in_space))));
798 }
799 
800 QPDFTokenizer::Token
readToken(PointerHolder<InputSource> input,std::string const & context,bool allow_bad,size_t max_len)801 QPDFTokenizer::readToken(PointerHolder<InputSource> input,
802                          std::string const& context,
803                          bool allow_bad,
804                          size_t max_len)
805 {
806     qpdf_offset_t offset = input->tell();
807     Token token;
808     bool unread_char;
809     char char_to_unread;
810     bool presented_eof = false;
811     while (! getToken(token, unread_char, char_to_unread))
812     {
813 	char ch;
814 	if (input->read(&ch, 1) == 0)
815 	{
816             if (! presented_eof)
817             {
818                 presentEOF();
819                 presented_eof = true;
820                 if ((this->m->type == tt_eof) && (! this->m->allow_eof))
821                 {
822                     // Nothing in the qpdf library calls readToken
823                     // without allowEOF anymore, so this case is not
824                     // exercised.
825                     this->m->type = tt_bad;
826                     this->m->error_message = "unexpected EOF";
827                     offset = input->getLastOffset();
828                 }
829             }
830             else
831             {
832                 throw std::logic_error(
833                     "getToken returned false after presenting EOF");
834             }
835 	}
836 	else
837 	{
838 	    presentCharacter(ch);
839 	    if (betweenTokens() && (input->getLastOffset() == offset))
840 	    {
841 		++offset;
842 	    }
843             if (max_len && (this->m->raw_val.length() >= max_len) &&
844                 (this->m->state != st_token_ready))
845             {
846                 // terminate this token now
847                 QTC::TC("qpdf", "QPDFTokenizer block long token");
848                 this->m->type = tt_bad;
849                 this->m->state = st_token_ready;
850                 this->m->error_message =
851                     "exceeded allowable length while reading token";
852             }
853 	}
854     }
855 
856     if (unread_char)
857     {
858 	input->unreadCh(char_to_unread);
859     }
860 
861     if (token.getType() != tt_eof)
862     {
863         input->setLastOffset(offset);
864     }
865 
866     if (token.getType() == tt_bad)
867     {
868         if (allow_bad)
869         {
870             QTC::TC("qpdf", "QPDFTokenizer allowing bad token");
871         }
872         else
873         {
874             throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
875                           context, offset, token.getErrorMessage());
876         }
877     }
878 
879     return token;
880 }
881