1 /**
2  * \file Parser.cpp
3  * This file is part of LyX, the document processor.
4  * Licence details can be found in the file COPYING.
5  *
6  * \author André Pönitz
7  *
8  * Full author contact details are available in file CREDITS.
9  */
10 
11 #include <config.h>
12 
13 #include "Encoding.h"
14 #include "Parser.h"
15 #include "support/lstrings.h"
16 #include "support/textutils.h"
17 
18 #include <iostream>
19 
20 using namespace std;
21 using namespace lyx::support;
22 
23 namespace lyx {
24 
25 namespace {
26 
27 /*!
28  * Translate a line ending to '\n'.
29  * \p c must have catcode catNewline, and it must be the last character read
30  * from \p is.
31  */
getNewline(iparserdocstream & is,char_type c)32 char_type getNewline(iparserdocstream & is, char_type c)
33 {
34 	// we have to handle 3 different line endings:
35 	// - UNIX (\n)
36 	// - MAC  (\r)
37 	// - DOS  (\r\n)
38 	if (c == '\r') {
39 		// MAC or DOS
40 		char_type wc;
41 		if (is.get(wc) && wc != '\n') {
42 			// MAC
43 			is.putback(wc);
44 		}
45 		return '\n';
46 	}
47 	// UNIX
48 	return c;
49 }
50 
51 } // namespace
52 
53 //
54 // Token
55 //
56 
operator <<(ostream & os,Token const & t)57 ostream & operator<<(ostream & os, Token const & t)
58 {
59 	if (t.cat() == catComment)
60 		os << '%' << t.cs() << '\n';
61 	else if (t.cat() == catSpace)
62 		os << t.cs();
63 	else if (t.cat() == catEscape)
64 		os << '\\' << t.cs() << ' ';
65 	else if (t.cat() == catLetter)
66 		os << t.cs();
67 	else if (t.cat() == catNewline)
68 		os << "[" << t.cs().size() << "\\n," << t.cat() << "]\n";
69 	else
70 		os << '[' << t.cs() << ',' << t.cat() << ']';
71 	return os;
72 }
73 
74 
asInput() const75 string Token::asInput() const
76 {
77 	if (cat_ == catComment)
78 		return '%' + cs_ + '\n';
79 	if (cat_ == catEscape)
80 		return '\\' + cs_;
81 	return cs_;
82 }
83 
84 
isAlnumASCII() const85 bool Token::isAlnumASCII() const
86 {
87 	return cat_ == catLetter ||
88 	       (cat_ == catOther && cs_.length() == 1 && isDigitASCII(cs_[0]));
89 }
90 
91 
92 #ifdef FILEDEBUG
debugToken(std::ostream & os,Token const & t,unsigned int flags)93 void debugToken(std::ostream & os, Token const & t, unsigned int flags)
94 {
95 	char sep = ' ';
96 	os << "t: " << t << " flags: " << flags;
97 	if (flags & FLAG_BRACE_LAST) { os << sep << "BRACE_LAST"; sep = '|'; }
98 	if (flags & FLAG_RIGHT     ) { os << sep << "RIGHT"     ; sep = '|'; }
99 	if (flags & FLAG_END       ) { os << sep << "END"       ; sep = '|'; }
100 	if (flags & FLAG_BRACK_LAST) { os << sep << "BRACK_LAST"; sep = '|'; }
101 	if (flags & FLAG_TEXTMODE  ) { os << sep << "TEXTMODE"  ; sep = '|'; }
102 	if (flags & FLAG_ITEM      ) { os << sep << "ITEM"      ; sep = '|'; }
103 	if (flags & FLAG_LEAVE     ) { os << sep << "LEAVE"     ; sep = '|'; }
104 	if (flags & FLAG_SIMPLE    ) { os << sep << "SIMPLE"    ; sep = '|'; }
105 	if (flags & FLAG_EQUATION  ) { os << sep << "EQUATION"  ; sep = '|'; }
106 	if (flags & FLAG_SIMPLE2   ) { os << sep << "SIMPLE2"   ; sep = '|'; }
107 	if (flags & FLAG_OPTION    ) { os << sep << "OPTION"    ; sep = '|'; }
108 	if (flags & FLAG_BRACED    ) { os << sep << "BRACED"    ; sep = '|'; }
109 	if (flags & FLAG_CELL      ) { os << sep << "CELL"      ; sep = '|'; }
110 	if (flags & FLAG_TABBING   ) { os << sep << "TABBING"   ; sep = '|'; }
111 	os << "\n";
112 }
113 #endif
114 
115 
116 //
117 // Wrapper
118 //
119 
setEncoding(std::string const & e)120 void iparserdocstream::setEncoding(std::string const & e)
121 {
122 	is_ << lyx::setEncoding(e);
123 }
124 
125 
putback(char_type c)126 void iparserdocstream::putback(char_type c)
127 {
128 	s_ = c + s_;
129 }
130 
131 
putback(docstring s)132 void iparserdocstream::putback(docstring s)
133 {
134 	s_ = s + s_;
135 }
136 
137 
get(char_type & c)138 iparserdocstream & iparserdocstream::get(char_type &c)
139 {
140 	if (s_.empty())
141 		is_.get(c);
142 	else {
143 		//cerr << "unparsed: " << to_utf8(s_) <<endl;
144 		c = s_[0];
145 		s_.erase(0,1);
146 	}
147 	return *this;
148 }
149 
150 
151 //
152 // Parser
153 //
154 
155 
Parser(idocstream & is,std::string const & fixedenc)156 Parser::Parser(idocstream & is, std::string const & fixedenc)
157 	: lineno_(0), pos_(0), iss_(0), is_(is),
158 	  encoding_iconv_(fixedenc.empty() ? "UTF-8" : fixedenc),
159 	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
160 	  fixed_enc_(!fixedenc.empty())
161 {
162 	if (fixed_enc_)
163 		is_.setEncoding(fixedenc);
164 	catInit();
165 }
166 
167 
Parser(string const & s)168 Parser::Parser(string const & s)
169 	: lineno_(0), pos_(0),
170 	  iss_(new idocstringstream(from_utf8(s))), is_(*iss_),
171 	  encoding_iconv_("UTF-8"),
172 	  theCatcodesType_(NORMAL_CATCODES), curr_cat_(UNDECIDED_CATCODES),
173 	  // An idocstringstream can not change the encoding
174 	  fixed_enc_(true)
175 {
176 	catInit();
177 }
178 
179 
~Parser()180 Parser::~Parser()
181 {
182 	delete iss_;
183 }
184 
185 
deparse()186 void Parser::deparse()
187 {
188 	string s;
189 	for(size_type i = pos_ ; i < tokens_.size() ; ++i) {
190 		s += tokens_[i].asInput();
191 	}
192 	is_.putback(from_utf8(s));
193 	tokens_.erase(tokens_.begin() + pos_, tokens_.end());
194 	// make sure that next token is read
195 	tokenize_one();
196 }
197 
198 
setEncoding(std::string const & e,int const & p)199 bool Parser::setEncoding(std::string const & e, int const & p)
200 {
201 	// We may (and need to) use unsafe encodings here: Since the text is
202 	// converted to unicode while reading from is_, we never see text in
203 	// the original encoding of the parser, but operate on utf8 strings
204 	// instead. Therefore, we cannot misparse high bytes as {, } or \\.
205 	Encoding const * const enc = encodings.fromLaTeXName(e, p, true);
206 	if (!enc) {
207 		cerr << "Unknown encoding " << e << ". Ignoring." << std::endl;
208 		return false;
209 	}
210 	return setEncoding(enc->iconvName());
211 }
212 
213 
catInit()214 void Parser::catInit()
215 {
216 	if (curr_cat_ == theCatcodesType_)
217 		return;
218 	curr_cat_ = theCatcodesType_;
219 
220 	fill(theCatcode_, theCatcode_ + 256, catOther);
221 	fill(theCatcode_ + 'a', theCatcode_ + 'z' + 1, catLetter);
222 	fill(theCatcode_ + 'A', theCatcode_ + 'Z' + 1, catLetter);
223 	// This is wrong!
224 	theCatcode_[int('@')]  = catLetter;
225 
226 	if (theCatcodesType_ == NORMAL_CATCODES) {
227 		theCatcode_[int('\\')] = catEscape;
228 		theCatcode_[int('{')]  = catBegin;
229 		theCatcode_[int('}')]  = catEnd;
230 		theCatcode_[int('$')]  = catMath;
231 		theCatcode_[int('&')]  = catAlign;
232 		theCatcode_[int('\n')] = catNewline;
233 		theCatcode_[int('#')]  = catParameter;
234 		theCatcode_[int('^')]  = catSuper;
235 		theCatcode_[int('_')]  = catSub;
236 		theCatcode_[0x7f]      = catIgnore;
237 		theCatcode_[int(' ')]  = catSpace;
238 		theCatcode_[int('\t')] = catSpace;
239 		theCatcode_[int('\r')] = catNewline;
240 		theCatcode_[int('~')]  = catActive;
241 		theCatcode_[int('%')]  = catComment;
242 	}
243 }
244 
catcode(char_type c) const245 CatCode Parser::catcode(char_type c) const
246 {
247 	if (c < 256)
248 		return theCatcode_[(unsigned char)c];
249 	return catOther;
250 }
251 
252 
setCatcode(char c,CatCode cat)253 void Parser::setCatcode(char c, CatCode cat)
254 {
255 	theCatcode_[(unsigned char)c] = cat;
256 	deparse();
257 }
258 
259 
setCatcodes(cat_type t)260 void Parser::setCatcodes(cat_type t)
261 {
262 	theCatcodesType_ = t;
263 	deparse();
264 }
265 
266 
setEncoding(std::string const & e)267 bool Parser::setEncoding(std::string const & e)
268 {
269 	//cerr << "setting encoding to " << e << std::endl;
270 	encoding_iconv_ = e;
271 	// If the encoding is fixed, we must not change the stream encoding
272 	// (because the whole input uses that encoding, e.g. if it comes from
273 	// the clipboard). We still need to track the original encoding in
274 	// encoding_iconv_, so that the generated output is correct.
275 	if (!fixed_enc_)
276 		is_.setEncoding(e);
277 	return true;
278 }
279 
280 
push_back(Token const & t)281 void Parser::push_back(Token const & t)
282 {
283 	tokens_.push_back(t);
284 }
285 
286 
287 // We return a copy here because the tokens_ vector may get reallocated
prev_token() const288 Token const Parser::prev_token() const
289 {
290 	static const Token dummy;
291 	return pos_ > 1 ? tokens_[pos_ - 2] : dummy;
292 }
293 
294 
295 // We return a copy here because the tokens_ vector may get reallocated
curr_token() const296 Token const Parser::curr_token() const
297 {
298 	static const Token dummy;
299 	return pos_ > 0 ? tokens_[pos_ - 1] : dummy;
300 }
301 
302 
303 // We return a copy here because the tokens_ vector may get reallocated
next_token()304 Token const Parser::next_token()
305 {
306 	static const Token dummy;
307 	if (!good())
308 		return dummy;
309 	if (pos_ >= tokens_.size())
310 		tokenize_one();
311 	return pos_ < tokens_.size() ? tokens_[pos_] : dummy;
312 }
313 
314 
315 // We return a copy here because the tokens_ vector may get reallocated
next_next_token()316 Token const Parser::next_next_token()
317 {
318 	static const Token dummy;
319 	if (!good())
320 		return dummy;
321 	// If tokenize_one() has not been called after the last get_token() we
322 	// need to tokenize two more tokens.
323 	if (pos_ >= tokens_.size())
324 		tokenize_one();
325 	if (pos_ + 1 >= tokens_.size())
326 		tokenize_one();
327 	return pos_ + 1 < tokens_.size() ? tokens_[pos_ + 1] : dummy;
328 }
329 
330 
331 // We return a copy here because the tokens_ vector may get reallocated
get_token()332 Token const Parser::get_token()
333 {
334 	static const Token dummy;
335 	if (!good())
336 		return dummy;
337 	if (pos_ >= tokens_.size()) {
338 		tokenize_one();
339 		if (pos_ >= tokens_.size())
340 			return dummy;
341 	}
342 	// cerr << "looking at token " << tokens_[pos_]
343 	//      << " pos: " << pos_ << '\n';
344 	return tokens_[pos_++];
345 }
346 
347 
isParagraph()348 bool Parser::isParagraph()
349 {
350 	// A new paragraph in TeX ist started
351 	// - either by a newline, following any amount of whitespace
352 	//   characters (including zero), and another newline
353 	// - or the token \par
354 	if (curr_token().cat() == catNewline &&
355 	    (curr_token().cs().size() > 1 ||
356 	     (next_token().cat() == catSpace &&
357 	      next_next_token().cat() == catNewline)))
358 		return true;
359 	if (curr_token().cat() == catEscape && curr_token().cs() == "par")
360 		return true;
361 	return false;
362 }
363 
364 
skip_spaces(bool skip_comments)365 bool Parser::skip_spaces(bool skip_comments)
366 {
367 	// We just silently return if we have no more tokens.
368 	// skip_spaces() should be callable at any time,
369 	// the caller must check p::good() anyway.
370 	bool skipped = false;
371 	while (good()) {
372 		get_token();
373 		if (isParagraph()) {
374 			putback();
375 			break;
376 		}
377 		if (curr_token().cat() == catSpace ||
378 		    curr_token().cat() == catNewline) {
379 			skipped = true;
380 			continue;
381 		}
382 		if ((curr_token().cat() == catComment && curr_token().cs().empty()))
383 			continue;
384 		if (skip_comments && curr_token().cat() == catComment) {
385 			// If positions_ is not empty we are doing some kind
386 			// of look ahead
387 			if (!positions_.empty())
388 				cerr << "  Ignoring comment: "
389 				     << curr_token().asInput();
390 		} else {
391 			putback();
392 			break;
393 		}
394 	}
395 	return skipped;
396 }
397 
398 
unskip_spaces(bool skip_comments)399 void Parser::unskip_spaces(bool skip_comments)
400 {
401 	while (pos_ > 0) {
402 		if ( curr_token().cat() == catSpace ||
403 		    (curr_token().cat() == catNewline && curr_token().cs().size() == 1))
404 			putback();
405 		else if (skip_comments && curr_token().cat() == catComment) {
406 			// TODO: Get rid of this
407 			// If positions_ is not empty we are doing some kind
408 			// of look ahead
409 			if (!positions_.empty())
410 				cerr << "Unignoring comment: "
411 				     << curr_token().asInput();
412 			putback();
413 		}
414 		else
415 			break;
416 	}
417 }
418 
419 
putback()420 void Parser::putback()
421 {
422 	--pos_;
423 }
424 
425 
pushPosition()426 void Parser::pushPosition()
427 {
428 	positions_.push_back(pos_);
429 }
430 
431 
popPosition()432 void Parser::popPosition()
433 {
434 	pos_ = positions_.back();
435 	positions_.pop_back();
436 	deparse();
437 }
438 
439 
dropPosition()440 void Parser::dropPosition()
441 {
442 	positions_.pop_back();
443 }
444 
445 
good()446 bool Parser::good()
447 {
448 	if (pos_ < tokens_.size())
449 		return true;
450 	if (!is_.good())
451 		return false;
452 	return is_.peek() != idocstream::traits_type::eof();
453 }
454 
455 
hasOpt(string const l)456 bool Parser::hasOpt(string const l)
457 {
458 	// An optional argument can occur in any of the following forms:
459 	// - \foo[bar]
460 	// - \foo [bar]
461 	// - \foo
462 	//   [bar]
463 	// - \foo %comment
464 	//   [bar]
465 
466 	// remember current position
467 	unsigned int oldpos = pos_;
468 	// skip spaces and comments
469 	while (good()) {
470 		get_token();
471 		if (isParagraph()) {
472 			putback();
473 			break;
474 		}
475 		if (curr_token().cat() == catSpace ||
476 		    curr_token().cat() == catNewline ||
477 		    curr_token().cat() == catComment)
478 			continue;
479 		putback();
480 		break;
481 	}
482 	bool const retval = (next_token().asInput() == l);
483 	pos_ = oldpos;
484 	return retval;
485 }
486 
487 
getFullArg(char left,char right,bool allow_escaping)488 Parser::Arg Parser::getFullArg(char left, char right, bool allow_escaping)
489 {
490 	skip_spaces(true);
491 
492 	// This is needed if a partial file ends with a command without arguments,
493 	// e. g. \medskip
494 	if (! good())
495 		return make_pair(false, string());
496 
497 	int group_level = 0;
498 	string result;
499 	Token t = get_token();
500 
501 	if (t.cat() == catComment || t.cat() == catEscape ||
502 	    t.character() != left) {
503 		putback();
504 		return make_pair(false, string());
505 	} else {
506 		while (good()) {
507 			t = get_token();
508 			// honor grouping
509 			if (left != '{' && t.cat() == catBegin) {
510 				++group_level;
511 				continue;
512 			}
513 			if (left != '{' && t.cat() == catEnd) {
514 				--group_level;
515 				continue;
516 			}
517 			// Ignore comments
518 			if (t.cat() == catComment) {
519 				if (!t.cs().empty())
520 					cerr << "Ignoring comment: " << t.asInput();
521 				continue;
522 			}
523 			if (allow_escaping) {
524 				if (t.cat() != catEscape && t.character() == right
525 				    && group_level == 0)
526 					break;
527 			} else {
528 				if (t.character() == right) {
529 					if (t.cat() == catEscape)
530 						result += '\\';
531 					if (group_level == 0)
532 						break;
533 				}
534 			}
535 			result += t.asInput();
536 		}
537 	}
538 	return make_pair(true, result);
539 }
540 
541 
getArg(char left,char right,bool allow_escaping)542 string Parser::getArg(char left, char right, bool allow_escaping)
543 {
544 	return getFullArg(left, right, allow_escaping).second;
545 }
546 
547 
getFullOpt(bool keepws,char left,char right)548 string Parser::getFullOpt(bool keepws, char left, char right)
549 {
550 	Arg arg = getFullArg(left, right);
551 	if (arg.first)
552 		return left + arg.second + right;
553 	if (keepws)
554 		unskip_spaces(true);
555 	return string();
556 }
557 
558 
getOpt(bool keepws)559 string Parser::getOpt(bool keepws)
560 {
561 	string const res = getArg('[', ']');
562 	if (res.empty()) {
563 		if (keepws)
564 			unskip_spaces(true);
565 		return string();
566 	}
567 	return '[' + res + ']';
568 }
569 
570 
getFullParentheseArg()571 string Parser::getFullParentheseArg()
572 {
573 	Arg arg = getFullArg('(', ')');
574 	if (arg.first)
575 		return '(' + arg.second + ')';
576 	return string();
577 }
578 
579 
ertEnvironment(string const & name)580 string const Parser::ertEnvironment(string const & name)
581 {
582 	if (!good())
583 		return string();
584 
585 	ostringstream os;
586 	for (Token t = get_token(); good(); t = get_token()) {
587 		if (t.cat() == catBegin) {
588 			putback();
589 			os << '{' << verbatim_item() << '}';
590 		} else if (t.asInput() == "\\begin") {
591 			string const env = getArg('{', '}');
592 			os << "\\begin{" << env << '}'
593 			   << ertEnvironment(env)
594 			   << "\\end{" << env << '}';
595 		} else if (t.asInput() == "\\end") {
596 			string const end = getArg('{', '}');
597 			if (end != name)
598 				cerr << "\\end{" << end
599 				     << "} does not match \\begin{" << name
600 				     << "}." << endl;
601 			return os.str();
602 		} else
603 			os << t.asInput();
604 	}
605 	cerr << "unexpected end of input" << endl;
606 	return os.str();
607 }
608 
609 
plainEnvironment(string const & name)610 string const Parser::plainEnvironment(string const & name)
611 {
612 	if (!good())
613 		return string();
614 
615 	ostringstream os;
616 	for (Token t = get_token(); good(); t = get_token()) {
617 		if (t.asInput() == "\\end") {
618 			string const end = getArg('{', '}');
619 			if (end == name)
620 				return os.str();
621 			else
622 				os << "\\end{" << end << '}';
623 		} else
624 			os << t.asInput();
625 	}
626 	cerr << "unexpected end of input" << endl;
627 	return os.str();
628 }
629 
630 
plainCommand(char left,char right,string const & name)631 string const Parser::plainCommand(char left, char right, string const & name)
632 {
633 	if (!good())
634 		return string();
635 	// check if first token is really the start character
636 	Token tok = get_token();
637 	if (tok.character() != left) {
638 		cerr << "first character does not match start character of command \\" << name << endl;
639 		return string();
640 	}
641 	ostringstream os;
642 	for (Token t = get_token(); good(); t = get_token()) {
643 		if (t.character() == right) {
644 			return os.str();
645 		} else
646 			os << t.asInput();
647 	}
648 	cerr << "unexpected end of input" << endl;
649 	return os.str();
650 }
651 
652 
getCommandLatexParam()653 string const Parser::getCommandLatexParam()
654 {
655 	if (!good())
656 		return string();
657 	string res;
658 	size_t offset = 0;
659 	while (true) {
660 		if (pos_ + offset >= tokens_.size())
661 			tokenize_one();
662 		if (pos_ + offset >= tokens_.size())
663 			break;
664 		Token t = tokens_[pos_ + offset];
665 		if (t.cat() == catBegin)
666 			break;
667 		res += t.asInput();
668 		++offset;
669 	}
670 	return res;
671 }
672 
673 
verbatimStuff(string const & end_string,bool const allow_linebreak)674 Parser::Arg Parser::verbatimStuff(string const & end_string, bool const allow_linebreak)
675 {
676 	if (!good())
677 		return Arg(false, string());
678 
679 	pushPosition();
680 	ostringstream oss;
681 	size_t match_index = 0;
682 	setCatcodes(VERBATIM_CATCODES);
683 	for (Token t = get_token(); good(); t = get_token()) {
684 		// FIXME t.asInput() might be longer than we need ?
685 		if (t.asInput() == end_string.substr(match_index,
686 						     t.asInput().length())) {
687 			match_index += t.asInput().length();
688 			if (match_index >= end_string.length())
689 				break;
690 		} else {
691 			if (!allow_linebreak && t.asInput() == "\n") {
692 				cerr << "unexpected end of input" << endl;
693 				popPosition();
694 				setCatcodes(NORMAL_CATCODES);
695 				return Arg(false, string());
696 			}
697 			if (match_index) {
698 				oss << end_string.substr(0, match_index)
699 				    << t.asInput();
700 				match_index = 0;
701 			} else
702 				oss << t.asInput();
703 		}
704 	}
705 
706 	if (!good()) {
707 		cerr << "unexpected end of input" << endl;
708 		popPosition();
709 		setCatcodes(NORMAL_CATCODES);
710 		return Arg(false, string());
711 	}
712 	setCatcodes(NORMAL_CATCODES);
713 	dropPosition();
714 	return Arg(true, oss.str());
715 }
716 
717 
verbatimEnvironment(string const & name)718 string const Parser::verbatimEnvironment(string const & name)
719 {
720 	//FIXME: do something if endstring is not found
721 	string s = verbatimStuff("\\end{" + name + "}").second;
722 	// ignore one newline at beginning or end of string
723 	if (prefixIs(s, "\n"))
724 		s.erase(0,1);
725 	if (suffixIs(s, "\n"))
726 		s.erase(s.length() - 1,1);
727 	return s;
728 }
729 
730 
verbatimOption()731 string Parser::verbatimOption()
732 {
733 	string res;
734 	if (next_token().character() == '[') {
735 		Token t = get_token();
736 		for (t = get_token(); t.character() != ']' && good(); t = get_token()) {
737 			if (t.cat() == catBegin) {
738 				putback();
739 				res += '{' + verbatim_item() + '}';
740 			} else
741 				res += t.asInput();
742 		}
743 	}
744 	return res;
745 }
746 
747 
verbatim_item()748 string Parser::verbatim_item()
749 {
750 	if (!good())
751 		error("stream bad");
752 	skip_spaces();
753 	if (next_token().cat() == catBegin) {
754 		Token t = get_token(); // skip brace
755 		string res;
756 		for (Token t = get_token(); t.cat() != catEnd && good(); t = get_token()) {
757 			if (t.cat() == catBegin) {
758 				putback();
759 				res += '{' + verbatim_item() + '}';
760 			}
761 			else
762 				res += t.asInput();
763 		}
764 		return res;
765 	}
766 	return get_token().asInput();
767 }
768 
769 
tokenize_one()770 void Parser::tokenize_one()
771 {
772 	catInit();
773 	char_type c;
774 	if (!is_.get(c))
775 		return;
776 
777 	switch (catcode(c)) {
778 	case catSpace: {
779 		docstring s(1, c);
780 		while (is_.get(c) && catcode(c) == catSpace)
781 			s += c;
782 		if (catcode(c) != catSpace)
783 			is_.putback(c);
784 		push_back(Token(s, catSpace));
785 		break;
786 	}
787 
788 	case catNewline: {
789 		++lineno_;
790 		docstring s(1, getNewline(is_, c));
791 		while (is_.get(c) && catcode(c) == catNewline) {
792 			++lineno_;
793 			s += getNewline(is_, c);
794 		}
795 		if (catcode(c) != catNewline)
796 			is_.putback(c);
797 		push_back(Token(s, catNewline));
798 		break;
799 	}
800 
801 	case catComment: {
802 		// We don't treat "%\n" combinations here specially because
803 		// we want to preserve them in the preamble
804 		docstring s;
805 		while (is_.get(c) && catcode(c) != catNewline)
806 			s += c;
807 		// handle possible DOS line ending
808 		if (catcode(c) == catNewline)
809 			c = getNewline(is_, c);
810 		// Note: The '%' at the beginning and the '\n' at the end
811 		// of the comment are not stored.
812 		++lineno_;
813 		push_back(Token(s, catComment));
814 		break;
815 	}
816 
817 	case catEscape: {
818 		is_.get(c);
819 		if (!is_) {
820 			error("unexpected end of input");
821 		} else {
822 			docstring s(1, c);
823 			if (catcode(c) == catLetter) {
824 				// collect letters
825 				while (is_.get(c) && catcode(c) == catLetter)
826 					s += c;
827 				if (catcode(c) != catLetter)
828 					is_.putback(c);
829 			}
830 			push_back(Token(s, catEscape));
831 		}
832 		break;
833 	}
834 
835 	case catIgnore: {
836 		cerr << "ignoring a char: " << c << "\n";
837 		break;
838 	}
839 
840 	default:
841 		push_back(Token(docstring(1, c), catcode(c)));
842 	}
843 	//cerr << tokens_.back();
844 }
845 
846 
dump() const847 void Parser::dump() const
848 {
849 	cerr << "\nTokens: ";
850 	for (unsigned i = 0; i < tokens_.size(); ++i) {
851 		if (i == pos_)
852 			cerr << " <#> ";
853 		cerr << tokens_[i];
854 	}
855 	cerr << " pos: " << pos_ << "\n";
856 }
857 
858 
error(string const & msg)859 void Parser::error(string const & msg)
860 {
861 	cerr << "Line ~" << lineno_ << ":  parse error: " << msg << endl;
862 	dump();
863 	//exit(1);
864 }
865 
866 
reset()867 void Parser::reset()
868 {
869 	pos_ = 0;
870 }
871 
872 
873 } // namespace lyx
874