1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 
33 //SEQAN_NO_GENERATED_FORWARDS: no forwards are generated for this file
34 
35 #ifndef SEQAN_HEADER_MISC_PARSING_H
36 #define SEQAN_HEADER_MISC_PARSING_H
37 
38 #include <cmath>
39 
40 
41 //////////////////////////////////////////////////////////////////////////////
42 
43 namespace SEQAN_NAMESPACE_MAIN
44 {
45 
46 //////////////////////////////////////////////////////////////////////////////
// General parsing functions
48 //////////////////////////////////////////////////////////////////////////////
49 
50 //////////////////////////////////////////////////////////////////////////////
51 
52 
53 //////////////////////////////////////////////////////////////////////////////
54 
// Moves the parser to the first character of the next line.
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; on return it holds the character that was
//         read right after the line feed.
// Only '\n' is recognised as line terminator. The final read is performed
// unconditionally, i.e. also when the stream already reports end-of-file.
template<typename TFile, typename TChar>
inline void
_parseSkipLine(TFile & file, TChar & c)
{
	// Advance until c holds the line feed or nothing is left to read.
	bool atLineFeed = (c == '\n');
	while (!atLineFeed && !_streamEOF(file))
	{
		c = _streamGet(file);
		atLineFeed = (c == '\n');
	}
	// Step over the line feed (or past the end of the stream).
	c = _streamGet(file);
}
69 //////////////////////////////////////////////////////////////////////////////
70 
71 
// Skips whitespace, where whitespace is every character whose code
// (converted to unsigned) is 32 or less.
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; left untouched if it is not whitespace,
//         otherwise it ends up holding the first non-whitespace character
//         (or the last character read when the stream ran out).
template<typename TFile, typename TChar>
inline void
_parseSkipWhitespace(TFile & file, TChar & c)
{
	while ((unsigned) c <= 32 && !_streamEOF(file))
		c = _streamGet(file);
}
82 
// Skips blanks and tabs, but never a line break.
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; on return it holds the first character that
//         is neither ' ' nor '\t' (or the last character read when the stream
//         ran out).
template<typename TFile, typename TChar>
inline void
_parseSkipSpace(TFile & file, TChar & c)
{
	for (;;)
	{
		if (c != '\t' && c != ' ') return;	// something other than a blank: done
		if (_streamEOF(file)) return;		// nothing left to read
		c = _streamGet(file);
	}
}
93 
94 
/**
.Internal._parseSkipUntilChar:
..summary:Skip to the next occurrence of x in file.
..cat:Miscellaneous
..signature:_parseSkipUntilChar(file, x, c)
..param.file:The file to read from.
..param.x:The character to skip to.
..param.c:Parser state character.
 */
template<typename TFile, typename TChar>
inline void
_parseSkipUntilChar(TFile & file, const TChar & x, TChar & c)
{
	// Nothing is read when c already equals x; otherwise characters are
	// consumed until x has been read or the stream is exhausted.
	bool reached = (c == x);
	while (!reached && !_streamEOF(file))
	{
		c = _streamGet(file);
		reached = (c == x);
	}
}
114 
115 //////////////////////////////////////////////////////////////////////////////
116 
// Returns true iff the code of c (converted to unsigned) lies in 48..57,
// i.e. c is one of the ASCII characters '0'..'9'.
template<typename TChar>
inline bool
_parseIsDigit(TChar const c)
{
	unsigned const code = (unsigned) c;
	return code >= 48u && code <= 57u;
}
123 
124 //////////////////////////////////////////////////////////////////////////////
125 
// Returns true iff the code of c (converted to unsigned) lies in 65..90 or
// 97..122, i.e. c is an ASCII letter 'A'..'Z' or 'a'..'z'.
template<typename TChar>
inline bool
_parseIsLetter(TChar const c)
{
	unsigned const code = (unsigned) c;
	bool const isUpper = (code >= 65u) && (code <= 90u);
	bool const isLower = (code >= 97u) && (code <= 122u);
	return isUpper || isLower;
}
132 
133 //////////////////////////////////////////////////////////////////////////////
134 
// TODO(holtgrew): The name of this function is WRONG.
// Despite its name this predicate accepts more than letters and digits: it
// returns true for ASCII digits, ASCII letters and the punctuation characters
// '_', '.', '-', '|', '/' and ':'.
template<typename TChar>
inline bool
_parseIsAlphanumericChar(TChar const c)
{
	if (_parseIsDigit(c)) return true;
	if (_parseIsLetter(c)) return true;
	// Punctuation that is additionally allowed.
	return (c == '_') || (c == '.') || (c == '-')
		|| (c == '|') || (c == '/') || (c == ':');
}
142 
143 //////////////////////////////////////////////////////////////////////////////
144 
145 template<typename TFile, typename TChar>
146 inline int
_parseReadNumber(TFile & file,TChar & c)147 _parseReadNumber(TFile & file, TChar& c)
148 {
149 	// Read number
150 	String<char> str(c);
151 	while (!_streamEOF(file)) {
152 		c = _streamGet(file);
153 		if (!_parseIsDigit(c)) break;
154 		append(str, c);
155 	}
156  	return atoi(toCString(str));
157 }
158 
159 //////////////////////////////////////////////////////////////////////////////
160 
161 template<typename TFile, typename TChar>
162 inline double
_parseReadDouble(TFile & file,TChar & c)163 _parseReadDouble(TFile & file, TChar& c)
164 {
165 	// Read number
166 	String<char> str(c);
167 	while (!_streamEOF(file)) {
168 		c = _streamGet(file);
169 		if (!_parseIsDigit(c) && (c != '.')) break;
170 		append(str, c);
171 	}
172  	return atof(toCString(str));
173 }
174 
175 //////////////////////////////////////////////////////////////////////////////
176 
177 template<typename TFile, typename TChar>
178 inline String<char>
_parseReadIdentifier(TFile & file,TChar & c)179 _parseReadIdentifier(TFile & file, TChar& c)
180 {
181 	// Read identifier
182 	String<char> str(c);
183 	while (!_streamEOF(file)) {
184 		c = _streamGet(file);
185 		if (!_parseIsAlphanumericChar(c)) break;
186 		append(str, c);
187 	}
188 	return str;
189 }
190 
191 //////////////////////////////////////////////////////////////////////////////
192 
// Returns the current parser state character and advances by one character.
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; replaced by the next character of the
//         stream unless the stream already reports end-of-file.
template<typename TFile, typename TChar>
inline char
_parseReadChar(TFile & file, TChar & c)
{
	char const current = c;
	if (_streamEOF(file))
		return current;
	c = _streamGet(file);
	return current;
}
202 
203 //////////////////////////////////////////////////////////////////////////////
204 
205 template<typename TFile, typename TString, typename TChar>
206 inline void
_parseReadIdentifier(TFile & file,TString & str,TChar & c)207 _parseReadIdentifier(TFile & file, TString& str, TChar& c)
208 {
209 	// Read identifier
210 	append(str, c, Generous());
211 	while (!_streamEOF(file)) {
212 		c = _streamGet(file);
213 		if (!_parseIsAlphanumericChar(c)) break;
214 		append(str, c, Generous());
215 	}
216 }
217 
218 //////////////////////////////////////////////////////////////////////////////
219 
220 template<typename TFile, typename TChar>
221 inline String<char>
_parseReadWord(TFile & file,TChar & c)222 _parseReadWord(TFile & file, TChar& c)
223 {
224 	// Read word
225 	String<char> str(c);
226 	while (!_streamEOF(file)) {
227 		c = _streamGet(file);
228 		if (!_parseIsLetter(c)) break;
229 		append(str, c);
230 	}
231 	return str;
232 }
233 
234 
235 // parse word up to a maximum length
236 template<typename TFile, typename TChar, typename TSize>
237 inline String<char>
_parseReadWord(TFile & file,TChar & c,TSize max_len)238 _parseReadWord(TFile & file, TChar& c, TSize max_len)
239 {
240 	// Read word
241 	String<char> str(c);
242 	--max_len;
243 	TSize i = 0;
244 	while (!_streamEOF(file) ) {
245 		c = _streamGet(file);
246 		if (!_parseIsLetter(c) || i >= max_len) break;
247 		append(str, c);
248 		++i;
249 	}
250 	return str;
251 }
252 
253 
254 
255 
256 //read filename (read line and trim trailing whitespaces)
257 template<typename TFile, typename TChar>
258 inline String<char>
_parseReadFilepath(TFile & file,TChar & c)259 _parseReadFilepath(TFile& file, TChar& c)
260 {
261 	String<char> str(c);
262 	if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) {
263 		c = _streamGet(file);
264 		return str;
265 	}
266 	while (!_streamEOF(file)) {
267 		c = _streamGet(file);
268 		if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) break;
269 		append(str, c);
270 	}
271 	typename Iterator<String<char>,Rooted >::Type str_it = end(str);
272 	while(str_it != begin(str)) {
273 		--str_it;
274 		if(*str_it != ' ' && *str_it != '\t'){
275 		++str_it;
276 		break;
277 		}
278 	}
279 	resize(str,position(str_it));
280 	return str;
281 }
282 
283 
284 //read filename (read line and trim trailing whitespaces)
285 template<typename TFile, typename TChar>
286 inline String<char>
_parseReadWordUntilWhitespace(TFile & file,TChar & c)287 _parseReadWordUntilWhitespace(TFile& file, TChar& c)
288 {
289 	String<char> str(c);
290 	if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) {
291 		c = _streamGet(file);
292 		return str;
293 	}
294 	while (!_streamEOF(file)) {
295 		c = _streamGet(file);
296 		if (c== ' ' || c== '\t' || c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) break;
297 		append(str, c);
298 	}
299 	return str;
300 }
301 
302 
303 //////////////////////////////////////////////////////////////////////////////
304 
305 template<typename TFile, typename TChar, typename TString>
306 inline void
_parseReadSequenceData(TFile & file,TChar & c,TString & str)307 _parseReadSequenceData(TFile & file,
308 						TChar & c,
309 						TString& str)
310 {
311 	SEQAN_CHECKPOINT
312 
313 	append(str, c);
314 
315 	// Read sequence
316 	while (!_streamEOF(file)) {
317 		c = _streamGet(file);
318 		if (!_parseIsLetter(c)) break;
319 		else append(str, c);
320 	}
321 }
322 
323 
324 
// Skips blanks and tabs (same behaviour as _parseSkipSpace).
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; on return it holds the first character that
//         is neither ' ' nor '\t' (or the last character read when the stream
//         ran out).
template<typename TFile, typename TChar>
inline void
_parseSkipBlanks(TFile & file, TChar & c)
{
	for (;;)
	{
		bool const isBlank = !((c != ' ') && (c != '\t'));
		if (!isBlank || _streamEOF(file)) return;
		c = _streamGet(file);
	}
}
335 
// Variant of _parseSkipLine that accepts '\n' as well as '\r' as line
// terminator and does not read beyond the end of the stream.
//   file: stream accessed through _streamEOF / _streamGet.
//   c:    parser state character; on return it holds the character that
//         follows the first terminator found (for a CR LF pair that is the
//         '\n'), unless the stream ended before.
template<typename TFile, typename TChar>
inline void
_parseSkipLine2(TFile & file, TChar & c)
{
	bool atLineEnd = (c == '\n' || c == '\r');
	while (!atLineEnd && !_streamEOF(file))
	{
		c = _streamGet(file);
		atLineEnd = (c == '\n' || c == '\r');
	}
	// Step over the terminator only if there is something left to read.
	if (_streamEOF(file)) return;
	c = _streamGet(file);
}
348 
349 
350 
351 //////////////////////////////////////////////////////////////////////////////
352 template<typename TFile, typename TChar>
353 inline double
_parseReadEValue(TFile & file,TChar & c)354 _parseReadEValue(TFile & file, TChar& c)
355 {
356 SEQAN_CHECKPOINT
357 
358 	// Read number
359 	String<char> str(c);
360 	bool e = false;
361 	double val1 = 0;
362 	while (!_streamEOF(file)) {
363 		c = _streamGet(file);
364 		if(!e && c == 'e'){
365 			e = true;
366 			val1 = atof(toCString(str));
367 			c = _streamGet(file);
368 			resize(str,0);
369 		}
370 		if (!_parseIsDigit(c) && c != '.' && c != '-' && c != '+') break;
371 		append(str, c);
372 	}
373 	if(e)
374 	{
375 		return val1 * pow((double)10.0,(double)atof(toCString(str)));
376 	}
377  	else
378 		return (double)atof(toCString(str));
379 }
380 
381 
382 
383 
384 
385 /////////////////////////////////////////////////////////////////////////////////
386 // read floating point value
387 template<typename TFile, typename TChar>
388 inline float
_parseReadFloat(TFile & file,TChar & c)389 _parseReadFloat(TFile & file, TChar& c)
390 {
391 SEQAN_CHECKPOINT
392 	// Read number
393 	String<char> str(c);
394 	while (!_streamEOF(file)) {
395 		c = _streamGet(file);
396 		if (c != '.' && c != ',' && !_parseIsDigit(c)) break;
397 		append(str, c);
398 	}
399  	return atof(toCString(str));
400 }
401 
402 
403 
404 
405 /////////////////////////////////////////////////////////////////////////////////
406 //parse until line begins with character x (skip whitespaces)
407 // zeigt am ende darauf!!!
408 template<typename TFile, typename TChar>
409 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,TChar x)410 _parseUntilBeginLine(TFile & file, TChar& c, TChar x)
411 {
412 SEQAN_CHECKPOINT
413 	_parseSkipWhitespace(file,c);
414 	typename Position<TFile>::Type pos = _streamTellG(file);
415 	TChar c_before = c;
416 	while (!_streamEOF(file) && c != x){
417 		_parseSkipLine(file, c);
418 		_parseSkipWhitespace(file,c);
419 	}
420 	if(!_streamEOF(file)) return true;
421 	_streamSeekG(file,pos);
422 	c = c_before;
423 	return false;
424 }
425 
426 
427 /////////////////////////////////////////////////////////////////////////////////
428 //parse until line begins with word
429 //zeigt am ende dahinter!
430 template<typename TFile, typename TChar, typename TSize>
431 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,String<TChar> & word,TSize len)432 _parseUntilBeginLine(TFile & file, TChar& c, String<TChar> & word, TSize len)
433 {
434 SEQAN_CHECKPOINT
435 	_parseSkipWhitespace(file,c);
436 	typename Position<TFile>::Type pos = _streamTellG(file);
437 	TChar c_before = c;
438 	while (!_streamEOF(file)){
439 		if(c == word[0])
440 			if(word == _parseReadWord(file,c,len))
441 				break;
442 		_parseSkipLine(file, c);
443 		_parseSkipWhitespace(file,c);
444 	}
445 	if(!_streamEOF(file)) return true;
446 	_streamSeekG(file,pos);
447 	c = c_before;
448 	return false;
449 }
450 
451 
452 /////////////////////////////////////////////////////////////////////////////////
453 //parse until line begins with word (parse no more than num_lines lines)
454 //zeigt am ende dahinter!
455 template<typename TFile, typename TChar, typename TSize>
456 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,String<TChar> & word,TSize len,TSize num_lines)457 _parseUntilBeginLine(TFile & file, TChar& c, String<TChar> & word, TSize len, TSize num_lines)
458 {
459 SEQAN_CHECKPOINT
460 	_parseSkipWhitespace(file,c);
461 	typename Position<TFile>::Type pos = _streamTellG(file);
462 	TChar c_before = c;
463 	TSize i = 0;
464 	bool found = false;
465 	while (!_streamEOF(file)){
466 		if(c == word[0])
467 			if(word == _parseReadWord(file,c,len))
468 			{
469 				found = true;
470 				break;
471 			}
472 		if(i >= num_lines)
473 			break;
474 		++i;
475 		_parseSkipLine(file, c);
476 		_parseSkipWhitespace(file,c);
477 	}
478 	if(!_streamEOF(file) && found) return true;
479 	_streamSeekG(file,pos);
480 	c = c_before;
481 	return false;
482 }
483 
484 
485 /////////////////////////////////////////////////////////////////////////////////
486 //parse until line begins with one of the characters in string x (skip whitespaces)
487 //zeigt am ende darauf!
488 template<typename TFile, typename TChar, typename TSize>
489 inline bool
_parseUntilBeginLineOneOf(TFile & file,TChar & c,String<TChar> & x,TSize len)490 _parseUntilBeginLineOneOf(TFile & file, TChar& c, String<TChar> & x, TSize len)
491 {
492 SEQAN_CHECKPOINT
493 	_parseSkipWhitespace(file,c);
494 	typename Position<TFile>::Type pos = _streamTellG(file);
495 	TChar c_before = c;
496 	bool found = false;
497 	while (!_streamEOF(file)){
498 		for(int i = 0; i < len; ++i)
499 			if(c == x[i])
500 			{
501 				found = true;
502 				break;
503 			}
504 		if(found) break;
505 		_parseSkipLine(file, c);
506 		_parseSkipWhitespace(file,c);
507 	}
508 	if(!_streamEOF(file)) return true;
509 	_streamSeekG(file,pos);
510 	c = c_before;
511 	return false;
512 }
513 
514 
515 /////////////////////////////////////////////////////////////////////////////////
516 //parse until c == x
517 //zeigt am ende darauf!
518 template<typename TFile, typename TChar>
519 inline bool
_parseUntil(TFile & file,TChar & c,TChar x)520 _parseUntil(TFile & file, TChar& c, TChar x)
521 {
522 SEQAN_CHECKPOINT
523 	typename Position<TFile>::Type pos = _streamTellG(file);
524 	TChar c_before = c;
525 	while (!_streamEOF(file) && c != x){
526 		c = _streamGet(file);
527 	}
528 	if(!_streamEOF(file)) return true;
529 	_streamSeekG(file,pos);
530 	c = c_before;
531 	return false;
532 }
533 
534 
535 
536 /////////////////////////////////////////////////////////////////////////////////
537 //parse until word
538 //zeigt am ende dahinter!
539 template<typename TFile, typename TChar, typename TSize>
540 inline bool
_parseUntil(TFile & file,TChar & c,String<TChar> & word,TSize len)541 _parseUntil(TFile & file, TChar& c, String<TChar> & word, TSize len)
542 {
543 SEQAN_CHECKPOINT
544 	typename Position<TFile>::Type pos = _streamTellG(file);
545 	TChar c_before = c;
546 	while (!_streamEOF(file)){
547 		if(c == word[0])
548 			if(word == _parseReadWord(file,c,len))
549 				break;
550 		c = _streamGet(file);
551 	}
552 	if(!_streamEOF(file)) return true;
553 	_streamSeekG(file,pos);
554 	c = c_before;
555 	return false;
556 }
557 
558 
559 /////////////////////////////////////////////////////////////////////////////////
560 //parse until c == x or new line
561 //zeigt am ende darauf!
562 template<typename TFile, typename TChar>
563 inline bool
_parseLineUntil(TFile & file,TChar & c,TChar x)564 _parseLineUntil(TFile & file, TChar& c, TChar x)
565 {
566 SEQAN_CHECKPOINT
567 	typename Position<TFile>::Type pos = _streamTellG(file);
568 	TChar c_before = c;
569 	while (!_streamEOF(file) && c != x){
570 		if (c == '\n' || c == '\r')
571 		{
572 			_streamSeekG(file,pos);
573 			c = c_before;
574 			return false;
575 		}
576 		c = _streamGet(file);
577 	}
578 	if(!_streamEOF(file)) return true;
579 	_streamSeekG(file,pos);
580 	c = c_before;
581 	return false;
582 }
583 
584 
585 /////////////////////////////////////////////////////////////////////////////////
586 //parse this line until word
587 //zeigt am ende hinter wort if true, oder auf ende der zeile
588 template<typename TFile, typename TChar, typename TSize>
589 inline bool
_parseLineUntil(TFile & file,TChar & c,String<TChar> & word,TSize len)590 _parseLineUntil(TFile & file, TChar& c, String<TChar> & word, TSize len)
591 {
592 SEQAN_CHECKPOINT
593 	typename Position<TFile>::Type pos = _streamTellG(file);
594 	TChar c_before = c;
595 	while (!_streamEOF(file)){
596 		if(c == word[0])
597 		{	if(word == _parseReadWord(file,c,len))
598 				break;
599 		}
600 		else if (c == '\n' || c == '\r')
601 			{
602 				_streamSeekG(file,pos);
603 				c = c_before;
604 				return false;
605 			}
606 		c = _streamGet(file);
607 	}
608 	if(!_streamEOF(file)) return true;
609 	_streamSeekG(file,pos);
610 	c = c_before;
611 	return false;
612 }
613 
614 
615 
616 
617 
618 
619 }
620 
621 #endif
622 
623