1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32
33 //SEQAN_NO_GENERATED_FORWARDS: no forwards are generated for this file
34
35 #ifndef SEQAN_HEADER_MISC_PARSING_H
36 #define SEQAN_HEADER_MISC_PARSING_H
37
38 #include <cmath>
39
40
41 //////////////////////////////////////////////////////////////////////////////
42
43 namespace SEQAN_NAMESPACE_MAIN
44 {
45
46 //////////////////////////////////////////////////////////////////////////////
// General parsing functions
48 //////////////////////////////////////////////////////////////////////////////
49
50 //////////////////////////////////////////////////////////////////////////////
51
52
53 //////////////////////////////////////////////////////////////////////////////
54
// Advances the parser past the end of the current line.
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character; overwritten with every character read.
//
// Unless c already holds '\n' on entry, characters are read into c until c
// holds '\n' or _streamEOF(file) becomes true.  Afterwards one more character
// is fetched into c; that final read is NOT guarded by an EOF check.
template<typename TFile, typename TChar>
inline void
_parseSkipLine(TFile& file, TChar& c)
{
    if (!(c == '\n')) {
        for (;;) {
            if (_streamEOF(file)) break;
            c = _streamGet(file);
            if (c == '\n') break;
        }
    }
    // Fetch one more character (the one after the '\n' when one was found).
    c = _streamGet(file);
}
69 //////////////////////////////////////////////////////////////////////////////
70
71
// Skips characters whose code, converted to unsigned, is 32 or lower
// (the blank and everything below it).
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character; on return it holds the first character with
//       a code above 32, or the last character read if EOF was reached first.
template<typename TFile, typename TChar>
inline void
_parseSkipWhitespace(TFile& file, TChar& c)
{
    while (static_cast<unsigned>(c) <= 32u && !_streamEOF(file))
        c = _streamGet(file);
}
82
// Skips a run of blanks and tabs; line breaks are not skipped.
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character; on return it holds the first character that
//       is neither ' ' nor '\t', or the last character read if EOF was
//       reached first.
template<typename TFile, typename TChar>
inline void
_parseSkipSpace(TFile& file, TChar& c)
{
    while ((c == '\t' || c == ' ') && !_streamEOF(file))
        c = _streamGet(file);
}
93
94
/**
.Internal._parseSkipUntilChar:
..summary:Skip to the next occurrence of x in file.
..cat:Miscellaneous
..signature:_parseSkipUntilChar(file, x, c)
..param.file:The file to read from.
..param.x:The character to skip to.
..param.c:Parser state character.
..remarks:Nothing is read if c already equals x. Otherwise characters are read into c until c equals x or the end of the file is reached.
*/
template<typename TFile, typename TChar>
inline void
_parseSkipUntilChar(TFile& file, const TChar &x, TChar& c)
{
    while (!(c == x) && !_streamEOF(file))
        c = _streamGet(file);
}
114
115 //////////////////////////////////////////////////////////////////////////////
116
// Tells whether c is a decimal digit.
//
// c is converted to unsigned and checked against the code range 48..57,
// i.e. '0'..'9' in ASCII.  Negative values convert to large unsigned
// numbers and are therefore rejected.
template<typename TChar>
inline bool
_parseIsDigit(TChar const c)
{
    unsigned const code = static_cast<unsigned>(c);
    return (code >= 48u) && (code <= 57u);
}
123
124 //////////////////////////////////////////////////////////////////////////////
125
// Tells whether c is an unaccented Latin letter.
//
// c is converted to unsigned and checked against the code ranges 65..90
// ('A'..'Z' in ASCII) and 97..122 ('a'..'z' in ASCII).
template<typename TChar>
inline bool
_parseIsLetter(TChar const c)
{
    unsigned const code = static_cast<unsigned>(c);
    bool const isUpper = (code >= 65u) && (code <= 90u);
    bool const isLower = (code >= 97u) && (code <= 122u);
    return isUpper || isLower;
}
132
133 //////////////////////////////////////////////////////////////////////////////
134
// TODO(holtgrew): The name of this function is WRONG.
//
// Tells whether c may appear inside an identifier as read by
// _parseReadIdentifier(): a digit, a letter, or one of the punctuation
// characters '_', '.', '-', '|', '/' and ':'.
template<typename TChar>
inline bool
_parseIsAlphanumericChar(TChar const c)
{
    if (_parseIsDigit(c) || _parseIsLetter(c))
        return true;
    // Punctuation that is accepted in addition to digits and letters.
    return (c == '_') || (c == '.') || (c == '-')
        || (c == '|') || (c == '/') || (c == ':');
}
142
143 //////////////////////////////////////////////////////////////////////////////
144
145 template<typename TFile, typename TChar>
146 inline int
_parseReadNumber(TFile & file,TChar & c)147 _parseReadNumber(TFile & file, TChar& c)
148 {
149 // Read number
150 String<char> str(c);
151 while (!_streamEOF(file)) {
152 c = _streamGet(file);
153 if (!_parseIsDigit(c)) break;
154 append(str, c);
155 }
156 return atoi(toCString(str));
157 }
158
159 //////////////////////////////////////////////////////////////////////////////
160
161 template<typename TFile, typename TChar>
162 inline double
_parseReadDouble(TFile & file,TChar & c)163 _parseReadDouble(TFile & file, TChar& c)
164 {
165 // Read number
166 String<char> str(c);
167 while (!_streamEOF(file)) {
168 c = _streamGet(file);
169 if (!_parseIsDigit(c) && (c != '.')) break;
170 append(str, c);
171 }
172 return atof(toCString(str));
173 }
174
175 //////////////////////////////////////////////////////////////////////////////
176
177 template<typename TFile, typename TChar>
178 inline String<char>
_parseReadIdentifier(TFile & file,TChar & c)179 _parseReadIdentifier(TFile & file, TChar& c)
180 {
181 // Read identifier
182 String<char> str(c);
183 while (!_streamEOF(file)) {
184 c = _streamGet(file);
185 if (!_parseIsAlphanumericChar(c)) break;
186 append(str, c);
187 }
188 return str;
189 }
190
191 //////////////////////////////////////////////////////////////////////////////
192
// Returns the current parser state character and advances by one character.
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character; replaced by the next character unless
//       _streamEOF(file) is already true, in which case it is left untouched.
template<typename TFile, typename TChar>
inline char
_parseReadChar(TFile & file, TChar& c)
{
    char const current = c;
    if (_streamEOF(file))
        return current;
    c = _streamGet(file);
    return current;
}
202
203 //////////////////////////////////////////////////////////////////////////////
204
205 template<typename TFile, typename TString, typename TChar>
206 inline void
_parseReadIdentifier(TFile & file,TString & str,TChar & c)207 _parseReadIdentifier(TFile & file, TString& str, TChar& c)
208 {
209 // Read identifier
210 append(str, c, Generous());
211 while (!_streamEOF(file)) {
212 c = _streamGet(file);
213 if (!_parseIsAlphanumericChar(c)) break;
214 append(str, c, Generous());
215 }
216 }
217
218 //////////////////////////////////////////////////////////////////////////////
219
220 template<typename TFile, typename TChar>
221 inline String<char>
_parseReadWord(TFile & file,TChar & c)222 _parseReadWord(TFile & file, TChar& c)
223 {
224 // Read word
225 String<char> str(c);
226 while (!_streamEOF(file)) {
227 c = _streamGet(file);
228 if (!_parseIsLetter(c)) break;
229 append(str, c);
230 }
231 return str;
232 }
233
234
235 // parse word up to a maximum length
236 template<typename TFile, typename TChar, typename TSize>
237 inline String<char>
_parseReadWord(TFile & file,TChar & c,TSize max_len)238 _parseReadWord(TFile & file, TChar& c, TSize max_len)
239 {
240 // Read word
241 String<char> str(c);
242 --max_len;
243 TSize i = 0;
244 while (!_streamEOF(file) ) {
245 c = _streamGet(file);
246 if (!_parseIsLetter(c) || i >= max_len) break;
247 append(str, c);
248 ++i;
249 }
250 return str;
251 }
252
253
254
255
256 //read filename (read line and trim trailing whitespaces)
257 template<typename TFile, typename TChar>
258 inline String<char>
_parseReadFilepath(TFile & file,TChar & c)259 _parseReadFilepath(TFile& file, TChar& c)
260 {
261 String<char> str(c);
262 if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) {
263 c = _streamGet(file);
264 return str;
265 }
266 while (!_streamEOF(file)) {
267 c = _streamGet(file);
268 if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) break;
269 append(str, c);
270 }
271 typename Iterator<String<char>,Rooted >::Type str_it = end(str);
272 while(str_it != begin(str)) {
273 --str_it;
274 if(*str_it != ' ' && *str_it != '\t'){
275 ++str_it;
276 break;
277 }
278 }
279 resize(str,position(str_it));
280 return str;
281 }
282
283
284 //read filename (read line and trim trailing whitespaces)
285 template<typename TFile, typename TChar>
286 inline String<char>
_parseReadWordUntilWhitespace(TFile & file,TChar & c)287 _parseReadWordUntilWhitespace(TFile& file, TChar& c)
288 {
289 String<char> str(c);
290 if (c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) {
291 c = _streamGet(file);
292 return str;
293 }
294 while (!_streamEOF(file)) {
295 c = _streamGet(file);
296 if (c== ' ' || c== '\t' || c == '\n' || (c == '\r' && _streamPeek(file) != '\n')) break;
297 append(str, c);
298 }
299 return str;
300 }
301
302
303 //////////////////////////////////////////////////////////////////////////////
304
305 template<typename TFile, typename TChar, typename TString>
306 inline void
_parseReadSequenceData(TFile & file,TChar & c,TString & str)307 _parseReadSequenceData(TFile & file,
308 TChar & c,
309 TString& str)
310 {
311 SEQAN_CHECKPOINT
312
313 append(str, c);
314
315 // Read sequence
316 while (!_streamEOF(file)) {
317 c = _streamGet(file);
318 if (!_parseIsLetter(c)) break;
319 else append(str, c);
320 }
321 }
322
323
324
// Skips a run of blanks and tabs (same character set as _parseSkipSpace()).
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character; on return it holds the first character that
//       is neither ' ' nor '\t', or the last character read if EOF was
//       reached first.
template<typename TFile, typename TChar>
inline void
_parseSkipBlanks(TFile& file, TChar& c)
{
    for (;;) {
        bool const isBlank = (c == ' ') || (c == '\t');
        if (!isBlank || _streamEOF(file))
            return;
        c = _streamGet(file);
    }
}
335
// Variant of _parseSkipLine() that accepts '\r' as well as '\n' as the end
// of a line and never reads once _streamEOF(file) is true.
//
// file: stream queried with _streamEOF() and read with _streamGet().
// c:    parser state character.  Characters are read until c holds '\n' or
//       '\r' (or EOF is reached); then, if the stream is not at EOF, one
//       more character is fetched into c.
// NOTE(review): only a single character is consumed after the terminator,
// so for a "\r\n" pair c presumably ends up on the '\n' -- verify against
// callers.
template<typename TFile, typename TChar>
inline void
_parseSkipLine2(TFile& file, TChar& c)
{
    while (!(c == '\n' || c == '\r') && !_streamEOF(file))
        c = _streamGet(file);

    if (_streamEOF(file))
        return;
    c = _streamGet(file);
}
348
349
350
351 //////////////////////////////////////////////////////////////////////////////
352 template<typename TFile, typename TChar>
353 inline double
_parseReadEValue(TFile & file,TChar & c)354 _parseReadEValue(TFile & file, TChar& c)
355 {
356 SEQAN_CHECKPOINT
357
358 // Read number
359 String<char> str(c);
360 bool e = false;
361 double val1 = 0;
362 while (!_streamEOF(file)) {
363 c = _streamGet(file);
364 if(!e && c == 'e'){
365 e = true;
366 val1 = atof(toCString(str));
367 c = _streamGet(file);
368 resize(str,0);
369 }
370 if (!_parseIsDigit(c) && c != '.' && c != '-' && c != '+') break;
371 append(str, c);
372 }
373 if(e)
374 {
375 return val1 * pow((double)10.0,(double)atof(toCString(str)));
376 }
377 else
378 return (double)atof(toCString(str));
379 }
380
381
382
383
384
385 /////////////////////////////////////////////////////////////////////////////////
386 // read floating point value
387 template<typename TFile, typename TChar>
388 inline float
_parseReadFloat(TFile & file,TChar & c)389 _parseReadFloat(TFile & file, TChar& c)
390 {
391 SEQAN_CHECKPOINT
392 // Read number
393 String<char> str(c);
394 while (!_streamEOF(file)) {
395 c = _streamGet(file);
396 if (c != '.' && c != ',' && !_parseIsDigit(c)) break;
397 append(str, c);
398 }
399 return atof(toCString(str));
400 }
401
402
403
404
405 /////////////////////////////////////////////////////////////////////////////////
406 //parse until line begins with character x (skip whitespaces)
407 // zeigt am ende darauf!!!
408 template<typename TFile, typename TChar>
409 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,TChar x)410 _parseUntilBeginLine(TFile & file, TChar& c, TChar x)
411 {
412 SEQAN_CHECKPOINT
413 _parseSkipWhitespace(file,c);
414 typename Position<TFile>::Type pos = _streamTellG(file);
415 TChar c_before = c;
416 while (!_streamEOF(file) && c != x){
417 _parseSkipLine(file, c);
418 _parseSkipWhitespace(file,c);
419 }
420 if(!_streamEOF(file)) return true;
421 _streamSeekG(file,pos);
422 c = c_before;
423 return false;
424 }
425
426
427 /////////////////////////////////////////////////////////////////////////////////
428 //parse until line begins with word
429 //zeigt am ende dahinter!
430 template<typename TFile, typename TChar, typename TSize>
431 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,String<TChar> & word,TSize len)432 _parseUntilBeginLine(TFile & file, TChar& c, String<TChar> & word, TSize len)
433 {
434 SEQAN_CHECKPOINT
435 _parseSkipWhitespace(file,c);
436 typename Position<TFile>::Type pos = _streamTellG(file);
437 TChar c_before = c;
438 while (!_streamEOF(file)){
439 if(c == word[0])
440 if(word == _parseReadWord(file,c,len))
441 break;
442 _parseSkipLine(file, c);
443 _parseSkipWhitespace(file,c);
444 }
445 if(!_streamEOF(file)) return true;
446 _streamSeekG(file,pos);
447 c = c_before;
448 return false;
449 }
450
451
452 /////////////////////////////////////////////////////////////////////////////////
453 //parse until line begins with word (parse no more than num_lines lines)
454 //zeigt am ende dahinter!
455 template<typename TFile, typename TChar, typename TSize>
456 inline bool
_parseUntilBeginLine(TFile & file,TChar & c,String<TChar> & word,TSize len,TSize num_lines)457 _parseUntilBeginLine(TFile & file, TChar& c, String<TChar> & word, TSize len, TSize num_lines)
458 {
459 SEQAN_CHECKPOINT
460 _parseSkipWhitespace(file,c);
461 typename Position<TFile>::Type pos = _streamTellG(file);
462 TChar c_before = c;
463 TSize i = 0;
464 bool found = false;
465 while (!_streamEOF(file)){
466 if(c == word[0])
467 if(word == _parseReadWord(file,c,len))
468 {
469 found = true;
470 break;
471 }
472 if(i >= num_lines)
473 break;
474 ++i;
475 _parseSkipLine(file, c);
476 _parseSkipWhitespace(file,c);
477 }
478 if(!_streamEOF(file) && found) return true;
479 _streamSeekG(file,pos);
480 c = c_before;
481 return false;
482 }
483
484
485 /////////////////////////////////////////////////////////////////////////////////
486 //parse until line begins with one of the characters in string x (skip whitespaces)
487 //zeigt am ende darauf!
488 template<typename TFile, typename TChar, typename TSize>
489 inline bool
_parseUntilBeginLineOneOf(TFile & file,TChar & c,String<TChar> & x,TSize len)490 _parseUntilBeginLineOneOf(TFile & file, TChar& c, String<TChar> & x, TSize len)
491 {
492 SEQAN_CHECKPOINT
493 _parseSkipWhitespace(file,c);
494 typename Position<TFile>::Type pos = _streamTellG(file);
495 TChar c_before = c;
496 bool found = false;
497 while (!_streamEOF(file)){
498 for(int i = 0; i < len; ++i)
499 if(c == x[i])
500 {
501 found = true;
502 break;
503 }
504 if(found) break;
505 _parseSkipLine(file, c);
506 _parseSkipWhitespace(file,c);
507 }
508 if(!_streamEOF(file)) return true;
509 _streamSeekG(file,pos);
510 c = c_before;
511 return false;
512 }
513
514
515 /////////////////////////////////////////////////////////////////////////////////
516 //parse until c == x
517 //zeigt am ende darauf!
518 template<typename TFile, typename TChar>
519 inline bool
_parseUntil(TFile & file,TChar & c,TChar x)520 _parseUntil(TFile & file, TChar& c, TChar x)
521 {
522 SEQAN_CHECKPOINT
523 typename Position<TFile>::Type pos = _streamTellG(file);
524 TChar c_before = c;
525 while (!_streamEOF(file) && c != x){
526 c = _streamGet(file);
527 }
528 if(!_streamEOF(file)) return true;
529 _streamSeekG(file,pos);
530 c = c_before;
531 return false;
532 }
533
534
535
536 /////////////////////////////////////////////////////////////////////////////////
537 //parse until word
538 //zeigt am ende dahinter!
539 template<typename TFile, typename TChar, typename TSize>
540 inline bool
_parseUntil(TFile & file,TChar & c,String<TChar> & word,TSize len)541 _parseUntil(TFile & file, TChar& c, String<TChar> & word, TSize len)
542 {
543 SEQAN_CHECKPOINT
544 typename Position<TFile>::Type pos = _streamTellG(file);
545 TChar c_before = c;
546 while (!_streamEOF(file)){
547 if(c == word[0])
548 if(word == _parseReadWord(file,c,len))
549 break;
550 c = _streamGet(file);
551 }
552 if(!_streamEOF(file)) return true;
553 _streamSeekG(file,pos);
554 c = c_before;
555 return false;
556 }
557
558
559 /////////////////////////////////////////////////////////////////////////////////
560 //parse until c == x or new line
561 //zeigt am ende darauf!
562 template<typename TFile, typename TChar>
563 inline bool
_parseLineUntil(TFile & file,TChar & c,TChar x)564 _parseLineUntil(TFile & file, TChar& c, TChar x)
565 {
566 SEQAN_CHECKPOINT
567 typename Position<TFile>::Type pos = _streamTellG(file);
568 TChar c_before = c;
569 while (!_streamEOF(file) && c != x){
570 if (c == '\n' || c == '\r')
571 {
572 _streamSeekG(file,pos);
573 c = c_before;
574 return false;
575 }
576 c = _streamGet(file);
577 }
578 if(!_streamEOF(file)) return true;
579 _streamSeekG(file,pos);
580 c = c_before;
581 return false;
582 }
583
584
585 /////////////////////////////////////////////////////////////////////////////////
586 //parse this line until word
587 //zeigt am ende hinter wort if true, oder auf ende der zeile
588 template<typename TFile, typename TChar, typename TSize>
589 inline bool
_parseLineUntil(TFile & file,TChar & c,String<TChar> & word,TSize len)590 _parseLineUntil(TFile & file, TChar& c, String<TChar> & word, TSize len)
591 {
592 SEQAN_CHECKPOINT
593 typename Position<TFile>::Type pos = _streamTellG(file);
594 TChar c_before = c;
595 while (!_streamEOF(file)){
596 if(c == word[0])
597 { if(word == _parseReadWord(file,c,len))
598 break;
599 }
600 else if (c == '\n' || c == '\r')
601 {
602 _streamSeekG(file,pos);
603 c = c_before;
604 return false;
605 }
606 c = _streamGet(file);
607 }
608 if(!_streamEOF(file)) return true;
609 _streamSeekG(file,pos);
610 c = c_before;
611 return false;
612 }
613
614
615
616
617
618
619 }
620
621 #endif
622
623