1 /**
2 * PLL (version 1.0.0) a software library for phylogenetic inference
3 * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4 *
5 * Derived from
6 * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7 * trees by Alexandros Stamatakis
8 *
9 * This program is free software: you can redistribute it and/or modify it
10 * under the terms of the GNU General Public License as published by the Free
11 * Software Foundation, either version 3 of the License, or (at your option)
12 * any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program. If not, see <http://www.gnu.org/licenses/>.
21 *
22 * For any other enquiries send an Email to Tomas Flouri
23 * Tomas.Flouri@h-its.org
24 *
25 * When publishing work that uses PLL please cite PLL
26 *
27 * @file alignment.c
28 *
29 * @brief Collection of routines for reading alignments
30 *
31 * Auxiliary functions for storing alignments read from predefined file formats
32 */
33
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>

#include "pll.h"
#include "pllInternal.h"
41
42 /** @defgroup alignmentGroup Reading and parsing multiple sequence alignments
43
44 This set of functions handles the reading and parsing of several file formats that describe multiple sequence alignments. They are also responsible for storing the alignment in an internal structure
45 */
46 static pllAlignmentData * pllParsePHYLIP (const char * filename);
47 static pllAlignmentData * pllParseFASTA (const char * filename);
48 static int read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength);
49 static __inline int parsedOk (int * actLen, int sequenceCount, int sequenceLength);
50 static int parse_phylip (pllAlignmentData * alignmentData, int input);
51 static int getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen);
52 static int parseFastaAlignment (pllAlignmentData * alignmentData, int input);
53
54 #ifdef __PLL_DEBUG_PARSER
55 static int
printTokens(int input)56 printTokens (int input)
57 {
58 pllLexToken token;
59
60 do
61 {
62 NEXT_TOKEN
63
64 /* begin of parser */
65 switch (token.tokenType)
66 {
67 case PLL_TOKEN_NUMBER:
68 printf ("PLL_TOKEN_NUMBER (%.*s, %d)\n", token.len, token.lexeme, token.len);
69 break;
70 case PLL_TOKEN_STRING:
71 printf ("PLL_TOKEN_STRING (%.*s, %d)\n", token.len, token.lexeme, token.len);
72 break;
73 case PLL_TOKEN_EOF:
74 printf ("PLL_TOKEN_EOF\n");
75 break;
76 case PLL_TOKEN_WHITESPACE:
77 printf ("PLL_TOKEN_WHITESPACE\n");
78 break;
79 case PLL_TOKEN_NEWLINE:
80 printf ("PLL_TOKEN_NEWLINE\n");
81 break;
82 case PLL_TOKEN_UNKNOWN:
83 printf ("PLL_TOKEN_UNKNOWN (%.*s, %d)\n", token.len, token.lexeme, token.len);
84 break;
85 default:
86 break;
87 }
88 /* end of parser */
89
90
91 }
92 while (token.tokenType != PLL_TOKEN_EOF && token.tokenType != PLL_TOKEN_UNKNOWN);
93
94 if (token.tokenType == PLL_TOKEN_UNKNOWN) return (0);
95
96 return (1);
97 }
98 #endif
99
100 /** @ingroup alignmentGroup
101 @brief Initialize alignment structure fields
102
103 Allocates memory for the data structure that will hold the alignment and
104 initializes it. It requires the number of sequences \a sequenceCount and
105 the length of sequences \a sequenceLength. It returns a pointer to the
106 initialized data structure.
107
108 @param sequenceCount
109 Number of sequences in the alignment
110
111 @param sequenceLength
112 Length of the sequences
113
114 @param
115 Initialized alignment data structured
116 */
117 pllAlignmentData *
pllInitAlignmentData(int sequenceCount,int sequenceLength)118 pllInitAlignmentData (int sequenceCount, int sequenceLength)
119 {
120 int i;
121 pllAlignmentData * alignmentData;
122 //void * mem;
123 //TUNG
124 unsigned char *mem;
125
126
127 /** TODO */
128 alignmentData = (pllAlignmentData *) rax_malloc (sizeof (pllAlignmentData));
129 alignmentData->sequenceData = (unsigned char **) rax_malloc ((sequenceCount + 1) * sizeof (unsigned char *));
130 //mem = (void *) rax_malloc (sizeof (unsigned char) * (sequenceLength + 1) * sequenceCount);
131 //TUNG
132 mem = (unsigned char *)rax_malloc(sizeof(unsigned char) * (sequenceLength + 1) * sequenceCount);
133 for (i = 1; i <= sequenceCount; ++i)
134 {
135 alignmentData->sequenceData[i] = (unsigned char *) (&mem[sizeof (unsigned char) * (i - 1) * (sequenceLength + 1)]);
136 alignmentData->sequenceData[i][sequenceLength] = 0;
137 }
138 alignmentData->sequenceData[0] = NULL;
139
140 alignmentData->sequenceLabels = (char **) rax_calloc ((sequenceCount + 1), sizeof (char *));
141
142 alignmentData->sequenceCount = sequenceCount;
143 alignmentData->sequenceLength = sequenceLength;
144 alignmentData->originalSeqLength = sequenceLength;
145
146 /** TODO: remove siteWeights from alignment */
147 alignmentData->siteWeights = NULL;
148
149 return (alignmentData);
150 }
151
152 /** @ingroup alignmentGroup
153 @brief Deallocates the memory associated with the alignment data structure
154
155 Deallocates the memory associated with the alignment data structure \a alignmentData.
156
157 @param alignmentData
158 The alignment data structure
159 */
160 void
pllAlignmentDataDestroy(pllAlignmentData * alignmentData)161 pllAlignmentDataDestroy (pllAlignmentData * alignmentData)
162 {
163 int i;
164
165 for (i = 1; i <= alignmentData->sequenceCount; ++ i)
166 {
167 rax_free (alignmentData->sequenceLabels[i]);
168 }
169 rax_free (alignmentData->sequenceLabels);
170 rax_free (alignmentData->sequenceData[1]);
171 rax_free (alignmentData->sequenceData);
172 rax_free (alignmentData->siteWeights);
173 rax_free (alignmentData);
174 }
175
176
177 /** @ingroup alignmentGroup
178 @brief Prints the alignment to the console
179
180 @param alignmentData
181 The alignment data structure
182 */
183 void
pllAlignmentDataDumpConsole(pllAlignmentData * alignmentData)184 pllAlignmentDataDumpConsole (pllAlignmentData * alignmentData)
185 {
186 int i;
187
188 printf ("%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
189 for (i = 1; i <= alignmentData->sequenceCount; ++ i)
190 {
191 printf ("%s %s\n", alignmentData->sequenceLabels[i], alignmentData->sequenceData[i]);
192 }
193 }
194
195
196
/* Writes every sequence of the alignment to fp as a FASTA record: a line
   with '>' followed by the label, then a line with the sequence data. */
static void dump_fasta_content(FILE * fp, pllAlignmentData * alignmentData)
{
  int row = 1;

  /* entries are stored from index 1 upwards */
  while (row <= alignmentData->sequenceCount)
   {
     fprintf (fp, ">%s\n%s\n", alignmentData->sequenceLabels[row], alignmentData->sequenceData[row]);
     ++row;
   }
}
204
/* Writes every sequence of the alignment to fp as one PHYLIP body line:
   the label, a single space, then the sequence data. The PHYLIP header is
   not written here. */
static void dump_phylip_content(FILE * fp, pllAlignmentData * alignmentData)
{
  int row = 1;

  /* entries are stored from index 1 upwards */
  while (row <= alignmentData->sequenceCount)
   {
     fprintf (fp, "%s %s\n", alignmentData->sequenceLabels[row], alignmentData->sequenceData[row]);
     ++row;
   }
}
212
213 /** @ingroup alignmentGroup
214 @brief Dump the alignment to a file of format \a fileFormat
215
216 Dumps the alignment contained in \a alignmentData to file \a filename of type \a fileFormat.
217
218 @note If \a filename exists, all contents will be erased
219
220 @param alignmentData
221 Alignment data structure
222
223 @param fileFormat
224 Format of output file. Can take the value \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
225
226 @param filename
227 Output filename
228
229 @return
230 Returns \b PLL_TRUE on success, otherwise \b PLL_FALSE.
231 */
232 int
pllAlignmentDataDumpFile(pllAlignmentData * alignmentData,int fileFormat,const char * filename)233 pllAlignmentDataDumpFile (pllAlignmentData * alignmentData, int fileFormat, const char * filename)
234 {
235 FILE * fp;
236 void (*outfun)(FILE *, pllAlignmentData *);
237
238 if (fileFormat != PLL_FORMAT_PHYLIP && fileFormat != PLL_FORMAT_FASTA) return (PLL_FALSE);
239
240 outfun = (fileFormat == PLL_FORMAT_PHYLIP) ? dump_phylip_content : dump_fasta_content;
241
242 fp = fopen (filename,"wb");
243 if (!fp) return (PLL_FALSE);
244
245 /* if PHYLIP print the silly header at the beginning */
246 if (fileFormat == PLL_FORMAT_PHYLIP)
247 {
248 fprintf (fp, "%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
249 }
250
251 outfun(fp, alignmentData);
252
253 fclose (fp);
254 return (PLL_TRUE);
255 }
256
257
258
259 /* ROUTINES FOR PHYLIP PARSING */
260 /** @ingroup alignmentGroup
261 @brief Parse the PHYLIP file header
262 */
263 static int
read_phylip_header(int * inp,int * sequenceCount,int * sequenceLength)264 read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength)
265 {
266 pllLexToken token;
267 int input;
268
269 input = *inp;
270
271
272 NEXT_TOKEN
273 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
274
275 if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
276
277 *sequenceCount = atoi (token.lexeme);
278
279 NEXT_TOKEN
280 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
281 if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
282
283 *sequenceLength = atoi (token.lexeme);
284
285 *inp = input;
286
287 return (*sequenceCount && *sequenceLength);
288 }
289
/* Checks that every sequence received exactly sequenceLength characters.
   actLen is indexed from 1 to sequenceCount; entry 0 is not examined.
   Returns 1 if all entries match, 0 at the first mismatch. */
static __inline int
parsedOk (int * actLen, int sequenceCount, int sequenceLength)
{
  int row = 1;

  while (row <= sequenceCount)
   {
     if (actLen[row] != sequenceLength)
       return (0);
     ++row;
   }

  return (1);
}
302
303
304 /** @ingroup alignmentGroup
305 @brief Parse the PHYLIP file body
306 */
307 static int
parse_phylip(pllAlignmentData * alignmentData,int input)308 parse_phylip (pllAlignmentData * alignmentData, int input)
309 {
310 int i,j;
311 pllLexToken token;
312 int * sequenceLength;
313 int rc;
314
315 sequenceLength = (int *) rax_calloc (alignmentData->sequenceCount + 1, sizeof (int));
316
317 NEXT_TOKEN
318 for (i = 0; ; ++i)
319 {
320 j = i % alignmentData->sequenceCount;
321 if (i < alignmentData->sequenceCount)
322 {
323 if (token.tokenType == PLL_TOKEN_EOF)
324 {
325 rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
326 rax_free (sequenceLength);
327 return (rc);
328 }
329
330 if (token.tokenType == PLL_TOKEN_UNKNOWN)
331 {
332 rax_free (sequenceLength);
333 return (0);
334 }
335
336 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
337
338
339 if (token.tokenType != PLL_TOKEN_STRING && token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_FLOAT)
340 {
341 rax_free (sequenceLength);
342 return (0);
343 }
344 alignmentData->sequenceLabels[i + 1] = my_strndup (token.lexeme, token.len);
345 NEXT_TOKEN
346 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
347 }
348
349 while (1)
350 {
351 if (token.tokenType == PLL_TOKEN_EOF)
352 {
353 rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
354 rax_free (sequenceLength);
355 return (rc);
356 }
357
358 if (token.tokenType == PLL_TOKEN_UNKNOWN)
359 {
360 rax_free (sequenceLength);
361 return (0);
362 }
363
364 if (token.tokenType == PLL_TOKEN_NEWLINE) break;
365
366 if (token.tokenType != PLL_TOKEN_STRING)
367 {
368 rax_free (sequenceLength);
369 return (0);
370 }
371
372 if (sequenceLength[j + 1] + token.len > alignmentData->sequenceLength)
373 {
374 fprintf (stderr, "Sequence %d is larger than specified\n", j + 1);
375 rax_free (sequenceLength);
376 return (0);
377 }
378 memmove (alignmentData->sequenceData[j + 1] + sequenceLength[j + 1], token.lexeme, token.len);
379 sequenceLength[j + 1] += token.len;
380
381 NEXT_TOKEN
382 CONSUME (PLL_TOKEN_WHITESPACE)
383 }
384 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE);
385 }
386 }
387
388 /* Phylip parsers. Use the following attributed grammar
389 *
390 * S -> HEADER ENDL DATA
391 * HEADER -> PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL |
392 * PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL
393 * ENDL -> PLL_TOKEN_WHITESPACE PLL_TOKEN_NEWLINE | PLL_TOKEN_NEWLINE
394 * DATA -> PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA |
395 * PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA |
396 * PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF |
397 * PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF
398 */
399
400 /** @ingroup alignmentGroup
401 @brief Parse a PHYLIP file
402
403 Parses the PHYLIP file \a filename and returns a ::pllAlignmentData structure
404 with the alignment.
405
406 @param filename
407 Name of file to be parsed
408
409 @return
410 Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
411 in case of failure.
412 */
413 static pllAlignmentData *
pllParsePHYLIP(const char * filename)414 pllParsePHYLIP (const char * filename)
415 {
416 int
417 i, input, sequenceCount, sequenceLength;
418 char * rawdata;
419 long filesize;
420 pllAlignmentData * alignmentData;
421
422 rawdata = pllReadFile (filename, &filesize);
423 if (!rawdata)
424 {
425 errno = PLL_ERROR_FILE_OPEN;
426 return (NULL);
427 }
428
429 init_lexan (rawdata, filesize);
430 input = get_next_symbol();
431
432 /* parse the header to obtain the number of taxa and sequence length */
433 if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
434 {
435 rax_free (rawdata);
436 fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
437 errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
438 return (NULL);
439 }
440
441 lex_table_amend_phylip();
442
443 /* allocate alignment structure */
444 alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
445
446 if (! parse_phylip (alignmentData, input))
447 {
448 errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
449 pllAlignmentDataDestroy (alignmentData);
450 lex_table_restore();
451 rax_free (rawdata);
452 return (NULL);
453 }
454
455 lex_table_restore();
456 rax_free (rawdata);
457
458 alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
459 for (i = 0; i < alignmentData->sequenceLength; ++ i)
460 alignmentData->siteWeights[i] = 1;
461
462 return (alignmentData);
463 }
464
465 pllAlignmentData *
pllParsePHYLIPString(const char * rawdata,long filesize)466 pllParsePHYLIPString (const char *rawdata, long filesize)
467 {
468 int
469 i, input, sequenceCount, sequenceLength;
470 // char * rawdata;
471 // long filesize;
472 pllAlignmentData * alignmentData;
473
474 // rawdata = pllReadFile (filename, &filesize);
475 // if (!rawdata)
476 // {
477 // errno = PLL_ERROR_FILE_OPEN;
478 // return (NULL);
479 // }
480
481 init_lexan (rawdata, filesize);
482 input = get_next_symbol();
483
484 /* parse the header to obtain the number of taxa and sequence length */
485 if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
486 {
487 // rax_free (rawdata);
488 fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
489 errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
490 return (NULL);
491 }
492
493 lex_table_amend_phylip();
494
495 /* allocate alignment structure */
496 alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
497
498 if (! parse_phylip (alignmentData, input))
499 {
500 errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
501 pllAlignmentDataDestroy (alignmentData);
502 lex_table_restore();
503 // rax_free (rawdata);
504 return (NULL);
505 }
506
507 lex_table_restore();
508 // rax_free (rawdata);
509
510 alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
511 for (i = 0; i < alignmentData->sequenceLength; ++ i)
512 alignmentData->siteWeights[i] = 1;
513
514 return (alignmentData);
515 }
516
517 /* FASTA routines */
518 /* only check whether it is a valid alignment in fasta format */
519 /** @ingroup alignmentGroup
520 @brief Get information about the FASTA alignment
521
522 Get the information such as number of sequences and length of sequences of a FASTA alignment
523
524 @return
525 Returns \b PLL_TRUE if the alignment is valid, otherwise \b PLL_FALSE
526 */
527 static int
getFastaAlignmentInfo(int * inp,int * seqCount,int * seqLen)528 getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen)
529 {
530 pllLexToken token;
531 int input;
532
533 input = *inp;
534
535 *seqCount = *seqLen = 0;
536
537 NEXT_TOKEN
538 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
539
540 if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (PLL_FALSE);
541
542 while (1)
543 {
544 switch (token.tokenType)
545 {
546 case PLL_TOKEN_EOF:
547 return (PLL_TRUE);
548
549 case PLL_TOKEN_NUMBER:
550 case PLL_TOKEN_STRING:
551 if (token.len < 2 || token.lexeme[0] != '>') return (0);
552 break;
553 default:
554 return (PLL_FALSE);
555 }
556
557 NEXT_TOKEN
558 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
559
560 /* read second token (sequence) */
561 switch (token.tokenType)
562 {
563 case PLL_TOKEN_EOF:
564 return (PLL_FALSE);
565 break;
566
567 case PLL_TOKEN_NUMBER:
568 case PLL_TOKEN_STRING:
569 if (!*seqLen)
570 *seqLen = token.len;
571 else
572 {
573 if (*seqLen != token.len) return (0);
574 }
575 break;
576 default:
577 return (PLL_FALSE);
578 }
579 NEXT_TOKEN
580 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
581 ++ (*seqCount);
582 }
583
584 return (PLL_TRUE);
585 }
586
587 /** @ingroup alignmentGroup
588 @brief Check whether the FASTA content is valid
589 */
590 static int
parseFastaAlignment(pllAlignmentData * alignmentData,int input)591 parseFastaAlignment (pllAlignmentData * alignmentData, int input)
592 {
593 pllLexToken token;
594 int i;
595
596 NEXT_TOKEN
597 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
598
599 if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (0);
600
601 i = 1;
602 while (1)
603 {
604 /* first parse the sequence label */
605 switch (token.tokenType)
606 {
607 case PLL_TOKEN_EOF:
608 return (1);
609 break;
610
611 case PLL_TOKEN_NUMBER:
612 case PLL_TOKEN_STRING:
613 alignmentData->sequenceLabels[i] = my_strndup (token.lexeme + 1, token.len - 1);
614 break;
615 default:
616 return (0);
617 }
618
619 NEXT_TOKEN
620 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
621
622 /* now parse the sequence itself */
623 switch (token.tokenType)
624 {
625 case PLL_TOKEN_EOF:
626 return (0);
627 break;
628
629 case PLL_TOKEN_NUMBER:
630 case PLL_TOKEN_STRING:
631 memmove (alignmentData->sequenceData[i], token.lexeme, token.len);
632 break;
633 default:
634 return (0);
635 }
636 NEXT_TOKEN
637 CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
638 ++ i;
639 }
640 }
641
642
643 /** @ingroup alignmentGroup
644 @brief Parse a FASTA file
645
646 Parses the FASTA file \a filename and returns a ::pllAlignmentData structure
647 with the alignment.
648
649 @param filename
650 Name of file to be parsed
651
652 @return
653 Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
654 in case of failure.
655 */
656 static pllAlignmentData *
pllParseFASTA(const char * filename)657 pllParseFASTA (const char * filename)
658 {
659 int
660 i,
661 seqLen,
662 seqCount,
663 input;
664 long filesize;
665
666 char * rawdata;
667 pllAlignmentData * alignmentData;
668
669 rawdata = pllReadFile (filename, &filesize);
670 if (!rawdata)
671 {
672 errno = PLL_ERROR_FILE_OPEN;
673 return (NULL);
674 }
675
676 lex_table_amend_fasta ();
677
678 init_lexan (rawdata, filesize);
679 input = get_next_symbol ();
680
681
682 if (!getFastaAlignmentInfo (&input, &seqCount, &seqLen))
683 {
684 errno = PLL_ERROR_FASTA_SYNTAX;
685 lex_table_restore ();
686 rax_free (rawdata);
687 return (NULL);
688 }
689
690 alignmentData = pllInitAlignmentData (seqCount, seqLen);
691
692 printf ("\n---------------\n\n");
693
694 init_lexan (rawdata, filesize);
695 input = get_next_symbol ();
696
697 if (!parseFastaAlignment (alignmentData, input))
698 {
699 errno = PLL_ERROR_FASTA_SYNTAX;
700 pllAlignmentDataDestroy (alignmentData);
701 lex_table_restore();
702 rax_free(rawdata);
703 return (NULL);
704 }
705
706 /* allocate alignment structure */
707
708
709 lex_table_restore ();
710 rax_free (rawdata);
711
712 alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
713 for (i = 0; i < alignmentData->sequenceLength; ++ i)
714 alignmentData->siteWeights[i] = 1;
715
716 return (alignmentData);
717 }
718
719
720
721 /** @ingroup alignmentGroup
722 @brief Parse a file that contains a multiple sequence alignment
723
724 Parses the file \a filename of type \a fileType which contains a multiple sequence alignment.
725 The supported file types are the sequential and interleaved versions of PHYLIP format, and
726 the FASTA format. The parsed alignment is returned as a pointer to a structure of type
727 ::pllAlignmentData
728
729 @param fileType
730 Type of file to parse. Can be either \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
731
732 @param filename
733 Name of file to parse
734
735 @return
736 Returns a structure of type ::pllAlignmentData that contains the multiple sequence alignment,
737 otherwise returns \b NULL in case of failure.
738 */
739 pllAlignmentData *
pllParseAlignmentFile(int fileType,const char * filename)740 pllParseAlignmentFile (int fileType, const char * filename)
741 {
742
743 switch (fileType)
744 {
745 case PLL_FORMAT_PHYLIP:
746 return (pllParsePHYLIP (filename));
747 case PLL_FORMAT_FASTA:
748 return (pllParseFASTA (filename));
749 default:
750 /* RTFM */
751 errno = PLL_ERROR_INVALID_FILETYPE;
752 return (NULL);
753 }
754 }
755