1 /**
2  * PLL (version 1.0.0) a software library for phylogenetic inference
3  * Copyright (C) 2013 Tomas Flouri and Alexandros Stamatakis
4  *
5  * Derived from
6  * RAxML-HPC, a program for sequential and parallel estimation of phylogenetic
7  * trees by Alexandros Stamatakis
8  *
9  * This program is free software: you can redistribute it and/or modify it
10  * under the terms of the GNU General Public License as published by the Free
11  * Software Foundation, either version 3 of the License, or (at your option)
12  * any later version.
13  *
14  * This program is distributed in the hope that it will be useful, but WITHOUT
15  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
17  * more details.
18  *
19  * You should have received a copy of the GNU General Public License along with
20  * this program.  If not, see <http://www.gnu.org/licenses/>.
21  *
22  * For any other enquiries send an Email to Tomas Flouri
23  * Tomas.Flouri@h-its.org
24  *
25  * When publishing work that uses PLL please cite PLL
26  *
27  * @file alignment.c
28  *
29  * @brief Collection of routines for reading alignments
30  *
31  * Auxiliary functions for storing alignments read from predefined file formats
32  */
33 
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "pll.h"
#include "pllInternal.h"
41 
42 /** @defgroup alignmentGroup Reading and parsing multiple sequence alignments
43 
    This set of functions handles the reading and parsing of several file formats that describe multiple sequence alignments. The functions are also responsible for storing the alignment in an internal structure.
45 */
/* Forward declarations of the file-local (static) helpers defined later in this file. */
static pllAlignmentData * pllParsePHYLIP (const char * filename);
static pllAlignmentData * pllParseFASTA (const char * filename);
static int read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength);
static __inline int parsedOk (int * actLen, int sequenceCount, int sequenceLength);
static int parse_phylip (pllAlignmentData * alignmentData, int input);
static int getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen);
static int parseFastaAlignment (pllAlignmentData * alignmentData, int input);
53 
54 #ifdef __PLL_DEBUG_PARSER
55 static int
printTokens(int input)56 printTokens (int input)
57 {
58   pllLexToken token;
59 
60   do
61    {
62      NEXT_TOKEN
63 
64      /* begin of parser */
65      switch (token.tokenType)
66       {
67         case PLL_TOKEN_NUMBER:
68           printf ("PLL_TOKEN_NUMBER (%.*s, %d)\n", token.len, token.lexeme, token.len);
69           break;
70         case PLL_TOKEN_STRING:
71           printf ("PLL_TOKEN_STRING (%.*s, %d)\n", token.len, token.lexeme, token.len);
72           break;
73         case PLL_TOKEN_EOF:
74           printf ("PLL_TOKEN_EOF\n");
75           break;
76         case PLL_TOKEN_WHITESPACE:
77           printf ("PLL_TOKEN_WHITESPACE\n");
78           break;
79         case PLL_TOKEN_NEWLINE:
80           printf ("PLL_TOKEN_NEWLINE\n");
81           break;
82         case PLL_TOKEN_UNKNOWN:
83           printf ("PLL_TOKEN_UNKNOWN (%.*s, %d)\n", token.len, token.lexeme, token.len);
84           break;
85         default:
86           break;
87       }
88      /* end of parser */
89 
90 
91    }
92   while (token.tokenType != PLL_TOKEN_EOF && token.tokenType != PLL_TOKEN_UNKNOWN);
93 
94   if (token.tokenType == PLL_TOKEN_UNKNOWN) return (0);
95 
96   return (1);
97 }
98 #endif
99 
100 /** @ingroup alignmentGroup
101     @brief Initialize alignment structure fields
102 
103     Allocates memory for the data structure that will hold the alignment and
104     initializes it. It requires the number of sequences \a sequenceCount and
105     the length of sequences \a sequenceLength. It returns a pointer to the
106     initialized data structure.
107 
108     @param sequenceCount
109       Number of sequences in the alignment
110 
111     @param sequenceLength
112       Length of the sequences
113 
114     @param
115       Initialized alignment data structured
116 */
117 pllAlignmentData *
pllInitAlignmentData(int sequenceCount,int sequenceLength)118 pllInitAlignmentData (int sequenceCount, int sequenceLength)
119  {
120    int i;
121    pllAlignmentData * alignmentData;
122    //void * mem;
123    //TUNG
124    unsigned char *mem;
125 
126 
127    /** TODO */
128    alignmentData               =  (pllAlignmentData *) rax_malloc (sizeof (pllAlignmentData));
129    alignmentData->sequenceData = (unsigned char **) rax_malloc ((sequenceCount + 1) * sizeof (unsigned char *));
130    //mem = (void *) rax_malloc (sizeof (unsigned char) * (sequenceLength + 1) * sequenceCount);
131    //TUNG
132    mem = (unsigned char *)rax_malloc(sizeof(unsigned char) * (sequenceLength + 1) * sequenceCount);
133    for (i = 1; i <= sequenceCount; ++i)
134     {
135       alignmentData->sequenceData[i]                 = (unsigned char *) (&mem[sizeof (unsigned char) * (i - 1) * (sequenceLength + 1)]);
136       alignmentData->sequenceData[i][sequenceLength] = 0;
137     }
138    alignmentData->sequenceData[0] = NULL;
139 
140    alignmentData->sequenceLabels = (char **) rax_calloc ((sequenceCount + 1), sizeof (char *));
141 
142    alignmentData->sequenceCount  = sequenceCount;
143    alignmentData->sequenceLength = sequenceLength;
144    alignmentData->originalSeqLength = sequenceLength;
145 
146    /** TODO: remove siteWeights from alignment */
147    alignmentData->siteWeights    = NULL;
148 
149    return (alignmentData);
150  }
151 
152 /** @ingroup alignmentGroup
153     @brief Deallocates the memory associated with the alignment data structure
154 
155     Deallocates the memory associated with the alignment data structure \a alignmentData.
156 
157     @param alignmentData
158       The alignment data structure
159 */
160 void
pllAlignmentDataDestroy(pllAlignmentData * alignmentData)161 pllAlignmentDataDestroy (pllAlignmentData * alignmentData)
162 {
163   int i;
164 
165   for (i = 1; i <= alignmentData->sequenceCount; ++ i)
166    {
167      rax_free (alignmentData->sequenceLabels[i]);
168    }
169   rax_free (alignmentData->sequenceLabels);
170   rax_free (alignmentData->sequenceData[1]);
171   rax_free (alignmentData->sequenceData);
172   rax_free (alignmentData->siteWeights);
173   rax_free (alignmentData);
174 }
175 
176 
177 /** @ingroup alignmentGroup
178     @brief Prints the alignment to the console
179 
180     @param alignmentData
181       The alignment data structure
182 */
183 void
pllAlignmentDataDumpConsole(pllAlignmentData * alignmentData)184 pllAlignmentDataDumpConsole (pllAlignmentData * alignmentData)
185  {
186    int i;
187 
188    printf ("%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
189    for (i = 1; i <= alignmentData->sequenceCount; ++ i)
190     {
191       printf ("%s %s\n", alignmentData->sequenceLabels[i], alignmentData->sequenceData[i]);
192     }
193  }
194 
195 
196 
/* Writes every sequence of alignmentData to fp as a two-line FASTA record:
   a '>' followed by the label, then the sequence on the next line. */
static void dump_fasta_content(FILE * fp, pllAlignmentData * alignmentData)
{
  int row = 1;

  while (row <= alignmentData->sequenceCount)
   {
     fprintf (fp, ">%s\n%s\n", alignmentData->sequenceLabels[row], alignmentData->sequenceData[row]);
     ++row;
   }
}
204 
/* Writes every sequence of alignmentData to fp as one "label sequence" line.
   The PHYLIP header line is not written here. */
static void dump_phylip_content(FILE * fp, pllAlignmentData * alignmentData)
{
  int row = 1;

  while (row <= alignmentData->sequenceCount)
   {
     fprintf (fp, "%s %s\n", alignmentData->sequenceLabels[row], alignmentData->sequenceData[row]);
     ++row;
   }
}
212 
213 /** @ingroup alignmentGroup
214     @brief Dump the alignment to a file of format \a fileFormat
215 
216     Dumps the alignment contained in \a alignmentData to file \a filename of type \a fileFormat.
217 
218     @note If \a filename exists, all contents will be erased
219 
220     @param alignmentData
221       Alignment data structure
222 
223     @param fileFormat
224       Format of output file. Can take the value \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
225 
226     @param filename
227       Output filename
228 
229     @return
230       Returns \b PLL_TRUE on success, otherwise \b PLL_FALSE.
231 */
232 int
pllAlignmentDataDumpFile(pllAlignmentData * alignmentData,int fileFormat,const char * filename)233 pllAlignmentDataDumpFile (pllAlignmentData * alignmentData, int fileFormat, const char * filename)
234 {
235   FILE * fp;
236   void (*outfun)(FILE *, pllAlignmentData *);
237 
238   if (fileFormat != PLL_FORMAT_PHYLIP && fileFormat != PLL_FORMAT_FASTA) return (PLL_FALSE);
239 
240   outfun = (fileFormat == PLL_FORMAT_PHYLIP) ? dump_phylip_content : dump_fasta_content;
241 
242   fp = fopen (filename,"wb");
243   if (!fp) return (PLL_FALSE);
244 
245   /* if PHYLIP print the silly header at the beginning */
246   if (fileFormat == PLL_FORMAT_PHYLIP)
247    {
248      fprintf (fp, "%d %d\n", alignmentData->sequenceCount, alignmentData->sequenceLength);
249    }
250 
251   outfun(fp, alignmentData);
252 
253   fclose (fp);
254   return (PLL_TRUE);
255 }
256 
257 
258 
259 /* ROUTINES FOR PHYLIP PARSING */
260 /** @ingroup alignmentGroup
261     @brief Parse the PHYLIP file header
262 */
263 static int
read_phylip_header(int * inp,int * sequenceCount,int * sequenceLength)264 read_phylip_header (int * inp, int * sequenceCount, int * sequenceLength)
265 {
266   pllLexToken token;
267   int input;
268 
269   input = *inp;
270 
271 
272   NEXT_TOKEN
273   CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
274 
275   if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
276 
277   *sequenceCount = atoi (token.lexeme);
278 
279   NEXT_TOKEN
280   CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
281   if (token.tokenType != PLL_TOKEN_NUMBER) return (0);
282 
283   *sequenceLength = atoi (token.lexeme);
284 
285   *inp = input;
286 
287   return (*sequenceCount && *sequenceLength);
288 }
289 
/* Checks that every sequence reached the announced length.

   actLen is indexed from 1: entries 1..sequenceCount hold the number of
   characters collected for each sequence; entry 0 is ignored.

   Returns 1 if all of them equal sequenceLength, otherwise 0. */
static __inline int
parsedOk (int * actLen, int sequenceCount, int sequenceLength)
{
  int idx = sequenceCount;

  while (idx >= 1)
   {
     if (actLen[idx] != sequenceLength) return (0);
     --idx;
   }

  return (1);
}
302 
303 
304 /** @ingroup alignmentGroup
305     @brief Parse the PHYLIP file body
306 */
307 static int
parse_phylip(pllAlignmentData * alignmentData,int input)308 parse_phylip (pllAlignmentData * alignmentData, int input)
309 {
310   int i,j;
311   pllLexToken token;
312   int * sequenceLength;
313   int rc;
314 
315   sequenceLength = (int *) rax_calloc (alignmentData->sequenceCount + 1, sizeof (int));
316 
317   NEXT_TOKEN
318   for (i = 0; ; ++i)
319   {
320     j = i % alignmentData->sequenceCount;
321     if (i < alignmentData->sequenceCount)
322      {
323        if (token.tokenType == PLL_TOKEN_EOF)
324         {
325           rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
326           rax_free (sequenceLength);
327           return (rc);
328         }
329 
330        if (token.tokenType == PLL_TOKEN_UNKNOWN)
331         {
332           rax_free (sequenceLength);
333           return (0);
334         }
335 
336        CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
337 
338 
339        if (token.tokenType != PLL_TOKEN_STRING && token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_FLOAT)
340         {
341           rax_free (sequenceLength);
342           return (0);
343         }
344        alignmentData->sequenceLabels[i + 1] = my_strndup (token.lexeme, token.len);
345        NEXT_TOKEN
346        CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
347      }
348 
349     while (1)
350      {
351        if (token.tokenType == PLL_TOKEN_EOF)
352         {
353           rc = parsedOk (sequenceLength, alignmentData->sequenceCount, alignmentData->sequenceLength);
354           rax_free (sequenceLength);
355           return (rc);
356         }
357 
358        if (token.tokenType == PLL_TOKEN_UNKNOWN)
359         {
360          rax_free (sequenceLength);
361          return (0);
362         }
363 
364        if (token.tokenType == PLL_TOKEN_NEWLINE) break;
365 
366        if (token.tokenType != PLL_TOKEN_STRING)
367         {
368           rax_free (sequenceLength);
369           return (0);
370         }
371 
372        if (sequenceLength[j + 1] + token.len > alignmentData->sequenceLength)
373         {
374           fprintf (stderr, "Sequence %d is larger than specified\n", j + 1);
375           rax_free (sequenceLength);
376           return (0);
377         }
378        memmove (alignmentData->sequenceData[j + 1] + sequenceLength[j + 1], token.lexeme, token.len);
379        sequenceLength[j + 1] += token.len;
380 
381        NEXT_TOKEN
382        CONSUME (PLL_TOKEN_WHITESPACE)
383      }
384     CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE);
385   }
386 }
387 
388 /* Phylip parsers. Use the following attributed grammar
389  *
390  *        S -> HEADER ENDL DATA
391  *   HEADER -> PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL |
392  *             PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER PLL_TOKEN_WHITESPACE PLL_TOKEN_NUMBER ENDL
393  *     ENDL -> PLL_TOKEN_WHITESPACE PLL_TOKEN_NEWLINE | PLL_TOKEN_NEWLINE
394  *     DATA -> PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA |
395  *             PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING ENDL DATA |
396  *             PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF |
397  *             PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_WHITESPACE PLL_TOKEN_STRING PLL_TOKEN_EOF
398  */
399 
400 /** @ingroup alignmentGroup
401     @brief Parse a PHYLIP file
402 
403     Parses the PHYLIP file \a filename and returns a ::pllAlignmentData structure
404     with the alignment.
405 
406     @param filename
407       Name of file to be parsed
408 
409     @return
410       Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
411       in case of failure.
412 */
413 static pllAlignmentData *
pllParsePHYLIP(const char * filename)414 pllParsePHYLIP (const char * filename)
415 {
416   int
417     i, input, sequenceCount, sequenceLength;
418   char * rawdata;
419   long filesize;
420   pllAlignmentData * alignmentData;
421 
422   rawdata = pllReadFile (filename, &filesize);
423   if (!rawdata)
424    {
425      errno = PLL_ERROR_FILE_OPEN;
426      return (NULL);
427    }
428 
429   init_lexan (rawdata, filesize);
430   input = get_next_symbol();
431 
432   /* parse the header to obtain the number of taxa and sequence length */
433   if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
434    {
435      rax_free (rawdata);
436      fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
437      errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
438      return (NULL);
439    }
440 
441   lex_table_amend_phylip();
442 
443   /* allocate alignment structure */
444   alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
445 
446   if (! parse_phylip (alignmentData, input))
447    {
448      errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
449      pllAlignmentDataDestroy (alignmentData);
450      lex_table_restore();
451      rax_free (rawdata);
452      return (NULL);
453    }
454 
455   lex_table_restore();
456   rax_free (rawdata);
457 
458   alignmentData->siteWeights  = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
459   for (i = 0; i < alignmentData->sequenceLength; ++ i)
460     alignmentData->siteWeights[i] = 1;
461 
462   return (alignmentData);
463 }
464 
465 pllAlignmentData *
pllParsePHYLIPString(const char * rawdata,long filesize)466 pllParsePHYLIPString (const char *rawdata, long filesize)
467 {
468   int
469     i, input, sequenceCount, sequenceLength;
470 //  char * rawdata;
471 //  long filesize;
472   pllAlignmentData * alignmentData;
473 
474 //  rawdata = pllReadFile (filename, &filesize);
475 //  if (!rawdata)
476 //   {
477 //     errno = PLL_ERROR_FILE_OPEN;
478 //     return (NULL);
479 //   }
480 
481   init_lexan (rawdata, filesize);
482   input = get_next_symbol();
483 
484   /* parse the header to obtain the number of taxa and sequence length */
485   if (!read_phylip_header (&input, &sequenceCount, &sequenceLength))
486    {
487 //     rax_free (rawdata);
488      fprintf (stderr, "Error while parsing PHYLIP header (number of taxa and sequence length)\n");
489      errno = PLL_ERROR_PHYLIP_HEADER_SYNTAX;
490      return (NULL);
491    }
492 
493   lex_table_amend_phylip();
494 
495   /* allocate alignment structure */
496   alignmentData = pllInitAlignmentData (sequenceCount, sequenceLength);
497 
498   if (! parse_phylip (alignmentData, input))
499    {
500      errno = PLL_ERROR_PHYLIP_BODY_SYNTAX;
501      pllAlignmentDataDestroy (alignmentData);
502      lex_table_restore();
503 //     rax_free (rawdata);
504      return (NULL);
505    }
506 
507   lex_table_restore();
508 //  rax_free (rawdata);
509 
510   alignmentData->siteWeights  = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
511   for (i = 0; i < alignmentData->sequenceLength; ++ i)
512     alignmentData->siteWeights[i] = 1;
513 
514   return (alignmentData);
515 }
516 
517 /* FASTA routines */
518 /* only check whether it is a valid alignment in fasta format */
519 /** @ingroup alignmentGroup
520     @brief Get information about the FASTA alignment
521 
522     Get the information such as number of sequences and length of sequences of a FASTA alignment
523 
524     @return
525       Returns \b PLL_TRUE if the alignment is valid, otherwise \b PLL_FALSE
526 */
527 static int
getFastaAlignmentInfo(int * inp,int * seqCount,int * seqLen)528 getFastaAlignmentInfo (int * inp, int * seqCount, int * seqLen)
529 {
530   pllLexToken token;
531   int input;
532 
533   input = *inp;
534 
535   *seqCount = *seqLen = 0;
536 
537   NEXT_TOKEN
538   CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
539 
540   if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (PLL_FALSE);
541 
542   while (1)
543    {
544      switch (token.tokenType)
545       {
546         case PLL_TOKEN_EOF:
547           return (PLL_TRUE);
548 
549         case PLL_TOKEN_NUMBER:
550         case PLL_TOKEN_STRING:
551           if (token.len < 2 || token.lexeme[0] != '>') return (0);
552           break;
553         default:
554           return (PLL_FALSE);
555       }
556 
557      NEXT_TOKEN
558      CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
559 
560      /* read second token (sequence) */
561      switch (token.tokenType)
562       {
563         case PLL_TOKEN_EOF:
564           return (PLL_FALSE);
565           break;
566 
567         case PLL_TOKEN_NUMBER:
568         case PLL_TOKEN_STRING:
569           if (!*seqLen)
570             *seqLen = token.len;
571           else
572            {
573              if (*seqLen != token.len) return (0);
574            }
575           break;
576         default:
577           return (PLL_FALSE);
578       }
579      NEXT_TOKEN
580      CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
581      ++ (*seqCount);
582    }
583 
584   return (PLL_TRUE);
585 }
586 
587 /** @ingroup alignmentGroup
588     @brief Check whether the FASTA content is valid
589 */
590 static int
parseFastaAlignment(pllAlignmentData * alignmentData,int input)591 parseFastaAlignment (pllAlignmentData * alignmentData, int input)
592 {
593   pllLexToken token;
594   int i;
595 
596   NEXT_TOKEN
597   CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
598 
599   if (token.tokenType != PLL_TOKEN_NUMBER && token.tokenType != PLL_TOKEN_STRING) return (0);
600 
601   i = 1;
602   while (1)
603    {
604      /* first parse the sequence label */
605      switch (token.tokenType)
606       {
607         case PLL_TOKEN_EOF:
608           return (1);
609           break;
610 
611         case PLL_TOKEN_NUMBER:
612         case PLL_TOKEN_STRING:
613           alignmentData->sequenceLabels[i] = my_strndup (token.lexeme + 1, token.len - 1);
614           break;
615         default:
616           return (0);
617       }
618 
619      NEXT_TOKEN
620      CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
621 
622      /* now parse the sequence itself */
623      switch (token.tokenType)
624       {
625         case PLL_TOKEN_EOF:
626           return (0);
627           break;
628 
629         case PLL_TOKEN_NUMBER:
630         case PLL_TOKEN_STRING:
631           memmove (alignmentData->sequenceData[i], token.lexeme, token.len);
632           break;
633         default:
634           return (0);
635       }
636      NEXT_TOKEN
637      CONSUME(PLL_TOKEN_WHITESPACE | PLL_TOKEN_NEWLINE)
638      ++ i;
639    }
640 }
641 
642 
643 /** @ingroup alignmentGroup
644     @brief Parse a FASTA file
645 
646     Parses the FASTA file \a filename and returns a ::pllAlignmentData structure
647     with the alignment.
648 
649     @param filename
650       Name of file to be parsed
651 
652     @return
653       Returns a structure of type ::pllAlignmentData that contains the alignment, or \b NULL
654       in case of failure.
655 */
656 static pllAlignmentData *
pllParseFASTA(const char * filename)657 pllParseFASTA (const char * filename)
658 {
659   int
660     i,
661     seqLen,
662     seqCount,
663     input;
664   long filesize;
665 
666   char * rawdata;
667   pllAlignmentData * alignmentData;
668 
669   rawdata = pllReadFile (filename, &filesize);
670   if (!rawdata)
671    {
672      errno = PLL_ERROR_FILE_OPEN;
673      return (NULL);
674    }
675 
676   lex_table_amend_fasta ();
677 
678   init_lexan (rawdata, filesize);
679   input = get_next_symbol ();
680 
681 
682   if (!getFastaAlignmentInfo (&input, &seqCount, &seqLen))
683    {
684      errno = PLL_ERROR_FASTA_SYNTAX;
685      lex_table_restore ();
686      rax_free (rawdata);
687      return (NULL);
688    }
689 
690   alignmentData = pllInitAlignmentData (seqCount, seqLen);
691 
692   printf ("\n---------------\n\n");
693 
694   init_lexan (rawdata, filesize);
695   input = get_next_symbol ();
696 
697   if (!parseFastaAlignment (alignmentData, input))
698    {
699      errno = PLL_ERROR_FASTA_SYNTAX;
700      pllAlignmentDataDestroy (alignmentData);
701      lex_table_restore();
702      rax_free(rawdata);
703      return (NULL);
704    }
705 
706   /* allocate alignment structure */
707 
708 
709   lex_table_restore ();
710   rax_free (rawdata);
711 
712   alignmentData->siteWeights = (int *) rax_malloc (alignmentData->sequenceLength * sizeof (int));
713   for (i = 0; i < alignmentData->sequenceLength; ++ i)
714     alignmentData->siteWeights[i] = 1;
715 
716   return (alignmentData);
717 }
718 
719 
720 
721 /** @ingroup alignmentGroup
722     @brief Parse a file that contains a multiple sequence alignment
723 
724     Parses the file \a filename of type \a fileType which contains a multiple sequence alignment.
725     The supported file types are the sequential and interleaved versions of PHYLIP format, and
726     the FASTA format. The parsed alignment is returned as a pointer to a structure of type
727     ::pllAlignmentData
728 
729     @param fileType
730       Type of file to parse. Can be either \b PLL_FORMAT_PHYLIP or \b PLL_FORMAT_FASTA
731 
732     @param filename
733       Name of file to parse
734 
735     @return
736       Returns a structure of type ::pllAlignmentData that contains the multiple sequence alignment,
737       otherwise returns \b NULL in case of failure.
738 */
739 pllAlignmentData *
pllParseAlignmentFile(int fileType,const char * filename)740 pllParseAlignmentFile (int fileType, const char * filename)
741 {
742 
743   switch (fileType)
744    {
745      case PLL_FORMAT_PHYLIP:
746        return (pllParsePHYLIP (filename));
747      case PLL_FORMAT_FASTA:
748        return (pllParseFASTA (filename));
749      default:
750        /* RTFM */
751        errno = PLL_ERROR_INVALID_FILETYPE;
752        return (NULL);
753    }
754 }
755