1 /*
2 
3 PhyML:  a program that  computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5 
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7 
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10 
11 */
12 
13 #include "nexus.h"
14 
/*
 * Search the first N_MAX_NEX_COM entries of `com_list` for a command whose
 * name has the same length as `token` and the same characters once both
 * have been passed, character by character, through Lowercase().
 *
 * Side effects: `token` and every equal-length candidate name examined are
 * modified in place by Lowercase().
 *
 * On a match *found_com is set to the matching command; otherwise it keeps
 * whatever value the caller passed in.  In both cases, when *found_com is
 * non-NULL and has at least one parameter, *default_parm is set to its
 * first parameter.
 */
void Find_Nexus_Com(char *token, nexcom **found_com, nexparm **default_parm, nexcom **com_list)
{
  int    idx;
  int    mismatches;
  size_t len;
  size_t pos;
  nexcom *cand;

  for(idx = 0; idx < N_MAX_NEX_COM; idx++)
    {
      len = strlen(token);

      /* An empty token never matches a command. */
      if(len == 0) continue;

      cand = com_list[idx];
      if(len != strlen(cand->name)) continue;

      mismatches = 0;
      for(pos = 0; pos < len; pos++)
        {
          Lowercase(token + pos);
          Lowercase(cand->name + pos);
          if(token[pos] != cand->name[pos]) mismatches++;
        }

      if(mismatches == 0)
        {
          *found_com = cand;
          break;
        }
    }

  if(*found_com && (*found_com)->nparm)
    {
      *default_parm = (*found_com)->parm[0];
    }
}
40 
41 //////////////////////////////////////////////////////////////
42 //////////////////////////////////////////////////////////////
43 
44 
/*
 * Search the parameters of `curr_com` for one whose name has the same length
 * as `token` and the same characters once both have been passed, character
 * by character, through Lowercase().
 *
 * Side effects: `token` and every equal-length parameter name examined are
 * modified in place by Lowercase().
 *
 * On a match *found_parm is set to the matching parameter; otherwise it is
 * left untouched.  A NULL `curr_com` is reported and Exit() is called.
 */
void Find_Nexus_Parm(char *token, nexparm **found_parm, nexcom *curr_com)
{
  int      idx;
  int      mismatches;
  size_t   len;
  size_t   pos;
  nexparm *cand;

  if(!curr_com)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  for(idx = 0; idx < curr_com->nparm; idx++)
    {
      len  = strlen(token);
      cand = curr_com->parm[idx];

      if(len != strlen(cand->name)) continue;

      /* Note: unlike the command lookup, an empty token is allowed to match
         an empty parameter name here. */
      mismatches = 0;
      for(pos = 0; pos < len; pos++)
        {
          Lowercase(token + pos);
          Lowercase(cand->name + pos);
          if(token[pos] != cand->name[pos]) mismatches++;
        }

      if(mismatches == 0)
        {
          *found_parm = cand;
          break;
        }
    }
}
76 
77 //////////////////////////////////////////////////////////////
78 //////////////////////////////////////////////////////////////
79 
80 
/*
 * Skip a NEXUS 'taxa' block.
 *
 * Tokens are read from io->fp_in_align into `token` and discarded until
 * Get_Token() yields a token starting with ';' or an empty string.  The
 * stream is then moved back by one character.
 *
 * `curr_parm` is not used.  Always returns 1.
 */
int Read_Nexus_Taxa(char *token, nexparm *curr_parm, option *io)
{
  PhyML_Printf("\n. Skipping 'taxa' block");

  do
    {
      Get_Token(io->fp_in_align,token);
      if(token[0] == ';') break;
    }
  while(strlen(token) > 0);

  /* The offset is written as -1L: the former -1*sizeof(char) is evaluated in
     unsigned size_t arithmetic (i.e. SIZE_MAX) and only became -1 through an
     implementation-defined conversion to long. */
  fseek(io->fp_in_align,-1L,SEEK_CUR);

  return 1;
}
96 
97 //////////////////////////////////////////////////////////////
98 //////////////////////////////////////////////////////////////
99 
100 
/*
 * Read a NEXUS 'translate' block from io->fp_in_tree.
 *
 * Tokens are read until one starting with ';' or an empty token is found.
 * Each token that parses entirely as a base-10 integer starts a new entry:
 * its decimal representation (after conversion to int) is stored in
 * io->short_tax_names, and the token that follows it is stored verbatim in
 * io->long_tax_names.  io->size_tax_names is reset to 0 on entry and counts
 * the entries read.  The stream is finally moved back by one character.
 *
 * `curr_parm` is not used.  Always returns 1.
 */
int Read_Nexus_Translate(char *token, nexparm *curr_parm, option *io)
{
  int    tax_num;
  int    last;
  char  *end;
  char **grown;
  /* Large enough for any int printed with "%d": at most 3 decimal digits per
     byte, plus a sign and the terminating '\0'. */
  char   short_name[3*sizeof(int)+2];

  PhyML_Printf("\n. Reading 'translate' block");
  io->size_tax_names = 0;

  do
    {
      Get_Token(io->fp_in_tree,token);
      if(token[0] == ';') break;

      tax_num = (int)strtol(token,&end,10);
      if(*end == '\0' && token[0])
        {
          io->size_tax_names++;
          last = io->size_tax_names-1;

          /* Grow through a temporary so that the array is not lost (and NULL
             is not dereferenced) when realloc fails. */
          grown = (char **)realloc(io->short_tax_names,io->size_tax_names*sizeof(char *));
          if(!grown)
            {
              PhyML_Printf("\n== Could not allocate memory for the taxon names.");
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }
          io->short_tax_names = grown;

          /* Size the buffer from the text that is actually stored: the
             printed int can be longer than the token once the value has been
             truncated to int (e.g. "3000000000" may print as a negative
             11-character number). */
          sprintf(short_name,"%d",tax_num);
          io->short_tax_names[last] = (char *)mCalloc(strlen(short_name)+1,sizeof(char));
          strcpy(io->short_tax_names[last],short_name);

          /* The next token is taken as the long name, whatever it is. */
          Get_Token(io->fp_in_tree,token);

          grown = (char **)realloc(io->long_tax_names,io->size_tax_names*sizeof(char *));
          if(!grown)
            {
              PhyML_Printf("\n== Could not allocate memory for the taxon names.");
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }
          io->long_tax_names = grown;

          io->long_tax_names[last] = (char *)mCalloc(strlen(token)+1,sizeof(char));
          strcpy(io->long_tax_names[last],token);
        }
    }
  while(strlen(token) > 0);

  /* -1L instead of -1*sizeof(char), which is an unsigned (size_t) value. */
  fseek(io->fp_in_tree,-1L,SEEK_CUR);

  return 1;
}
136 
137 //////////////////////////////////////////////////////////////
138 //////////////////////////////////////////////////////////////
139 
140 
/*
 * Read the sequence data of a NEXUS 'matrix' command into io->data, using
 * Read_Seq_Interleaved() when io->interleaved is set and
 * Read_Seq_Sequential() otherwise, then move io->fp_in_align back by one
 * character.
 *
 * `token` and `curr_parm` are not used.  Always returns 1.
 */
int Read_Nexus_Matrix(char *token, nexparm *curr_parm, option *io)
{
  if(io->interleaved) io->data = Read_Seq_Interleaved(io);
  else                io->data = Read_Seq_Sequential(io);

  /* -1L instead of -1*sizeof(char): the latter is computed as an unsigned
     size_t (SIZE_MAX) and relied on an implementation-defined conversion to
     the long offset expected by fseek. */
  fseek(io->fp_in_align,-1L,SEEK_CUR);

  return 1;
}
151 
152 //////////////////////////////////////////////////////////////
153 //////////////////////////////////////////////////////////////
154 
155 
/*
 * Read one tree from io->fp_in_tree with Read_Tree_File_Phylip(), store it
 * in io->tree and append it to io->treelist (growing the list by one slot).
 * A progress message is printed when the current list size is a multiple of
 * ten greater than one.  The stream is finally moved back by one character.
 *
 * `token` and `curr_parm` are not used.  Always returns 1.
 */
int Read_Nexus_Tree(char *token, nexparm *curr_parm, option *io)
{
  t_tree **grown;

  /* Grow through a temporary so that the existing list is not lost (and NULL
     is not written through) when realloc fails. */
  grown = (t_tree **)realloc(io->treelist->tree,(io->treelist->list_size+1)*sizeof(t_tree *));
  if(!grown)
    {
      PhyML_Printf("\n== Could not allocate memory for the list of trees.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }
  io->treelist->tree = grown;

  io->tree = Read_Tree_File_Phylip(io->fp_in_tree);

  if(!(io->treelist->list_size%10) && io->treelist->list_size > 1)
    {
      PhyML_Printf("\n. Reading tree %d",io->treelist->list_size);
      /* NOTE(review): whether Read_Tree_File_Phylip() can return NULL is not
         visible from here; the pointer is checked before being dereferenced
         so that the progress message cannot crash. */
      if(io->tree)
        {
          if(io->tree->n_root) PhyML_Printf(" (that is a rooted tree)");
          else                 PhyML_Printf(" (that is an unrooted tree)");
        }
    }

  io->treelist->tree[io->treelist->list_size] = io->tree;
  io->treelist->list_size++;

  /* -1L instead of -1*sizeof(char), which is an unsigned (size_t) value. */
  fseek(io->fp_in_tree,-1L,SEEK_CUR);

  return 1;
}
171 
172 //////////////////////////////////////////////////////////////
173 //////////////////////////////////////////////////////////////
174 
175 
/*
 * Handle the parameter of a NEXUS 'begin' command.
 *
 * Returns 0 without doing anything when `token` starts with '='.
 * Otherwise `curr_parm` must be non-NULL (an error is reported and Exit()
 * is called if it is not).  Blocks named "data" or "trees" are announced
 * and 1 is returned; any other block name is reported as unsupported and
 * Exit() is called.
 *
 * `io` is not used.
 */
int Read_Nexus_Begin(char *token, nexparm *curr_parm, option *io)
{
  if(token[0] == '=') return 0;

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  if(!strcmp(curr_parm->name,"data") || !strcmp(curr_parm->name,"trees"))
    {
      /* Print the block name: curr_parm->value is never assigned by this
         function, whereas `name` is what was just tested above. */
      PhyML_Printf("\n. Reading '%s' block.\n",curr_parm->name);
    }
  else
    {
      PhyML_Printf("\n. The '%s' block type is not supported by PhyML. Sorry.\n",curr_parm->name);
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return 1;
}
197 
198 //////////////////////////////////////////////////////////////
199 //////////////////////////////////////////////////////////////
200 
201 
/*
 * Handle one value of a NEXUS 'dimensions' command.
 *
 * Returns 0 without doing anything when `token` starts with '='.
 * Otherwise `token` is copied into curr_parm->value and, depending on the
 * parameter name, parsed as an integer into io->n_otu ("ntax") or
 * io->init_len ("nchar"); 1 is returned.  A NULL `curr_parm` is reported
 * and Exit() is called.
 */
int Read_Nexus_Dimensions(char *token, nexparm *curr_parm, option *io)
{
  if(*token == '=')
    {
      return 0;
    }

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  /* NOTE(review): the capacity of curr_parm->value is not visible here. */
  strcpy(curr_parm->value,token);

  if(strcmp(curr_parm->name,"ntax") == 0)
    sscanf(curr_parm->value,"%d",&(io->n_otu));
  else if(strcmp(curr_parm->name,"nchar") == 0)
    sscanf(curr_parm->value,"%d",&(io->init_len));

  return 1;
}
225 
226 //////////////////////////////////////////////////////////////
227 //////////////////////////////////////////////////////////////
228 
229 
/*
 * Handle one value of a NEXUS 'format' command.
 *
 * Returns 0 without doing anything when `token` starts with '='.
 * Otherwise `token` is lowercased in place, copied into curr_parm->value
 * and interpreted according to curr_parm->name:
 *
 *   datatype   : sets io->datatype and io->mod->ns ("standard" also sets
 *                the model options and a two-state "0"/"1" alphabet);
 *                "continuous" is rejected.
 *   symbols    : parses a double-quoted list of states into io->alphabet,
 *                sets io->mod->ns and io->state_len.  States are either
 *                single characters ("acgt") or space-separated strings of
 *                equal length ("aa cc gg").
 *   interleave : sets io->interleaved to YES.
 *   missing, gap, equate, matchchar, items : rejected as unsupported.
 *
 * Every rejection prints a message and calls Exit().  Returns 1 otherwise.
 * A NULL `curr_parm` is reported and Exit() is called.
 */
int Read_Nexus_Format(char *token, nexparm *curr_parm, option *io)
{
  int i;

  if(token[0] == '=') return 0;

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  For(i,strlen(token)) Lowercase(token+i);

  /* NOTE(review): the capacity of curr_parm->value is not visible here. */
  strcpy(curr_parm->value,token);

  if(!strcmp(curr_parm->name,"datatype"))
    {
      if(!strcmp(curr_parm->value,"standard"))
        {
          io->datatype = GENERIC;
          io->mod->whichmodel = JC69;
          io->mod->s_opt->opt_kappa  = NO;
          io->mod->s_opt->opt_lambda = NO;
          io->mod->ns = 2;
          io->alphabet[0][0] = '0'; io->alphabet[0][1] = '\0';
          io->alphabet[1][0] = '1'; io->alphabet[1][1] = '\0';
        }
      else if(!strcmp(curr_parm->value,"dna") ||
              !strcmp(curr_parm->value,"rna") ||
              !strcmp(curr_parm->value,"nucleotide"))
        {
          io->datatype = NT;
          io->mod->ns = 4;
        }
      else if(!strcmp(curr_parm->value,"protein"))
        {
          io->datatype = AA;
          io->mod->ns = 20;
        }
      else if(!strcmp(curr_parm->value,"continuous"))
        {
          PhyML_Printf("\n== The 'continuous' format is not supported by PhyML. Sorry.\n");
          PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
          Exit("");
        }
    }

  else if(!strcmp(curr_parm->name,"missing") || !strcmp(curr_parm->name,"gap"))
    {
      PhyML_Printf("\n== The '%s' subcommand is not supported by PhyML. Please remove it from the NEXUS file.",curr_parm->name);
      PhyML_Printf("\n== Note that the characters 'X', '?' and '-' will be considered as indels by default.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  else if(!strcmp(curr_parm->name,"symbols"))
    {
      int    has_spaces,state_len;
      size_t toklen,len;

      /* Both an opening and a distinct closing quote are required: a token
         made of a single '"' would otherwise send the scans below past the
         end of the string. */
      toklen = strlen(token);
      if(toklen < 2 || token[0] != '"' || token[toklen-1] != '"')
        {
          PhyML_Printf("\n== Symbols list is supposed to be displayed between quotation marks (e.g., \"ACTG\").\n");
          PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
          Exit("");
        }

      token++; /* Get rid of the first '"' character */

      /* Note: the token was lowercased above, so states are stored in
         lower case. */
      has_spaces = 0;
      for(i=0;token[i] != '"';i++)
        {
          if(token[i] == ' ') { has_spaces = 1; break; }
        }

      io->mod->ns = 0;
      if(!has_spaces)
        {
          /* One state per character.  The scan restarts from the first
             character: `i` is left on the closing quote by the loop above. */
          for(i=0;token[i] != '"';i++)
            {
              /* Checked before writing so that io->alphabet is never indexed
                 beyond T_MAX_ALPHABET-1.  Assumes io->alphabet holds at
                 least T_MAX_ALPHABET states -- TODO confirm. */
              if(io->mod->ns >= T_MAX_ALPHABET)
                {
                  PhyML_Printf("\n== The alphabet cannot contain more than %d characters. Sorry.",T_MAX_ALPHABET);
                  PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
                  Exit("");
                }
              io->alphabet[io->mod->ns][0] = token[i];
              io->alphabet[io->mod->ns][1] = '\0';
              io->mod->ns++;
            }
        }
      else
        {
          /* Space-separated states of one or more characters each. */
          i = 0;
          do
            {
              if(io->mod->ns >= T_MAX_ALPHABET)
                {
                  PhyML_Printf("\n== The alphabet cannot contain more than %d characters. Sorry.",T_MAX_ALPHABET);
                  PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
                  Exit("");
                }

              state_len = 0;
              while(token[i] != ' ' && token[i] != '"')
                {
                  /* Reject the state before writing its (T_MAX_STATE+1)-th
                     character.  NOTE(review): the terminating '\0' of a
                     state of exactly T_MAX_STATE characters lands at index
                     T_MAX_STATE; confirm each state buffer has room for it. */
                  if(state_len >= T_MAX_STATE)
                    {
                      PhyML_Printf("\n== A state cannot contain more than %d characters. Sorry.\n",T_MAX_STATE);
                      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
                      Exit("");
                    }
                  io->alphabet[io->mod->ns][state_len] = token[i];
                  state_len++;
                  i++;
                }

              io->alphabet[io->mod->ns][state_len] = '\0';
              io->mod->ns++;
              if(token[i] != '"') i++;
            }
          while(token[i] != '"');
        }

      /* An empty list ("") defines no state at all; the length check below
         would otherwise read whatever io->alphabet[0] held before. */
      if(io->mod->ns == 0)
        {
          PhyML_Printf("\n== The symbols list does not define any character state.\n");
          PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
          Exit("");
        }

      len = strlen(io->alphabet[0]);
      for(i=0;i<io->mod->ns;i++)
        {
          if(strlen(io->alphabet[i]) != len)
            {
              PhyML_Printf("\n== All character states defined in the symbol list are supposed to have the same length.\n");
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }
        }
      io->state_len = (int)len;
    }

  else if(!strcmp(curr_parm->name,"equate")    ||
          !strcmp(curr_parm->name,"matchchar") ||
          !strcmp(curr_parm->name,"items"))
    {
      PhyML_Printf("\n== PhyML does not recognize the command '%s' yet. Sorry.",curr_parm->name);
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  else if(!strcmp(curr_parm->name,"interleave"))
    {
      io->interleaved = YES;
    }

  return 1;
}
414 
415 //////////////////////////////////////////////////////////////
416 //////////////////////////////////////////////////////////////
417 
418 
/*
 * Handler for the NEXUS 'eliminate' command, which is not supported.
 *
 * Returns 0 when `token` starts with '='; for any other token an error
 * message is printed and Exit() is called.  `curr_parm` and `io` are unused.
 */
int Read_Nexus_Eliminate(char *token, nexparm *curr_parm, option *io)
{
  if(*token != '=')
    {
      PhyML_Printf("\n== 'Eliminate' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");

      return 1;
    }

  return 0;
}
429 
430 //////////////////////////////////////////////////////////////
431 //////////////////////////////////////////////////////////////
432 
433 
/*
 * Handler for the NEXUS 'taxlabels' command, which is not supported.
 *
 * Returns 0 when `token` starts with '='; for any other token an error
 * message is printed and Exit() is called.  `curr_parm` and `io` are unused.
 */
int Read_Nexus_Taxlabel(char *token, nexparm *curr_parm, option *io)
{
  if(*token != '=')
    {
      PhyML_Printf("\n== 'Taxlabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");

      return 1;
    }

  return 0;
}
444 
445 //////////////////////////////////////////////////////////////
446 //////////////////////////////////////////////////////////////
447 
448 
/*
 * Handler for the NEXUS 'charstatelabels' command, which is not supported.
 *
 * Returns 0 when `token` starts with '='; for any other token an error
 * message is printed and Exit() is called.  `curr_parm` and `io` are unused.
 */
int Read_Nexus_Charstatelabels(char *token, nexparm *curr_parm, option *io)
{
  if(*token != '=')
    {
      PhyML_Printf("\n== 'CharStateLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");

      return 1;
    }

  return 0;
}
460 
461 //////////////////////////////////////////////////////////////
462 //////////////////////////////////////////////////////////////
463 
464 
/*
 * Handler for the NEXUS 'charlabels' command, which is not supported.
 *
 * Returns 0 when `token` starts with '='; for any other token an error
 * message is printed and Exit() is called.  `curr_parm` and `io` are unused.
 */
int Read_Nexus_Charlabels(char *token, nexparm *curr_parm, option *io)
{
  if(*token != '=')
    {
      PhyML_Printf("\n== 'CharLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");

      return 1;
    }

  return 0;
}
475 
476 //////////////////////////////////////////////////////////////
477 //////////////////////////////////////////////////////////////
478 
479 
/*
 * Handler for the NEXUS 'statelabels' command, which is not supported.
 *
 * Returns 0 when `token` starts with '='; for any other token an error
 * message is printed and Exit() is called.  `curr_parm` and `io` are unused.
 */
int Read_Nexus_Statelabels(char *token, nexparm *curr_parm, option *io)
{
  if(*token != '=')
    {
      PhyML_Printf("\n== 'StateLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");

      return 1;
    }

  return 0;
}
490 
491 //////////////////////////////////////////////////////////////
492 //////////////////////////////////////////////////////////////
493 
494