1 /*
2
3 PhyML: a program that computes maximum likelihood phylogenies from
4 DNA or AA homologous sequences.
5
6 Copyright (C) Stephane Guindon. Oct 2003 onward.
7
8 All parts of the source except where indicated are distributed under
9 the GNU public licence. See http://www.opensource.org for details.
10
11 */
12
13 #include "nexus.h"
14
/*
  Search the command table 'com_list' (N_MAX_NEX_COM entries) for a command
  whose name equals 'token', ignoring case.

  token        : candidate command name; lowercased in place, character by
                 character, while it is being compared.
  found_com    : receives the matching command. Left untouched when no
                 command matches.
  default_parm : receives the first parameter of *found_com whenever
                 *found_com is non-NULL and owns at least one parameter
                 (this also applies to a value *found_com held on entry).
  com_list     : table of known commands; names are lowercased in place as
                 they are compared.
*/
void Find_Nexus_Com(char *token, nexcom **found_com, nexparm **default_parm, nexcom **com_list)
{
  int idx;

  for(idx = 0; idx < N_MAX_NEX_COM; idx++)
    {
      nexcom *cand;
      int     len,pos,mismatches;

      len = strlen(token);

      /* An empty token never matches; the table entry is not even looked at. */
      if(!len) continue;

      /* Names of a different length cannot match either. */
      if((size_t)len != strlen(com_list[idx]->name)) continue;

      cand       = com_list[idx];
      mismatches = 0;

      /* Every position is visited (no early exit) so that both strings end
         up fully lowercased, whatever the outcome of the comparison. */
      for(pos = 0; pos < len; pos++)
        {
          Lowercase(token + pos);
          Lowercase(cand->name + pos);
          mismatches += (token[pos] != cand->name[pos]);
        }

      if(mismatches == 0)
        {
          *found_com = cand;
          break;
        }
    }

  if(*found_com && (*found_com)->nparm)
    {
      *default_parm = (*found_com)->parm[0];
    }
}
40
41 //////////////////////////////////////////////////////////////
42 //////////////////////////////////////////////////////////////
43
44
/*
  Search the parameters of the command 'curr_com' for one whose name equals
  'token', ignoring case.

  token      : candidate parameter name; lowercased in place while it is
               compared against names of the same length.
  found_parm : receives the matching parameter. Left untouched when no
               parameter matches.
  curr_com   : command whose parameter list is searched. A NULL command is
               reported as an error and Exit("") is called.
*/
void Find_Nexus_Parm(char *token, nexparm **found_parm, nexcom *curr_com)
{
  int idx;

  if(!curr_com)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  idx = 0;
  while(idx < curr_com->nparm)
    {
      int len = strlen(token);

      if((size_t)len == strlen(curr_com->parm[idx]->name))
        {
          nexparm *cand       = curr_com->parm[idx];
          int      mismatches = 0;
          int      pos;

          /* Compare every position so that both strings are lowercased
             over their whole length, match or not. */
          for(pos = 0; pos < len; pos++)
            {
              Lowercase(token + pos);
              Lowercase(cand->name + pos);
              if(token[pos] != cand->name[pos]) mismatches++;
            }

          if(mismatches == 0)
            {
              *found_parm = cand;
              break;
            }
        }

      idx++;
    }
}
76
77 //////////////////////////////////////////////////////////////
78 //////////////////////////////////////////////////////////////
79
80
/*
  Skip the content of a 'taxa' block.

  Tokens are read from io->fp_in_align into 'token' until one starting with
  ';' is found or an empty token is returned. The stream is then moved back
  by one character (presumably so that the caller reads the terminating ';'
  again -- to be confirmed against the caller).

  token     : buffer receiving the tokens that are read.
  curr_parm : unused.
  io        : provides the alignment stream (io->fp_in_align).

  Always returns 1.
*/
int Read_Nexus_Taxa(char *token, nexparm *curr_parm, option *io)
{
  PhyML_Printf("\n. Skipping 'taxa' block");

  do
    {
      Get_Token(io->fp_in_align,token);
      if(token[0] == ';') break;
    }
  while(token[0] != '\0');

  /* The offset is a plain signed long: '-1*sizeof(char)' is an unsigned
     expression (SIZE_MAX) and only becomes -1 through an
     implementation-defined conversion to long. */
  fseek(io->fp_in_align,-1L,SEEK_CUR);

  return 1;
}
96
97 //////////////////////////////////////////////////////////////
98 //////////////////////////////////////////////////////////////
99
100
/*
  Read the content of a 'translate' block from io->fp_in_tree.

  The block is read token by token until a token starting with ';' or an
  empty token is met. Each token that is entirely a base-10 integer is
  taken as a taxon number; the token that follows it is taken as the
  corresponding long taxon name. For every such pair, one entry is appended
  to io->short_tax_names (the number, printed with "%d") and one to
  io->long_tax_names (a copy of the name). io->size_tax_names is reset to 0
  on entry and counts the pairs that were stored.

  The stream is finally moved back by one character.

  token     : buffer receiving the tokens that are read.
  curr_parm : unused.
  io        : provides the tree stream and owns the two name tables.

  Always returns 1. A failed reallocation is reported and Exit("") is called.
*/
int Read_Nexus_Translate(char *token, nexparm *curr_parm, option *io)
{
  int    tax_num;
  int    last;
  char  *end;
  char **grown;
  char   num_buf[32]; /* large enough for any int printed with "%d" */

  PhyML_Printf("\n. Reading 'translate' block");
  io->size_tax_names = 0;

  do
    {
      Get_Token(io->fp_in_tree,token);
      if(token[0] == ';') break;

      tax_num = (int)strtol(token,&end,10);

      if(*end == '\0' && token[0])
        {
          io->size_tax_names++;
          last = io->size_tax_names - 1;

          /* Grow through a temporary so that the table is not lost (and not
             dereferenced as NULL) when realloc fails. */
          grown = (char **)realloc(io->short_tax_names,io->size_tax_names*sizeof(char *));
          if(!grown)
            {
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }
          io->short_tax_names = grown;

          /* The printed number can be longer than the token it was read
             from (e.g. an out-of-range value that turns negative once
             narrowed to int), so the buffer is sized from the printed form
             rather than from the token. */
          snprintf(num_buf,sizeof num_buf,"%d",tax_num);
          io->short_tax_names[last] = (char *)mCalloc(strlen(num_buf)+1,sizeof(char));
          strcpy(io->short_tax_names[last],num_buf);

          Get_Token(io->fp_in_tree,token);

          grown = (char **)realloc(io->long_tax_names,io->size_tax_names*sizeof(char *));
          if(!grown)
            {
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }
          io->long_tax_names = grown;

          io->long_tax_names[last] = (char *)mCalloc(strlen(token)+1,sizeof(char));
          strcpy(io->long_tax_names[last],token);
        }
    }
  while(token[0] != '\0');

  /* Signed long offset: '-1*sizeof(char)' is unsigned and relies on an
     implementation-defined conversion to reach -1. */
  fseek(io->fp_in_tree,-1L,SEEK_CUR);

  return 1;
}
136
137 //////////////////////////////////////////////////////////////
138 //////////////////////////////////////////////////////////////
139
140
/*
  Read the sequence matrix of a 'data' block.

  The sequences are read with Read_Seq_Interleaved() when io->interleaved
  is set and with Read_Seq_Sequential() otherwise; the result is stored in
  io->data. The alignment stream is then moved back by one character.

  token     : unused.
  curr_parm : unused.
  io        : reading options; io->data is overwritten.

  Always returns 1.
*/
int Read_Nexus_Matrix(char *token, nexparm *curr_parm, option *io)
{
  io->data = io->interleaved ? Read_Seq_Interleaved(io) : Read_Seq_Sequential(io);

  /* Signed long offset: '-1*sizeof(char)' is an unsigned expression and
     only yields -1 through an implementation-defined conversion to long. */
  fseek(io->fp_in_align,-1L,SEEK_CUR);

  return 1;
}
151
152 //////////////////////////////////////////////////////////////
153 //////////////////////////////////////////////////////////////
154
155
/*
  Read one tree from io->fp_in_tree and append it to io->treelist.

  The tree is read with Read_Tree_File_Phylip(), stored in io->tree and
  appended at the end of io->treelist->tree, whose size
  (io->treelist->list_size) is incremented. A progress message is printed
  when the number of trees already in the list is a non-zero multiple of
  10. The tree stream is finally moved back by one character.

  token     : unused.
  curr_parm : unused.
  io        : provides the tree stream and owns the tree list.

  Always returns 1. A failed reallocation is reported and Exit("") is called.
*/
int Read_Nexus_Tree(char *token, nexparm *curr_parm, option *io)
{
  t_tree **grown;

  /* Grow through a temporary: assigning realloc's result directly would
     lose the list and lead to a NULL dereference below on failure. */
  grown = (t_tree **)realloc(io->treelist->tree,(io->treelist->list_size+1)*sizeof(t_tree *));
  if(!grown)
    {
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }
  io->treelist->tree = grown;

  io->tree = Read_Tree_File_Phylip(io->fp_in_tree);

  if(!(io->treelist->list_size%10) && io->treelist->list_size > 1)
    {
      PhyML_Printf("\n. Reading tree %d",io->treelist->list_size);
      if(io->tree->n_root) PhyML_Printf(" (that is a rooted tree)");
      else                 PhyML_Printf(" (that is an unrooted tree)");
    }

  io->treelist->tree[io->treelist->list_size] = io->tree;
  io->treelist->list_size++;

  /* Signed long offset: '-1*sizeof(char)' is unsigned and relies on an
     implementation-defined conversion to reach -1. */
  fseek(io->fp_in_tree,-1L,SEEK_CUR);

  return 1;
}
171
172 //////////////////////////////////////////////////////////////
173 //////////////////////////////////////////////////////////////
174
175
/*
  Handle the value that follows a 'begin' command, i.e., the block type.

  token     : current token. A token starting with '=' is ignored and 0 is
              returned.
  curr_parm : parameter describing the block type; only 'data' and 'trees'
              are accepted. A NULL parameter or any other block type is
              reported as an error and Exit("") is called.
  io        : unused.

  Returns 0 when the token starts with '=', 1 otherwise.
*/
int Read_Nexus_Begin(char *token, nexparm *curr_parm, option *io)
{
  int supported;

  if('=' == token[0]) return 0;

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  supported =
    (strcmp(curr_parm->name,"data")  == 0) ||
    (strcmp(curr_parm->name,"trees") == 0);

  if(!supported)
    {
      PhyML_Printf("\n. The '%s' block type is not supported by PhyML. Sorry.\n",curr_parm->name);
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }
  else
    {
      /* NOTE(review): this message prints curr_parm->value whereas the test
         above and the error message use curr_parm->name; 'value' is not
         assigned in this function -- confirm this is intended. */
      PhyML_Printf("\n. Reading '%s' block.\n",curr_parm->value);
    }

  return 1;
}
197
198 //////////////////////////////////////////////////////////////
199 //////////////////////////////////////////////////////////////
200
201
/*
  Handle a value given to a parameter of the 'dimensions' command.

  token     : current token. A token starting with '=' is ignored and 0 is
              returned; any other token is copied into curr_parm->value.
  curr_parm : parameter being set. For 'ntax' the value is scanned as an
              integer into io->n_otu, for 'nchar' into io->init_len. A NULL
              parameter is reported as an error and Exit("") is called.
  io        : receives the scanned dimensions.

  Returns 0 when the token starts with '=', 1 otherwise.
*/
int Read_Nexus_Dimensions(char *token, nexparm *curr_parm, option *io)
{
  if('=' == token[0]) return 0;

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  strcpy(curr_parm->value,token);

  if(strcmp(curr_parm->name,"ntax") == 0)
    {
      sscanf(curr_parm->value,"%d",&(io->n_otu));
    }
  else if(strcmp(curr_parm->name,"nchar") == 0)
    {
      sscanf(curr_parm->value,"%d",&(io->init_len));
    }

  return 1;
}
225
226 //////////////////////////////////////////////////////////////
227 //////////////////////////////////////////////////////////////
228
229
/*
  Parse the quoted list of character states given to the 'symbols'
  subcommand of 'format'.

  'token' must start and end with a quotation mark. When the text between
  the quotes contains no space before its closing quote, every character is
  one state (e.g., "ACGT"); otherwise states are separated by single spaces
  (e.g., "00 01 10"). States are stored in io->alphabet, their number in
  io->mod->ns and their common length in io->state_len.

  Any violation (missing quotes, more than T_MAX_ALPHABET states, a state
  longer than T_MAX_STATE characters, states of unequal lengths) is
  reported and Exit("") is called.
*/
static void Nexus_Read_Symbols(char *token, option *io)
{
  size_t tokenlen;
  int    pos,has_spaces,state_len,len,k;

  tokenlen = strlen(token);

  /* Two characters at least are required: a lone '"' would pass both quote
     tests and the scans below would then run past the end of the string. */
  if(tokenlen < 2 || token[0] != '"' || token[tokenlen-1] != '"')
    {
      PhyML_Printf("\n== Symbols list is supposed to be displayed between quotation marks (e.g., \"ACTG\").\n");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  token++; /* Get rid of the first '"' character */

  has_spaces = 0;
  for(pos=0;token[pos] != '"';pos++)
    {
      if(token[pos] == ' ') { has_spaces = 1; break; }
    }

  io->mod->ns = 0;

  /* Restart from the first symbol in both cases. Without this reset the
     single-character branch starts on the closing quote left by the scan
     above and never stores any state. */
  pos = 0;

  if(!has_spaces)
    {
      while(token[pos] != '"')
        {
          /* Checked before writing so that no state is stored past the limit. */
          if(io->mod->ns >= T_MAX_ALPHABET)
            {
              PhyML_Printf("\n== The alphabet cannot contain more than %d characters. Sorry.",T_MAX_ALPHABET);
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }

          io->alphabet[io->mod->ns][0] = token[pos];
          io->alphabet[io->mod->ns][1] = '\0';
          io->mod->ns++;
          pos++;
        }
    }
  else
    {
      do
        {
          if(io->mod->ns >= T_MAX_ALPHABET)
            {
              PhyML_Printf("\n== The alphabet cannot contain more than %d characters. Sorry.",T_MAX_ALPHABET);
              PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
              Exit("");
            }

          state_len = 0;
          while(token[pos] != ' ' && token[pos] != '"')
            {
              if(state_len >= T_MAX_STATE)
                {
                  PhyML_Printf("\n== A state cannot contain more than %d characters. Sorry.\n",T_MAX_STATE);
                  PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
                  Exit("");
                }

              io->alphabet[io->mod->ns][state_len] = token[pos];
              state_len++;
              pos++;
            }

          /* NOTE(review): a state of exactly T_MAX_STATE characters puts
             this terminator at index T_MAX_STATE -- confirm that each row
             of io->alphabet holds T_MAX_STATE+1 bytes. */
          io->alphabet[io->mod->ns][state_len] = '\0';
          io->mod->ns++;

          if(token[pos] != '"') pos++; /* skip the separating space */
        }
      while(token[pos] != '"');
    }

  len = (int)strlen(io->alphabet[0]);
  for(k=0;k<io->mod->ns;k++)
    {
      if((int)strlen(io->alphabet[k]) != len)
        {
          PhyML_Printf("\n== All character states defined in the symbol list are supposed to have the same length.\n");
          PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
          Exit("");
        }
    }
  io->state_len = len;
}


/*
  Handle a value given to a subcommand of the 'format' command.

  token     : current token. A token starting with '=' is ignored and 0 is
              returned; any other token is lowercased in place and copied
              into curr_parm->value.
  curr_parm : subcommand being set. A NULL parameter is reported as an
              error and Exit("") is called.
                datatype   : 'standard' (binary 0/1 alphabet, JC69 model),
                             'dna', 'rna', 'nucleotide' (4 states) or
                             'protein' (20 states). 'continuous' is
                             rejected; other values are ignored.
                symbols    : quoted list of states, see Nexus_Read_Symbols().
                interleave : sets io->interleaved to YES.
                missing, gap, equate, matchchar, items :
                             not supported, reported and Exit("") called.
  io        : options updated according to the subcommand.

  Returns 0 when the token starts with '=', 1 otherwise.
*/
int Read_Nexus_Format(char *token, nexparm *curr_parm, option *io)
{
  int i;

  if(token[0] == '=') return 0;

  if(!curr_parm)
    {
      PhyML_Printf("\n. Err in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  For(i,strlen(token)) Lowercase(token+i);

  strcpy(curr_parm->value,token);

  if(!strcmp(curr_parm->name,"datatype"))
    {
      if(!strcmp(curr_parm->value,"standard"))
        {
          io->datatype               = GENERIC;
          io->mod->whichmodel        = JC69;
          io->mod->s_opt->opt_kappa  = NO;
          io->mod->s_opt->opt_lambda = NO;
          io->mod->ns                = 2;
          io->alphabet[0][0] = '0'; io->alphabet[0][1] = '\0';
          io->alphabet[1][0] = '1'; io->alphabet[1][1] = '\0';
        }
      else if(!strcmp(curr_parm->value,"dna") ||
              !strcmp(curr_parm->value,"rna") ||
              !strcmp(curr_parm->value,"nucleotide"))
        {
          io->datatype = NT;
          io->mod->ns  = 4;
        }
      else if(!strcmp(curr_parm->value,"protein"))
        {
          io->datatype = AA;
          io->mod->ns  = 20;
        }
      else if(!strcmp(curr_parm->value,"continuous"))
        {
          PhyML_Printf("\n== The 'continuous' format is not supported by PhyML. Sorry.\n");
          PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
          Exit("");
        }
    }
  else if(!strcmp(curr_parm->name,"missing") || !strcmp(curr_parm->name,"gap"))
    {
      PhyML_Printf("\n== The '%s' subcommand is not supported by PhyML. Please remove it from the NEXUS file.",curr_parm->name);
      PhyML_Printf("\n== Note that the characters 'X', '?' and '-' will be considered as indels by default.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }
  else if(!strcmp(curr_parm->name,"symbols"))
    {
      Nexus_Read_Symbols(token,io);
    }
  else if(!strcmp(curr_parm->name,"equate")    ||
          !strcmp(curr_parm->name,"matchchar") ||
          !strcmp(curr_parm->name,"items"))
    {
      PhyML_Printf("\n== PhyML does not recognize the command '%s' yet. Sorry.",curr_parm->name);
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }
  else if(!strcmp(curr_parm->name,"interleave"))
    {
      io->interleaved = YES;
    }

  return 1;
}
414
415 //////////////////////////////////////////////////////////////
416 //////////////////////////////////////////////////////////////
417
418
/*
  Handler for the 'eliminate' command, which PhyML does not support.

  A token starting with '=' is ignored (0 is returned). Any other token
  triggers an error message followed by a call to Exit(""); 1 is the
  return value on that path.

  curr_parm and io are unused.
*/
int Read_Nexus_Eliminate(char *token, nexparm *curr_parm, option *io)
{
  int is_value = (*token != '=');

  if(is_value)
    {
      PhyML_Printf("\n== 'Eliminate' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return is_value;
}
429
430 //////////////////////////////////////////////////////////////
431 //////////////////////////////////////////////////////////////
432
433
/*
  Handler for the 'taxlabels' command, which PhyML does not support.

  A token starting with '=' is ignored (0 is returned). Any other token
  triggers an error message followed by a call to Exit(""); 1 is the
  return value on that path.

  curr_parm and io are unused.
*/
int Read_Nexus_Taxlabel(char *token, nexparm *curr_parm, option *io)
{
  int is_value = (*token != '=');

  if(is_value)
    {
      PhyML_Printf("\n== 'Taxlabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return is_value;
}
444
445 //////////////////////////////////////////////////////////////
446 //////////////////////////////////////////////////////////////
447
448
/*
  Handler for the 'charstatelabels' command, which PhyML does not support.

  A token starting with '=' is ignored (0 is returned). Any other token
  triggers an error message followed by a call to Exit(""); 1 is the
  return value on that path.

  curr_parm and io are unused.
*/
int Read_Nexus_Charstatelabels(char *token, nexparm *curr_parm, option *io)
{
  int is_value = (*token != '=');

  if(is_value)
    {
      PhyML_Printf("\n== 'CharStateLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return is_value;
}
460
461 //////////////////////////////////////////////////////////////
462 //////////////////////////////////////////////////////////////
463
464
/*
  Handler for the 'charlabels' command, which PhyML does not support.

  A token starting with '=' is ignored (0 is returned). Any other token
  triggers an error message followed by a call to Exit(""); 1 is the
  return value on that path.

  curr_parm and io are unused.
*/
int Read_Nexus_Charlabels(char *token, nexparm *curr_parm, option *io)
{
  int is_value = (*token != '=');

  if(is_value)
    {
      PhyML_Printf("\n== 'CharLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return is_value;
}
475
476 //////////////////////////////////////////////////////////////
477 //////////////////////////////////////////////////////////////
478
479
/*
  Handler for the 'statelabels' command, which PhyML does not support.

  A token starting with '=' is ignored (0 is returned). Any other token
  triggers an error message followed by a call to Exit(""); 1 is the
  return value on that path.

  curr_parm and io are unused.
*/
int Read_Nexus_Statelabels(char *token, nexparm *curr_parm, option *io)
{
  int is_value = (*token != '=');

  if(is_value)
    {
      PhyML_Printf("\n== 'StateLabels' command is not supported by PhyML. Sorry.");
      PhyML_Printf("\n== Err. in file %s at line %d\n",__FILE__,__LINE__);
      Exit("");
    }

  return is_value;
}
490
491 //////////////////////////////////////////////////////////////
492 //////////////////////////////////////////////////////////////
493
494