1 /*
2
3 HyPhy - Hypothesis Testing Using Phylogenies.
4
5 Copyright (C) 1997-now
6 Core Developers:
7 Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8 Art FY Poon (apoon42@uwo.ca)
9 Steven Weaver (sweaver@temple.edu)
10
11 Module Developers:
12 Lance Hepler (nlhepler@gmail.com)
13 Martin Smith (martin.audacis@gmail.com)
14
15 Significant contributions from:
16 Spencer V Muse (muse@stat.ncsu.edu)
17 Simon DW Frost (sdf22@cam.ac.uk)
18
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
38 */
39
40 #include <math.h>
41 #include <string.h>
42 #include <ctype.h>
43
44 #include "hy_string_buffer.h"
45 #include "dataset.h"
46 #include "list.h"
47 #include "batchlan.h"
48 #include "hbl_env.h"
49 #include "global_object_lists.h"
50 #include "global_things.h"
51
52
53 using namespace hy_global;
54 using namespace hyphy_global_objects;
55
56
57 //_________________________________________________________
58
// NOTE(review): initialised to nil and not assigned anywhere in the visible part of this file —
// presumably the data set produced by the most recent NEXUS matrix read; confirm against the users.
_DataSet* lastNexusDataMatrix = nil;
// Text collected by ProcessNexusHYPHY for the most recently read HYPHY block.
_StringBuffer nexusBFBody;



// Forward declarations.
// The first group is not defined in the visible part of this file
// (NOTE(review): presumably generic sequence-file reading helpers — confirm where they live).
void checkTTStatus (FileState* fs);
void processCommand (_StringBuffer*s, FileState*fs);
void FilterRawString (_StringBuffer& s, FileState* fs, _DataSet & ds);
long ProcessLine (_StringBuffer&s , FileState *fs, _DataSet& ds);
void PadLine (FileState& fState, _DataSet& result);
void ISelector (FileState& fState, _StringBuffer& CurrentLine, _DataSet& result);
bool SkipLine (_StringBuffer& theLine, FileState* fS);
void TrimPhylipLine (_StringBuffer& CurrentLine, _DataSet& ds);
// One handler per NEXUS block; each receives the parser state, a position in the current line,
// the input file, the current line buffer and the data set being populated.
bool ProcessNexusData (FileState&, long, hyFile*, _StringBuffer&, _DataSet&);
void ProcessNexusHYPHY (FileState&, long, hyFile*, _StringBuffer&, _DataSet&);
void ProcessNexusAssumptions (FileState&, long, hyFile*, _StringBuffer&, _DataSet&);
void ProcessNexusTaxa (FileState&,long, hyFile*, _StringBuffer&, _DataSet&);
void ProcessNexusTrees (FileState&, long, hyFile*, _StringBuffer&, _DataSet&);
// Low-level NEXUS tokenising helpers shared by the block handlers.
bool FindNextNexusToken (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos);
bool SkipUntilNexusBlockEnd (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos);
// Defaults (in order): stopOnComma = true, stopOnQuote = true, NLonly = false,
// preserveSpaces = false, preserveQuotes = false.
bool ReadNextNexusStatement (FileState&, hyFile* , _StringBuffer&, long, _StringBuffer&, bool, bool = true, bool = true, bool = false, bool = false, bool = false);
// Defaults (in order): resetP = false, demandSemicolon = true.
long ReadNextNexusEquate (FileState&, hyFile* , _StringBuffer&, long, _String&, bool = false, bool = true);
void NexusParseEqualStatement (_StringBuffer&);
82
__anon737ca7b90102(_String const& buffer, long position) 83 static auto error_conext = [] (_String const& buffer, long position) -> const _String {return (buffer.Cut (0,position) & " <=? " & buffer.Cut (position+1,kStringEnd)).Enquote();};
84
85
86 //_________________________________________________________
87
FindNextNexusToken(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos)88 bool FindNextNexusToken (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos) {
89
90 pos = CurrentLine.FirstNonSpaceIndex (pos,-1,kStringDirectionForward);
91 if (pos==kNotFound) {
92 ReadNextLine(f,&CurrentLine,&fState,false);
93 pos = CurrentLine.FirstNonSpaceIndex (0,-1,kStringDirectionForward);
94 if (pos==kNotFound) {
95 return false;
96 }
97 }
98 CurrentLine.Trim (pos, kStringEnd);
99 return true;
100 }
101
102
103 //_________________________________________________________
104
SkipUntilNexusBlockEnd(FileState & fState,hyFile * file,_StringBuffer & CurrentLine,long pos)105 bool SkipUntilNexusBlockEnd (FileState& fState, hyFile* file, _StringBuffer& CurrentLine, long pos) {
106 static const _String endMark ("END");
107 pos = CurrentLine.Find (endMark,pos+1,kStringEnd);
108 while (pos == kNotFound) {
109 ReadNextLine(file,&CurrentLine,&fState,false);
110 if (CurrentLine.empty()) {
111 return false;
112 }
113 pos = CurrentLine.Find (endMark,0,kStringEnd);
114 if (pos != kNotFound) {
115 pos = CurrentLine.Find (';',pos+endMark.length(),kStringEnd);
116 if (pos != kNotFound) {
117 CurrentLine.Trim (pos+endMark.length(), kStringEnd);
118 if (CurrentLine.empty()) {
119 ReadNextLine(file,&CurrentLine,&fState,false);
120 }
121 } else {
122 ReportWarning ("Found END w/o a trailing semicolon. Assuming end of block and skipping the rest of the line.");
123 ReadNextLine(file,&CurrentLine,&fState,false);
124 }
125 return true;
126 }
127 }
128 return false;
129 }
130 //_________________________________________________________
NexusParseEqualStatement(_StringBuffer & source)131 void NexusParseEqualStatement (_StringBuffer& source)
132 {
133 long f = source.Find('=');
134 if (f != kNotFound) {
135 f = source.FirstNonSpaceIndex (f+1,kStringEnd);
136 if (f != kNotFound) {
137 source.Trim (f,kStringEnd);
138 return;
139 }
140 }
141 source.Clear();
142
143 }
144 //_________________________________________________________
145
ReadNextNexusStatement(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos,_StringBuffer & blank,bool stopOnSpace,bool stopOnComma,bool stopOnQuote,bool NLonly,bool preserveSpaces,bool preserveQuotes)146 bool ReadNextNexusStatement (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos, _StringBuffer & blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes) {
147 bool done = false,
148 insideLiteral = false,
149 startedReading = false;
150
151 long newPos = pos>0?pos+1L:pos;
152 char c = '\0';
153
154 while (1) {
155 while (newPos<CurrentLine.length()) {
156 c = CurrentLine.char_at (newPos);
157 if (isspace(c)) {
158 if (stopOnSpace && startedReading && (!insideLiteral) && (!NLonly || (NLonly && (c=='\r' || c=='\n')))) {
159 done = true;
160 break;
161 } else {
162 if (insideLiteral||preserveSpaces) {
163 blank<<' ';
164 }
165 }
166 } else {
167 if (c==';' && ! insideLiteral) { // terminate always
168 done = true;
169 newPos++;
170 break;
171 } else if (stopOnComma && c==',' && ! insideLiteral) {
172 done = true;
173 newPos++;
174 break;
175 } else if (! preserveQuotes && (c=='\'' || c=='"') ) {
176 if (c=='\'') {
177 if (newPos+1<CurrentLine.length())
178 // check for a double quote
179 {
180 c = CurrentLine.char_at (newPos+1);
181 if (c=='\'') {
182 newPos += 2;
183 blank<<c;
184 continue;
185 }
186 //else
187 // if (!startedReading || insideLiteral)
188 // newPos--;
189 }
190 }
191 if (startedReading &&stopOnQuote) {
192 done = true;
193 newPos++;
194 break;
195 } else {
196 insideLiteral = !insideLiteral;
197 }
198 } else {
199 startedReading = true;
200 blank<<c;
201 }
202 }
203 newPos++;
204 }
205 if (!done) {
206 if (NLonly&&(blank.FirstNonSpaceIndex(0,kStringEnd,kStringDirectionForward)>=0)) {
207 break;
208 }
209 ReadNextLine(f,&CurrentLine,&fState,false);
210 newPos = 0;
211 if (CurrentLine.empty()) {
212 c=';';
213 break;
214 }
215 } else {
216 break;
217 }
218
219 }
220 // TODO 20170821: SLKP, this needs to be case sensitive
221 blank.ChangeCaseInPlace(kStringUpperCase);
222 if (newPos<CurrentLine.length()) {
223 CurrentLine.Trim (newPos,kStringEnd);
224 } else {
225 CurrentLine.Clear();
226 }
227 return c==';';
228 }
229
230 //_________________________________________________________
231
ReadNextNexusEquate(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos2,_String & blank,bool resetP,bool demandSemicolon)232 long ReadNextNexusEquate (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos2, _String& blank, bool resetP, bool demandSemicolon) {
233 long pos = blank.Find ('=',pos2,-1), res;
234 if (pos>=0) {
235 if (pos<blank.length()-1) {
236 blank.Trim (pos+1,-1);
237 return 1;
238 } else {
239 _StringBuffer buffer (128UL);
240 res = ReadNextNexusStatement (fState, f, CurrentLine, resetP?0:pos, buffer, true, true, false,false,false);
241 if (!buffer.empty()) {
242 blank = buffer;
243 return res?2:1;
244 }
245 }
246 return 0;
247 } else {
248 _StringBuffer buffer (128UL);
249 res = ReadNextNexusStatement (fState, f, CurrentLine, pos2, buffer, true, true, false,false,false)?2:1;
250 if (res!=2 && demandSemicolon) {
251 if((res=ReadNextNexusEquate (fState, f, CurrentLine, 0, buffer))) {
252 blank = buffer;
253 return res;
254 }
255 } else if((res = ReadNextNexusEquate (fState, f, CurrentLine, 0, buffer, resetP, false))) {
256 blank = buffer;
257 return res;
258 } else {
259 return 0;
260 }
261 }
262 return 0;
263 }
264
265 //_________________________________________________________
ProcessNexusTaxa(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)266 void ProcessNexusTaxa (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
267 static const _String key1 = "DIMENSIONS", key2 = "NTAX", key3 = "TAXLABELS", keyEnd = "END";
268
269 bool done = false;
270
271 long speciesExpected = -1, offset;
272
273 while (!done) {
274 if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
275 break;
276 }
277 // now that we've got stuff to work with see what it is
278
279 if (CurrentLine.BeginsWith (keyEnd, false)) {
280 pos = -1;
281 break;
282 }
283
284 if (CurrentLine.BeginsWith (key1, false)) {
285 if (result.GetNames().lLength) { // check the number of dimensions
286 // some data already present
287 ReportWarning ("Only one taxa definition per NEXUS file is recognized, the others will be ignored.");
288 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
289 break;
290 } else {
291 _StringBuffer buffer (128UL);
292 ReadNextNexusStatement (fState, f, CurrentLine, key1.length(), buffer, false,true, true,false,false);
293 // this will actually return '= number'
294 NexusParseEqualStatement (buffer);
295 speciesExpected = buffer.to_long();
296 }
297 } else if (CurrentLine.BeginsWith (key3, false)) {
298 if (speciesExpected == -1) {
299 ReportWarning ("TAXLABELS must be preceded by a valid NTAX statement. Skipping the entire TAXA block.");
300 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
301 break;
302 } else {
303 offset = key3.length();
304 do {
305 _StringBuffer buffer (128UL);
306 if (ReadNextNexusStatement (fState, f, CurrentLine,offset, buffer, true,true,true,false,false)) {
307 if (buffer.nonempty()) {
308 result.AddName(buffer);
309 }
310 break;
311 } else {
312 if (buffer.nonempty()) {
313 result.AddName(buffer);;
314 }
315 }
316 offset = 0;
317
318 } while (1);
319 if (result.GetNames().lLength!=speciesExpected) {
320 ReportWarning ( _String ("TAXALABELS provided ") &
321 _String ((long)result.GetNames().lLength) &" species, whereas the NTAX statement promised:" &
322 _String (speciesExpected) & ". HYPHY will use TAXALABELS count.");
323 }
324 done = true;
325 }
326 } else {
327 long offSet = 0;
328
329 ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY");
330 while (!done) {
331 _StringBuffer buffer (128UL);
332 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
333 }
334 done = false;
335 }
336
337 if (!done) {
338 pos = 0;
339 }
340 }
341
342 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
343 }
344
345 //_________________________________________________________
346
ProcessNexusAssumptions(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet &)347 void ProcessNexusAssumptions (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet&) {
348 static const _String key1 = "CHARSET", keyEnd = "END";
349
350 bool done = false;
351
352 _List charSetIDs,
353 charSetSpec;
354
355 while (!done) {
356 if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
357 break;
358 }
359 // now that we've got stuff to work with see what it is
360
361 if (CurrentLine.BeginsWith (keyEnd, false)) {
362 pos = -1;
363 break;
364 }
365
366 if (CurrentLine.BeginsWith (key1, false)) { // actual tree strings & idents
367 _StringBuffer buffer (128UL);
368 if (!ReadNextNexusStatement (fState, f, CurrentLine, key1.length(), buffer, false, false, false,false,true)) {
369 ReportWarning ("CHARSET construct not followed by ';'.");
370 break;
371 } else {
372 pos = buffer.Find ('=',1,kStringEnd);
373 if (pos==-1) {
374 ReportWarning (buffer.Enquote() & " is not of the form Charset ID = specification of the partition.");
375 } else {
376 long pos2 = buffer.FirstNonSpaceIndex (0,pos-1,kStringDirectionBackward);
377 if (pos2 != kNotFound) {
378 long j = buffer.FirstNonSpaceIndex (0,pos2-1,kStringDirectionForward);
379 if (j != kNotFound) {
380 if (buffer.char_at (j) == '*') {
381 j = buffer.FirstNonSpaceIndex (j+1,pos2-1,kStringDirectionForward);
382 }
383
384 if (j != kNotFound) {
385 _String nexus_name (buffer,j,pos2),
386 charset_id (nexus_name);
387
388 if (!nexus_name.IsValidIdentifier(fIDAllowCompound)) {
389 charset_id = nexus_name.ConvertToAnIdent();
390 }
391
392 charset_id = charSetIDs.GenerateUniqueNameForList(GenerateUniqueObjectIDByType (nexus_name, HY_BL_DATASET_FILTER), false);
393
394 if (charset_id != nexus_name) {
395 ReportWarning(nexus_name.Enquote('\'') & " has been renamed to " & charset_id.Enquote('\'') & " to avoid naming conflicts and/or comply with HyPhy ID requirements");
396 }
397
398
399 // now get the rest of the tree string
400 pos2 = buffer.FirstNonSpaceIndex(pos+1,kStringEnd);
401 pos = buffer.FirstNonSpaceIndex(pos2,kStringEnd,kStringDirectionBackward);
402 buffer.Trim (pos2,pos);
403 buffer = buffer.CompressSpaces () & " ";
404
405 _StringBuffer hpSpec (buffer.length()+1UL);
406
407 _String numberOne,
408 numberTwo,
409 numberThree;
410
411 bool spoolInto2nd = false,
412 spoolInto3rd = false,
413 okFlag = true,
414 firstFlag = true;
415
416 for (long k=0; k<buffer.length(); k++) {
417 char ch = buffer.char_at(k);
418
419 if ((ch>='0' && ch<='9') || ch=='.') {
420 if (spoolInto2nd) {
421 numberTwo = numberTwo & ch;
422 } else if (spoolInto3rd) {
423 numberThree = numberThree & ch;
424 } else {
425 numberOne = numberOne & ch;
426 }
427 }
428
429 if (ch==' ') {
430 if (numberTwo.length() == 1 && numberTwo.char_at (0) == '.') {
431 numberTwo = (long)fState.totalSitesRead;
432 }
433
434 if (spoolInto3rd) {
435 spoolInto3rd = false;
436 // handle 'every' n-th
437
438
439 long from = numberOne.to_long()-1,
440 upto = numberTwo.to_long()-1,
441 step = numberThree.to_long();
442
443 if ((upto>=from)&&(step>0)) {
444 if (!firstFlag) {
445 hpSpec << ',';
446 }
447 hpSpec << _String(from);
448 for (long kk = from+step; kk<=upto; kk+=step) {
449 hpSpec << ',' << (_String)(kk);
450 }
451
452 numberOne.Clear();
453 numberTwo.Clear();
454 numberThree.Clear();
455
456 } else {
457 ReportWarning (_String("Invalid from-to\\step specification: ") & error_conext (buffer, k));
458 okFlag = false;
459 break;
460 }
461
462 firstFlag = false;
463 } else {
464 if (spoolInto2nd) {
465 spoolInto2nd = false;
466 if (!firstFlag) {
467 hpSpec << ',';
468 }
469
470 numberOne = numberOne.to_long ()-1;
471 hpSpec << numberOne;
472 numberOne = ch;
473 hpSpec << '-';
474 numberTwo = numberTwo.to_long()-1;
475 hpSpec << numberTwo;
476 numberTwo.Clear();
477 firstFlag = false;
478
479 } else {
480 long n1;
481 if (numberOne.nonempty() && (n1 = numberOne.to_long() > 0)) {
482 numberOne = n1-1;
483 if (!firstFlag) {
484 hpSpec << ',';
485 }
486 hpSpec << numberOne;
487 }
488 numberOne.Clear();
489 firstFlag = false;
490 }
491 }
492 //hitASpace = true;
493
494 } else if (ch=='-') {
495 if (spoolInto2nd||spoolInto3rd) {
496 ReportWarning (_String("Misplaced '-' in CHARSET specification: ") & error_conext (buffer, k));
497 okFlag = false;
498 break;
499 }
500 spoolInto2nd = true;
501 } else if (ch=='\\') {
502 if ((!spoolInto2nd)||spoolInto3rd) {
503 ReportWarning (_String("Misplaced '\\' in CHARSET specification: ") & buffer.Enquote());
504 okFlag = false;
505 break;
506 }
507 spoolInto2nd = false;
508 spoolInto3rd = true;
509 }
510 }
511
512
513 if (okFlag) {
514 charSetIDs && & charset_id;
515 charSetSpec && & hpSpec;
516 }
517 }
518 }
519 if (j<0) {
520 ReportWarning (_String("Could not find a charset identifier in: ")& buffer.Enquote());
521 }
522 } else {
523 ReportWarning (buffer.Enquote() &" is not of the form CharSetID = char set string");
524 }
525 }
526 }
527 } else {
528 long offSet = 0L;
529
530 ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,-1)) & " is not used by HYPHY");
531 while (!done) {
532 _StringBuffer buffer (128UL);
533 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
534 }
535 done = false;
536 }
537
538 if (!done) {
539 //ReadNextLine(f,&CurrentLine,&fState,false);
540 pos = 0;
541 }
542 }
543
544 if (charSetIDs.lLength) {
545 _StringBuffer defineCharsets (256UL);
546
547 defineCharsets << hy_env::data_file_partition_matrix << "={2," << _String ((long)charSetIDs.lLength) << "};\n";
548
549 for (long id = 0; id < charSetIDs.lLength; id++) {
550 defineCharsets << hy_env::data_file_partition_matrix
551 << "[0]["
552 << _String (id)
553 << "]:=\""
554 << (_String*)charSetIDs(id)
555 << "\";\n"
556 << hy_env::data_file_partition_matrix
557 << "[1]["
558 << _String (id)
559 << "]:=\""
560 << (_String*)charSetSpec(id)
561 << "\";\n";
562 }
563 _ExecutionList defMx (defineCharsets);
564 defMx.Execute();
565 terminate_execution = false;
566 }
567
568 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
569 }
570
571 //_________________________________________________________
572
ProcessNexusTrees(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)573 void ProcessNexusTrees (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
574 static _String const key1 = "TRANSLATE", key2 = "TREE", errMsg, keyEnd = "END";
575
576 bool done = false, readResult, good;
577 _List translationsFrom, translationsTo;
578 _List treeIdents, treeStrings;
579 long treeSelected = 0, insertPos = 0;
580
581 while (!done) {
582
583 if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
584 break;
585 }
586 // now that we've got stuff to work with see what it is
587
588 if (CurrentLine.BeginsWith (keyEnd, false)) {
589 pos = -1;
590 break;
591 }
592
593 if (CurrentLine.BeginsWith (key1, false)) {
594 // set up translations between nodes and data labels
595 long offset = key1.length();
596 do {
597 _StringBuffer buffer (128UL);
598 readResult = ReadNextNexusStatement (fState, f, CurrentLine, offset, buffer, true, true,true,false,false);
599 if (buffer.nonempty()) {
600 if (translationsTo.lLength<translationsFrom.lLength) {
601 good = (result.GetNames().FindObject(&buffer)>=0);
602 if (good) {
603 translationsTo.InsertElement (&buffer, insertPos);
604 } else {
605 ReportWarning (buffer.Enquote() & " is not a valid taxon name for TRANSLATE" );
606 translationsFrom.Delete (insertPos);
607 }
608
609 } else {
610 if (!readResult) {
611 insertPos = translationsFrom.BinaryInsert (&buffer);
612 }
613 }
614 }
615 if (readResult) {
616 break;
617 }
618 if ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) {
619 break;
620 }
621 offset = 0;
622
623 } while (1);
624 } else if (CurrentLine.BeginsWith (key2, false)) { // actual tree strings & idents
625 _StringBuffer buffer (128UL);
626 if (!ReadNextNexusStatement (fState, f, CurrentLine, key2.length(), buffer, false, false, false,false,false, true)) {
627 ReportWarning ("TREE construct not followed by ';'.");
628 break;
629 } else {
630 // here goes the tree string in the form: treeID = treeString
631 // pull the ID out first - check if it is a valid one
632 // next crudely parse the tree string, extracting species names and
633 pos = buffer.Find ('=',1,kStringEnd);
634 if (pos==kNotFound) {
635 ReportWarning (buffer.Enquote () &" is not of the form TreeID = TreeString");
636 } else {
637 long pos2 = buffer.FirstNonSpaceIndex (0,pos-1,kStringDirectionBackward);
638 if (pos2 != kNotFound) {
639 long j = buffer.FirstNonSpaceIndex (0,pos2-1,kStringDirectionForward);
640 if (j != kNotFound ) {
641 if (buffer.char_at (j) == '*') {
642 j = buffer.FirstNonSpaceIndex (j+1,pos2-1,kStringDirectionForward);
643 treeSelected = treeIdents.lLength;
644 }
645 if (j != kNotFound) {
646 _String nexus_tree_id (buffer,j,pos2),
647 tree_id (nexus_tree_id);
648
649 if (!nexus_tree_id.IsValidIdentifier(fIDAllowCompound)) {
650 tree_id = nexus_tree_id.ConvertToAnIdent();
651 }
652
653 tree_id = treeIdents.GenerateUniqueNameForList(GenerateUniqueObjectIDByType (nexus_tree_id, HY_BL_TREE) ,false);
654
655 if (tree_id != nexus_tree_id) {
656 ReportWarning(nexus_tree_id.Enquote('\'') & " has been renamed to " & tree_id.Enquote('\'') & " to avoid naming conflicts and/or comply with HyPhy ID requirements");
657 }
658
659
660 treeIdents && & tree_id;
661 // now get the rest of the tree string
662 pos2 = buffer.FirstNonSpaceIndex(pos2,pos+1, kStringDirectionBackward);
663 buffer.Trim (pos2,kStringEnd);
664 treeStrings && & buffer;
665 }
666 }
667 if (j == kNotFound) {
668 ReportWarning (_String("Could not find a tree identifier in:") & buffer.Enquote());
669 }
670 } else {
671 ReportWarning (buffer.Enquote () &" is not of the form TreeID = TreeString");
672 }
673 }
674
675 }
676 } else {
677
678 long offSet = 0L;
679
680 ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY in TREES block");
681 while (!done) {
682 _StringBuffer buffer (128UL);
683 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
684 }
685 done = false;
686 }
687
688 if (!done) {
689 //ReadNextLine(f,&CurrentLine,&fState,false);
690 pos = 0;
691 }
692 }
693
694 // now we shall check the string and match up node names with those present in the file
695
696 for (long id = 0L; id<treeStrings.lLength; id++) {
697 _String const * file_tree_string = (_String const *) treeStrings (id);
698 long treeLevel = 0L,
699 lastNode,
700 i = 0L;
701
702 _StringBuffer revisedTreeString (128L);
703
704 // TODO SLKP 20170621: looks like this is a generic Newick parser; why duplicate it here?
705 for (i=0; i<file_tree_string->length(); ++i) {
706 char cc = file_tree_string->char_at (i);
707
708 switch (cc) {
709 case '(': { // creating a new internal node one level down
710 treeLevel++;
711 revisedTreeString<<'(';
712 break;
713 }
714
715 case ',':
716 case ')': { // creating a new node on the same level and finishes updating the list of parameters
717 if (cc==')') { // also create a new node on the same level
718 treeLevel--;
719 }
720 revisedTreeString<<cc;
721 break;
722 }
723
724 case ':' : { // tree branch definition
725 lastNode = i+1;
726 revisedTreeString<<':';
727 char c = file_tree_string->char_at (lastNode);
728
729 while (isdigit (c) || c=='.' || c=='-' || c=='e' || c=='E') {
730 if (lastNode<file_tree_string->length()) {
731 lastNode++;
732 revisedTreeString<<c;
733 c = file_tree_string->char_at (lastNode);
734 } else {
735 break;
736 }
737 }
738 i = lastNode-1;
739 break;
740 }
741
742 default: { // node name
743 lastNode = i;
744 char c = file_tree_string->char_at (lastNode);
745 if (isspace (c)) {
746 break;
747 }
748 if (!(isalnum(c)||(c=='_'))) {
749 ReportWarning (_String("Node names should begin with a letter, a number, or an underscore: ") & error_conext (*file_tree_string, i) );
750 i = file_tree_string->length() +2;
751 break;
752 }
753 while (isalnum(c)||(c=='_')) {
754 if (lastNode<file_tree_string->length()) {
755 lastNode++;
756 c = file_tree_string->char_at (lastNode);
757 } else {
758 break;
759 }
760 }
761 _String node_name (*file_tree_string, i, lastNode-1);
762 i = lastNode-1;
763 lastNode = translationsFrom.BinaryFindObject (&node_name);
764 if (lastNode != kNotFound) {
765 revisedTreeString<< (_String*)translationsTo.list_data[lastNode];
766 } else {
767 revisedTreeString<< node_name;
768 }
769 break;
770 }
771 }
772 }
773 if (treeLevel) {
774 ReportWarning (_String("Unbalanced '(,)' in the tree string:") & revisedTreeString.Enquote());
775 } else if (i==file_tree_string->length()) {
776 *((_String*)treeStrings.list_data[id]) = revisedTreeString;
777 }
778 }
779
780 if (treeSelected < treeStrings.lLength) {
781 hy_env :: EnvVariableSetNamespace(hy_env::data_file_tree, new HY_CONSTANT_TRUE,fState.theNamespace, false);
782 hy_env :: EnvVariableSetNamespace(hy_env::data_file_tree_string, new _FString(*(_String*)treeStrings.list_data[treeSelected], false),fState.theNamespace, false);
783 }
784
785 if (treeStrings.lLength) {
786 _StringBuffer initTreeMatrix (1024UL);
787
788 initTreeMatrix << hy_env::nexus_file_tree_matrix
789 << "={"
790 << _String ((long)treeStrings.lLength)
791 << ",2};\n";
792
793
794 for (long id = 0; id < treeStrings.lLength; id++) {
795 initTreeMatrix << hy_env::nexus_file_tree_matrix
796 << '['
797 << _String (id)
798 << "][0]=\""
799 << (_String*)treeIdents(id)
800 << "\";\n"
801 << hy_env::nexus_file_tree_matrix
802 << '['
803 << _String (id)
804 << "][1]=\""
805 << (_String*)treeStrings(id)
806 << "\";\n";
807 }
808
809 _ExecutionList el (initTreeMatrix);
810 el.Execute();
811 terminate_execution = false;
812 }
813 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
814 }
815
816 //_________________________________________________________
817
ProcessNexusHYPHY(FileState & fState,long pos,hyFile * file,_StringBuffer & CurrentLine,_DataSet &)818 void ProcessNexusHYPHY (FileState& fState, long pos, hyFile*file, _StringBuffer& CurrentLine, _DataSet&) {
819 static _String const endMark ("END;");
820 _StringBuffer bfBody (128UL);
821
822 long p2 = pos;
823 pos = CurrentLine.FindAnyCase (endMark,pos+1,kStringEnd);
824
825 fState.fileType = 0;
826
827 if (pos != kNotFound) {
828 bfBody << CurrentLine.Cut (p2,pos-1);
829 CurrentLine.Trim(pos+endMark.length(),-1);
830 } else {
831 bfBody << CurrentLine.Cut (p2,-1);
832 while (pos == kNotFound) {
833 ReadNextLine(file,&CurrentLine,&fState,false,false);
834 if (CurrentLine.empty()) {
835 break;
836 }
837
838 pos = CurrentLine.FindAnyCase (endMark,0,kStringEnd);
839 if (pos != kNotFound) {
840 if (pos > 0) {
841 bfBody << CurrentLine.Cut (0,pos-1);
842 }
843
844 CurrentLine.Trim (pos+endMark.length(), -1);
845 if (CurrentLine.empty()) {
846 ReadNextLine(file,&CurrentLine,&fState,false,false);
847 }
848
849 break;
850 } else {
851 bfBody << CurrentLine;
852 }
853
854 }
855 }
856 nexusBFBody = bfBody;
857
858 fState.fileType = 3;
859
860 CurrentLine = CurrentLine.ChangeCase(kStringUpperCase);
861
862 }
863
864 //_________________________________________________________
865
ProcessNexusData(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)866 bool ProcessNexusData (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
867 static const _String key1 ("DIMENSIONS"), key11 ("NTAX"), key12 ("NCHAR"),
868 key2 ("FORMAT"),key21 ("DATATYPE"), key22 ("MISSING"), key23 ("GAP"), key24 ("SYMBOLS"),
869 key25 ("EQUATE"), key26 ("MATCHCHAR"), key27 ("NOLABELS"), key28 ("INTERLEAVE"), key3 ("MATRIX"), keyEnd ("END");
870
871 _String newAlph;
872
873 bool done = false,
874 labels = true;
875
876 char charState = 0;
877
878 _List translations;
879 char missing = '?', gap = '-' , repeat = '.', charSwitcher;
880
881 long offSet = 0L, count, spExp = result.GetNames().lLength, sitesExp = 0;
882
883 while (!done) {
884 if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
885 break;
886 }
887
888 if (CurrentLine.BeginsWith (keyEnd, false)) {
889 pos = -1;
890 break;
891 }
892
893 if (CurrentLine.BeginsWith (key1, false)) {
894 // DIMENSIONS
895 offSet = key1.length ();
896 while (!done) {
897 _StringBuffer buffer (128UL);
898 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, true,true,false,false);
899
900 if (buffer.BeginsWith(key11, false)) {
901 if (result.GetNames().lLength) {
902 ReportWarning ("NTAX will override the definition of taxa names from the TAXA block");
903 }
904 if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
905 ReportWarning ("NTAX is not followed by '= number-of-taxa'");
906 done = true;
907 } else {
908 done = done||(count>1);
909 spExp = buffer.to_long();
910 if(spExp<=0L) {
911 ReportWarning ("NTAX must be a positive number");
912 done = true;
913 spExp = result.GetNames().lLength?result.GetNames().lLength:1;
914 }
915 }
916 } else if (buffer.BeginsWith(key12, false)) {
917 if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
918 ReportWarning ("NCHAR is not followed by '= number-of-charaters'");
919 done = true;
920 } else {
921 done = done||(count>1);
922 sitesExp = buffer.to_long();
923 }
924 }
925 offSet = 0L;
926 }
927 done = false;
928 } else if (CurrentLine.BeginsWith (key2, false)) {
929 // FORMAT
930 offSet = key2.length();
931 while (!done) {
932 charSwitcher = 0;
933 _StringBuffer buffer (128UL);
934 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, true,true,false,false);
935 offSet = 0L;
936 buffer.Trim (buffer.FirstNonSpaceIndex(),kStringEnd);
937 if (buffer.BeginsWith(key21)) { // datatype
938 if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
939 ReportWarning ("DATATYPE is not followed by '= DNA|RNA|NUCLEOTIDE|PROTEIN|BINARY'");
940 done = true;
941 } else {
942 done = done||(count>1);
943 if ( buffer == _String("DNA") || buffer == _String("RNA") || buffer == _String("NUCLEOTIDE" )) {
944 if (newAlph.nonempty()) {
945 ReportWarning (_String("DNA|RNA|NUCLEOTIDE datatype directive will over-ride the custom symbols definition: ") & newAlph.Enquote());
946 newAlph.Clear();
947 }
948 if (done) {
949 done = false;
950 break;
951 }
952 continue;
953 } else if (buffer==_String("PROTEIN") || buffer == _String ("BINARY")) {
954 charState = 1+(buffer==_String("BINARY"));
955 if (newAlph.nonempty()) {
956 newAlph = kEmptyString;
957 ReportWarning (_String("PROTEIN|BINARY datatype directive will override the custom symbols definition: ") & newAlph.Enquote());
958 newAlph.Clear();
959 }
960 if (done) {
961 done = false;
962 break;
963 }
964 continue;
965 } else {
966 ReportWarning (buffer.Enquote() &" is not a recognized data type (DNA|RNA|NUCLEOTIDE|PROTEIN|BINARY are allowed).");
967 done = false;
968 }
969 }
970 } else if (buffer.BeginsWith (key22, false)) { // MISSING
971 charSwitcher = 1;
972 } else if (buffer.BeginsWith (key23, false)) { // GAP
973 charSwitcher = 2;
974 } else if (buffer.BeginsWith (key26, false)) { // MATCHCHAR
975 charSwitcher = 3;
976 } else if (buffer.BeginsWith (key27, false)) { // NOLABELS
977 labels = false;
978 } else if (buffer.BeginsWith (key28, false)) { // INTERLEAVE
979 fState.interleaved = true;
980 } else if (buffer.BeginsWith(key24, false)) { // SYMBOLS
981 count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer, true,false);
982 if (buffer.empty()) {
983 ReportWarning (buffer.Enquote() & _String("is not of the form SYMBOLS = \"sym1 sym2 ...\". The entire block is ignored."));
984 done = true;
985 break;
986 }
987 _StringBuffer tempNewAlpha (128UL);
988 for (long pos1 = 0; pos1<buffer.length (); pos1++) {
989 charSwitcher = buffer.char_at (pos1);
990 if (!isspace(charSwitcher)) {
991 tempNewAlpha<<charSwitcher;
992 }
993
994 }
995 if (done) {
996 break;
997 }
998 newAlph = tempNewAlpha;
999 charSwitcher = 0;
1000 done = done||(count>1);
1001 } else if (buffer.BeginsWith(key25, false)) { // EQUATE
1002 buffer.Trim(key25.length(),kStringEnd);
1003 if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0,buffer,true,false))) {
1004 ReportWarning (buffer.Enquote ()&" is not followed by '=char'");
1005 done = true;
1006 }
1007 done = done||(count>1);
1008 // blank now contains a full list of the form token=(token)
1009 _String symbol, meaning;
1010 bool symbolDefined = false, meaningDefined = false;
1011 for (count=0; count<buffer.length(); count++) {
1012 charSwitcher = buffer.char_at (count);
1013 if (isspace(charSwitcher)) {
1014 continue;
1015 } else if (charSwitcher == '=') {
1016 if (symbolDefined&&!meaningDefined) {
1017 meaningDefined = true;
1018 }
1019 } else
1020 if (!symbolDefined) {
1021 symbolDefined = true;
1022 symbol = charSwitcher;
1023 continue;
1024 }
1025 if (!meaningDefined) {
1026 ReportWarning("EQUATE can only be used to define single-character tokens. Ignoring the EQUATE command.");
1027 translations.Clear();
1028 break;
1029 }
1030 meaning = meaning & charSwitcher;
1031 }
1032 if (symbol.length () && meaning.length () ) {
1033 translations < new _String (symbol);
1034 translations < new _String (meaning);
1035 }
1036 charSwitcher = 0;
1037 buffer.Clear();
1038 }
1039
1040 offSet = 0;
1041
1042 _String built_in;
1043
1044 if (charSwitcher) {
1045 switch (charSwitcher) {
1046 case 1:
1047 built_in = "MISSING";
1048 break;
1049 case 2:
1050 built_in = "GAP";
1051 break;
1052 case 3:
1053 built_in = "MATCHCHAR";
1054 break;
1055 }
1056 if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer, true))) {
1057 ReportWarning (buffer.Enquote() & " is not followed by '=char'");
1058 done = true;
1059 } else {
1060 done = done||(count>1);
1061 if (buffer.length () !=1) {
1062 ReportWarning (buffer.Enquote() &" is not a valid " & built_in &" character.");
1063 }
1064 }
1065 switch (charSwitcher) {
1066 case 1:
1067 missing = buffer.char_at (0);
1068 if (gap == missing) {
1069 gap = 0;
1070 }
1071 if (repeat == missing) {
1072 repeat = 0;
1073 }
1074
1075 break;
1076 case 2:
1077 gap = buffer.char_at (0);
1078 if (missing == gap) {
1079 missing = 0;
1080 }
1081 if (repeat == gap) {
1082 repeat= 0;
1083 }
1084
1085 break;
1086 case 3:
1087 repeat = buffer.char_at(0);
1088 if (missing == repeat) {
1089 missing = 0;
1090 }
1091 if (repeat == gap) {
1092 gap = 0;
1093 }
1094
1095 break;
1096 }
1097 }
1098
1099 if (done) {
1100 done = false;
1101 break;
1102 }
1103 done = false;
1104 }
1105 } else if (CurrentLine.BeginsWith (key3, false)) { // matrix instruction
1106 // if needed, set up a new symbol set
1107 offSet = key3.length();
1108 if (newAlph.length()>1) { // a valid new alphabet set
1109 checkTTStatus (&fState);
1110 fState.translationTable->AddBaseSet (newAlph);
1111 } else {
1112 if (charState) {
1113 checkTTStatus (&fState);
1114 if (charState==1) {
1115 newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
1116 fState.translationTable->baseLength = 20;
1117 } else {
1118 newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
1119 fState.translationTable->baseLength = 2;
1120 }
1121 } else {
1122 newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
1123 }
1124 }
1125 // set up translations
1126 if (translations.lLength) {
1127 checkTTStatus (&fState);
1128 }
1129
1130 for (long k = 0; k<translations.lLength; k+=2) {
1131 char c = ((_String*)translations(k))->char_at (0);
1132 fState.translationTable->AddTokenCode (c,*((_String*)translations(k+1)));
1133 }
1134
1135 if (fState.translationTable->GetSkipChar()!=missing) {
1136 checkTTStatus (&fState);
1137 fState.translationTable->AddTokenCode (missing,newAlph);
1138 }
1139
1140 if (fState.translationTable->GetGapChar()!=gap) {
1141 checkTTStatus (&fState);
1142 newAlph = "";
1143 fState.translationTable->AddTokenCode (gap,newAlph);
1144 }
1145
1146 if (repeat == missing) {
1147 repeat = 0;
1148 }
1149
1150 fState.repeat = repeat;
1151 fState.skip = missing;
1152
1153 //fState.totalSitesExpected = sitesExp;
1154
1155 // now proceed to read the data
1156
1157 long loopIterations = 0;
1158 if (labels == true) {
1159 result.ClearNames();
1160 }
1161
1162
1163 while (1) {
1164
1165 _StringBuffer buffer (128L),
1166 buffer_2 (128L),
1167 * source;
1168
1169 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet?offSet+1:0, buffer, true, true,true,!labels,false);
1170 offSet = 0;
1171 // in each line that should produce first the name of the taxon
1172 // and then the data string for the taxon
1173
1174 if (labels) {
1175 if (result.GetNames().lLength<spExp) {
1176 if (spExp>0 && buffer.empty ()) {
1177 ReportWarning (_String("Could not find NTAX taxon names in the matrix. Read: ")&_String((long)result.GetNames().lLength) & " sequences.");
1178 break;
1179 }
1180
1181 if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1182 result.AddName(buffer);
1183 fState.totalSpeciesExpected++;
1184 }
1185 } else {
1186 if (done) {
1187 break;
1188 }
1189 }
1190
1191 if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1192 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer_2, true, true,true,true,false);
1193 source = &buffer_2;
1194 } else {
1195 source = &buffer;
1196 }
1197 } else {
1198 if (loopIterations<spExp) {
1199 if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1200 fState.totalSpeciesExpected++;
1201 } else {
1202 loopIterations --;
1203 }
1204 }
1205 source = &buffer;
1206 }
1207
1208 if (source->empty()) {
1209 ReportWarning (_String("Could not find NTAX data strings in the matrix. Read: ")&_String((long)result.GetNames().lLength) & " sequences.");
1210 break;
1211 }
1212 loopIterations++;
1213 ISelector (fState, *source, result);
1214
1215 if (done)
1216 if (loopIterations>=fState.totalSpeciesExpected) {
1217 break; // finished reading
1218 }
1219
1220 if ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) {
1221 break;
1222 }
1223 }
1224
1225
1226 if (result.GetNames().lLength!=spExp) {
1227 ReportWarning(_String ("Expected ")&spExp&" taxa, but found "&(long)result.GetNames().lLength);
1228 }
1229 if (result.lLength!=sitesExp && result.InternalStorageMode() == 0) {
1230 ReportWarning(_String ("Expected ")&sitesExp&" sites, but found "&(long)result.lLength);
1231 }
1232 if (spExp && loopIterations%spExp) {
1233 ReportWarning(_String ("There is an inconsistency between NTAX and the number of data strings in the matrix"));
1234 }
1235 done = true;
1236 } else {
1237 ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY");
1238 while (!done) {
1239 _StringBuffer buffer (128L);
1240 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, false,true,false,false);
1241 }
1242 done = false;
1243 }
1244 if (!done) {
1245 if (CurrentLine.empty ()) {
1246 ReadNextLine(f,&CurrentLine,&fState,false);
1247 }
1248 pos = 0;
1249 if (CurrentLine.empty () ) {
1250 done = true;
1251 }
1252 }
1253 }
1254
1255 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
1256 return true;
1257 }
1258
1259 //_________________________________________________________
1260
ReadNexusFile(FileState & fState,hyFile * file,_DataSet & result)1261 void ReadNexusFile (FileState& fState, hyFile*file, _DataSet& result) {
1262 bool dataRead = false, lookForEnd = false;
1263 long f,g, file_line = fState.currentFileLine;
1264
1265 fState.fileType = 3; // NEXUS
1266 static const _String beginMark ("BEGIN"), endMark ("END"), data ("DATA"), chars ("CHARACTERS"),
1267 taxa ("TAXA"), trees ("TREES"), assumptions ("ASSUMPTIONS"), hyphy ("HYPHY"), sets ("SETS");
1268
1269 _StringBuffer CurrentLine, blockName;
1270
1271 ReadNextLine(file,&CurrentLine,&fState,false);
1272 while (CurrentLine.nonempty()) {
1273 f = 0;
1274 /** TODO SLKP 20180921 : if any of the commands loads a new CurrentLine, the marker 'f' needs to be reset
1275 but currently we have no way of knowing whether or not a new line was loaded.
1276 For the time-being fixing by adding a line # tracker for fState
1277 */
1278 while ((f = CurrentLine.FindAnyCase(beginMark,file_line == fState.currentFileLine ? f : 0L,kStringEnd ))>=0) {
1279 file_line = fState.currentFileLine;
1280
1281 f = CurrentLine.FirstNonSpaceIndex (f+beginMark.length(),kStringEnd,kStringDirectionForward);
1282 if (f!=-1) { // process
1283 g = CurrentLine.Find (';', f, -1);
1284 if (g!=kNotFound) {
1285 blockName = CurrentLine.Cut (f,g-1);
1286 // dispatch to block readers
1287 if (blockName.EqualIgnoringCase(data)) {
1288 ReportWarning (blockName.Enquote() & " block is now deprecated in NEXUS and should not be used.");
1289
1290 if (!dataRead) {
1291 dataRead = ProcessNexusData (fState, g+1, file, CurrentLine, result);
1292 }
1293 //SkipUntilNexusBlockEnd (fState,file,CurrentLine,f);
1294
1295 else {
1296 ReportWarning ("Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.");
1297 }
1298 } else if (blockName.EqualIgnoringCase(taxa)) {
1299 if (!dataRead) {
1300 ProcessNexusTaxa (fState, g+1, file, CurrentLine, result);
1301 } else {
1302 ReportWarning ("The TAXA block was encountered after CHARACTER had been read and will be ignored.");
1303 }
1304 } else if (blockName.EqualIgnoringCase(trees)) {
1305 ProcessNexusTrees (fState, g+1, file, CurrentLine, result);
1306 } else if (blockName.EqualIgnoringCase(chars)) {
1307 if (!dataRead) {
1308 dataRead = ProcessNexusData (fState, g+1, file, CurrentLine, result);
1309 } else {
1310 ReportWarning ("Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.");
1311 }
1312 } else if (blockName.EqualIgnoringCase(assumptions)||blockName.EqualIgnoringCase(sets)) {
1313 ProcessNexusAssumptions (fState, g+1, file, CurrentLine, result);
1314 } else if (blockName.EqualIgnoringCase(hyphy)) {
1315 ProcessNexusHYPHY (fState, g+1, file, CurrentLine, result);
1316 } else {
1317 ReportWarning (_String("NEXUS blocks ")&blockName.Enquote()&(" are not used by HYPHY."));
1318 lookForEnd = true;
1319 break;
1320 // now look for the end of this block
1321 }
1322
1323 } else {
1324 break;
1325 }
1326 } else {
1327 ReportWarning (_String ("NEXUS BEGIN must be followed by the name of the block. Skipping until next BEGIN statement."));
1328 break;
1329 }
1330 }
1331
1332 if (lookForEnd) {
1333 lookForEnd = false;
1334 SkipUntilNexusBlockEnd (fState,file,CurrentLine,f);
1335 } else {
1336 ReadNextLine(file,&CurrentLine,&fState,false);
1337 }
1338
1339 }
1340
1341 }
1342