1 /*
2 
3  HyPhy - Hypothesis Testing Using Phylogenies.
4 
5  Copyright (C) 1997-now
6  Core Developers:
7  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8  Art FY Poon    (apoon42@uwo.ca)
9  Steven Weaver (sweaver@temple.edu)
10 
11  Module Developers:
12  Lance Hepler (nlhepler@gmail.com)
13  Martin Smith (martin.audacis@gmail.com)
14 
15  Significant contributions from:
16  Spencer V Muse (muse@stat.ncsu.edu)
17  Simon DW Frost (sdf22@cam.ac.uk)
18 
19  Permission is hereby granted, free of charge, to any person obtaining a
20  copy of this software and associated documentation files (the
21  "Software"), to deal in the Software without restriction, including
22  without limitation the rights to use, copy, modify, merge, publish,
23  distribute, sublicense, and/or sell copies of the Software, and to
24  permit persons to whom the Software is furnished to do so, subject to
25  the following conditions:
26 
27  The above copyright notice and this permission notice shall be included
28  in all copies or substantial portions of the Software.
29 
30  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38  */
39 
40 #include <math.h>
41 #include <string.h>
42 #include <ctype.h>
43 
44 #include "hy_string_buffer.h"
45 #include "dataset.h"
46 #include "list.h"
47 #include "batchlan.h"
48 #include "hbl_env.h"
49 #include "global_object_lists.h"
50 #include "global_things.h"
51 
52 
53 using namespace hy_global;
54 using namespace hyphy_global_objects;
55 
56 
57 //_________________________________________________________
58 
// NOTE(review): presumably the data set most recently filled from a NEXUS data
// matrix; nothing in the visible part of this file assigns it -- confirm at the
// point of use.
_DataSet* lastNexusDataMatrix = nil;
// NOTE(review): presumably accumulates the batch-language text found in a NEXUS
// HYPHY block; not written to in the visible part of this file -- confirm.
_StringBuffer   nexusBFBody;



// Forward declarations of the file-parsing helpers used by the NEXUS reader.
// Only some of them are defined in this translation unit; the remaining ones are
// expected to be provided elsewhere.
void    checkTTStatus               (FileState* fs);
void    processCommand              (_StringBuffer*s, FileState*fs);
void    FilterRawString             (_StringBuffer& s, FileState* fs, _DataSet & ds);
long    ProcessLine                 (_StringBuffer&s , FileState *fs, _DataSet& ds);
void    PadLine                     (FileState& fState, _DataSet& result);
void    ISelector                   (FileState& fState, _StringBuffer& CurrentLine, _DataSet& result);
bool    SkipLine                    (_StringBuffer& theLine, FileState* fS);
void    TrimPhylipLine              (_StringBuffer& CurrentLine, _DataSet& ds);
// Per-block NEXUS handlers; all share the (state, position, file, current line,
// data set) parameter list.
bool    ProcessNexusData            (FileState&, long,  hyFile*, _StringBuffer&, _DataSet&);
void    ProcessNexusHYPHY           (FileState&, long,  hyFile*, _StringBuffer&, _DataSet&);
void    ProcessNexusAssumptions     (FileState&, long,  hyFile*, _StringBuffer&, _DataSet&);
void    ProcessNexusTaxa            (FileState&,long, hyFile*, _StringBuffer&, _DataSet&);
void    ProcessNexusTrees           (FileState&, long, hyFile*, _StringBuffer&, _DataSet&);
// Low-level NEXUS tokenizing helpers.
bool    FindNextNexusToken          (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos);
bool    SkipUntilNexusBlockEnd      (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos);
// Trailing flags, in order: stopOnSpace, stopOnComma (default true), stopOnQuote (default true),
// NLonly, preserveSpaces, preserveQuotes (all default false).
bool    ReadNextNexusStatement      (FileState&, hyFile* , _StringBuffer&, long, _StringBuffer&, bool, bool = true, bool = true, bool = false, bool = false, bool = false);
// Trailing flags, in order: resetP (default false), demandSemicolon (default true).
long    ReadNextNexusEquate         (FileState&, hyFile* , _StringBuffer&, long, _String&, bool = false, bool = true);
void    NexusParseEqualStatement    (_StringBuffer&);

// Builds the context snippet appended to parser warnings: the part of `buffer`
// cut from 0 to `position`, the marker " <=? ", and the part cut from
// `position+1` to the end, with the concatenation passed through Enquote().
// The identifier is misspelled ("conext") but is referenced by that name further
// down in this file, so it must not be renamed in isolation.
static auto  error_conext = [] (_String const& buffer, long position) -> const _String {return (buffer.Cut (0,position) & " <=? " & buffer.Cut (position+1,kStringEnd)).Enquote();};
84 
85 
86 //_________________________________________________________
87 
FindNextNexusToken(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos)88 bool    FindNextNexusToken (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos) {
89 
90     pos = CurrentLine.FirstNonSpaceIndex (pos,-1,kStringDirectionForward);
91     if (pos==kNotFound) {
92         ReadNextLine(f,&CurrentLine,&fState,false);
93         pos = CurrentLine.FirstNonSpaceIndex (0,-1,kStringDirectionForward);
94         if (pos==kNotFound) {
95             return false;
96         }
97     }
98     CurrentLine.Trim (pos, kStringEnd);
99     return true;
100 }
101 
102 
103 //_________________________________________________________
104 
SkipUntilNexusBlockEnd(FileState & fState,hyFile * file,_StringBuffer & CurrentLine,long pos)105 bool    SkipUntilNexusBlockEnd (FileState& fState, hyFile* file, _StringBuffer& CurrentLine, long pos) {
106     static const _String endMark ("END");
107     pos = CurrentLine.Find (endMark,pos+1,kStringEnd);
108     while (pos == kNotFound) {
109         ReadNextLine(file,&CurrentLine,&fState,false);
110         if (CurrentLine.empty()) {
111             return false;
112         }
113         pos = CurrentLine.Find (endMark,0,kStringEnd);
114         if (pos != kNotFound) {
115             pos = CurrentLine.Find (';',pos+endMark.length(),kStringEnd);
116             if (pos != kNotFound) {
117                 CurrentLine.Trim (pos+endMark.length(), kStringEnd);
118                 if (CurrentLine.empty()) {
119                     ReadNextLine(file,&CurrentLine,&fState,false);
120                 }
121             } else {
122                 ReportWarning ("Found END w/o a trailing semicolon. Assuming end of block and skipping the rest of the line.");
123                 ReadNextLine(file,&CurrentLine,&fState,false);
124             }
125             return true;
126         }
127     }
128     return false;
129 }
130 //_________________________________________________________
NexusParseEqualStatement(_StringBuffer & source)131 void    NexusParseEqualStatement (_StringBuffer& source)
132 {
133     long f = source.Find('=');
134     if (f != kNotFound) {
135         f = source.FirstNonSpaceIndex (f+1,kStringEnd);
136         if (f != kNotFound) {
137             source.Trim (f,kStringEnd);
138             return;
139         }
140     }
141     source.Clear();
142 
143 }
144 //_________________________________________________________
145 
ReadNextNexusStatement(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos,_StringBuffer & blank,bool stopOnSpace,bool stopOnComma,bool stopOnQuote,bool NLonly,bool preserveSpaces,bool preserveQuotes)146 bool ReadNextNexusStatement (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos, _StringBuffer & blank, bool stopOnSpace, bool stopOnComma, bool stopOnQuote, bool NLonly, bool preserveSpaces, bool preserveQuotes) {
147     bool done          = false,
148          insideLiteral = false,
149          startedReading = false;
150 
151     long newPos = pos>0?pos+1L:pos;
152     char c = '\0';
153 
154     while (1) {
155         while (newPos<CurrentLine.length()) {
156             c = CurrentLine.char_at (newPos);
157             if (isspace(c)) {
158                 if (stopOnSpace && startedReading && (!insideLiteral) && (!NLonly || (NLonly && (c=='\r' || c=='\n')))) {
159                     done = true;
160                     break;
161                 } else {
162                     if (insideLiteral||preserveSpaces) {
163                         blank<<' ';
164                     }
165                 }
166             } else {
167                 if (c==';' && ! insideLiteral) { // terminate always
168                     done = true;
169                     newPos++;
170                     break;
171                 } else if (stopOnComma && c==',' && ! insideLiteral) {
172                     done = true;
173                     newPos++;
174                     break;
175                 } else if (! preserveQuotes && (c=='\'' || c=='"') ) {
176                     if (c=='\'') {
177                         if (newPos+1<CurrentLine.length())
178                             // check for a double quote
179                         {
180                             c = CurrentLine.char_at (newPos+1);
181                             if (c=='\'') {
182                                 newPos += 2;
183                                 blank<<c;
184                                 continue;
185                             }
186                             //else
187                             //  if (!startedReading || insideLiteral)
188                             //      newPos--;
189                         }
190                     }
191                     if (startedReading &&stopOnQuote) {
192                         done = true;
193                         newPos++;
194                         break;
195                     } else {
196                         insideLiteral = !insideLiteral;
197                     }
198                 } else {
199                     startedReading = true;
200                     blank<<c;
201                 }
202             }
203             newPos++;
204         }
205         if (!done) {
206             if (NLonly&&(blank.FirstNonSpaceIndex(0,kStringEnd,kStringDirectionForward)>=0)) {
207                 break;
208             }
209             ReadNextLine(f,&CurrentLine,&fState,false);
210             newPos = 0;
211             if (CurrentLine.empty()) {
212                 c=';';
213                 break;
214             }
215         } else {
216             break;
217         }
218 
219     }
220     // TODO 20170821: SLKP, this needs to be case sensitive
221     blank.ChangeCaseInPlace(kStringUpperCase);
222     if (newPos<CurrentLine.length()) {
223         CurrentLine.Trim (newPos,kStringEnd);
224     } else {
225         CurrentLine.Clear();
226     }
227     return c==';';
228 }
229 
230 //_________________________________________________________
231 
ReadNextNexusEquate(FileState & fState,hyFile * f,_StringBuffer & CurrentLine,long pos2,_String & blank,bool resetP,bool demandSemicolon)232 long    ReadNextNexusEquate (FileState& fState, hyFile* f, _StringBuffer& CurrentLine, long pos2, _String& blank, bool resetP, bool demandSemicolon) {
233     long pos = blank.Find ('=',pos2,-1), res;
234     if (pos>=0) {
235         if (pos<blank.length()-1) {
236             blank.Trim (pos+1,-1);
237             return 1;
238         } else {
239             _StringBuffer buffer (128UL);
240             res = ReadNextNexusStatement (fState, f, CurrentLine, resetP?0:pos, buffer, true, true, false,false,false);
241             if (!buffer.empty()) {
242                 blank = buffer;
243                 return res?2:1;
244             }
245         }
246         return 0;
247     } else {
248         _StringBuffer buffer (128UL);
249         res = ReadNextNexusStatement (fState, f, CurrentLine, pos2, buffer, true, true, false,false,false)?2:1;
250         if (res!=2 && demandSemicolon) {
251             if((res=ReadNextNexusEquate (fState, f, CurrentLine, 0, buffer))) {
252                 blank = buffer;
253                 return res;
254             }
255         } else if((res = ReadNextNexusEquate (fState, f, CurrentLine, 0, buffer, resetP, false))) {
256             blank = buffer;
257             return res;
258         } else {
259             return 0;
260         }
261     }
262     return 0;
263 }
264 
265 //_________________________________________________________
ProcessNexusTaxa(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)266 void    ProcessNexusTaxa (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
267     static const _String key1 = "DIMENSIONS", key2 = "NTAX", key3 = "TAXLABELS", keyEnd = "END";
268 
269     bool    done = false;
270 
271     long    speciesExpected = -1, offset;
272 
273     while (!done) {
274         if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
275             break;
276         }
277         // now that we've got stuff to work with see what it is
278 
279         if (CurrentLine.BeginsWith (keyEnd, false)) {
280             pos = -1;
281             break;
282         }
283 
284         if (CurrentLine.BeginsWith (key1, false)) {
285             if (result.GetNames().lLength) { // check the number of dimensions
286                 // some data already present
287                 ReportWarning ("Only one taxa definition per NEXUS file is recognized, the others will be ignored.");
288                 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
289                 break;
290             } else {
291                 _StringBuffer buffer (128UL);
292                 ReadNextNexusStatement (fState, f, CurrentLine, key1.length(), buffer, false,true, true,false,false);
293                 // this will actually return '= number'
294                 NexusParseEqualStatement (buffer);
295                 speciesExpected = buffer.to_long();
296             }
297         } else if (CurrentLine.BeginsWith (key3, false)) {
298             if (speciesExpected == -1) {
299                 ReportWarning ("TAXLABELS must be preceded by a valid NTAX statement. Skipping the entire TAXA block.");
300                 SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
301                 break;
302             } else {
303                 offset = key3.length();
304                 do {
305                     _StringBuffer buffer (128UL);
306                     if (ReadNextNexusStatement (fState, f, CurrentLine,offset, buffer, true,true,true,false,false)) {
307                         if (buffer.nonempty()) {
308                           result.AddName(buffer);
309                         }
310                         break;
311                     } else {
312                         if (buffer.nonempty()) {
313                           result.AddName(buffer);;
314                         }
315                     }
316                     offset = 0;
317 
318                 } while (1);
319                 if (result.GetNames().lLength!=speciesExpected) {
320                     ReportWarning ( _String ("TAXALABELS provided ") &
321                                     _String ((long)result.GetNames().lLength) &" species, whereas the NTAX statement promised:" &
322                                     _String (speciesExpected) & ". HYPHY will use TAXALABELS count.");
323                 }
324                 done = true;
325             }
326         } else {
327             long offSet = 0;
328 
329             ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY");
330             while (!done) {
331                 _StringBuffer buffer (128UL);
332                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
333             }
334             done = false;
335         }
336 
337         if (!done) {
338             pos = 0;
339         }
340     }
341 
342     SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
343 }
344 
345 //_________________________________________________________
346 
ProcessNexusAssumptions(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet &)347 void    ProcessNexusAssumptions (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet&) {
348     static const _String key1 = "CHARSET", keyEnd = "END";
349 
350     bool    done = false;
351 
352     _List   charSetIDs,
353             charSetSpec;
354 
355     while (!done) {
356         if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
357             break;
358         }
359         // now that we've got stuff to work with see what it is
360 
361         if (CurrentLine.BeginsWith (keyEnd, false)) {
362             pos = -1;
363             break;
364         }
365 
366         if (CurrentLine.BeginsWith (key1, false)) { // actual tree strings & idents
367             _StringBuffer buffer (128UL);
368             if (!ReadNextNexusStatement (fState, f, CurrentLine, key1.length(), buffer, false, false, false,false,true)) {
369                 ReportWarning ("CHARSET construct not followed by ';'.");
370                 break;
371             } else {
372                 pos = buffer.Find ('=',1,kStringEnd);
373                 if (pos==-1) {
374                     ReportWarning (buffer.Enquote() & " is not of the form Charset ID = specification of the partition.");
375                 } else {
376                     long pos2 = buffer.FirstNonSpaceIndex (0,pos-1,kStringDirectionBackward);
377                     if (pos2 != kNotFound) {
378                         long j = buffer.FirstNonSpaceIndex (0,pos2-1,kStringDirectionForward);
379                         if (j != kNotFound) {
380                             if (buffer.char_at (j) == '*') {
381                                 j = buffer.FirstNonSpaceIndex (j+1,pos2-1,kStringDirectionForward);
382                             }
383 
384                             if (j != kNotFound) {
385                                 _String nexus_name (buffer,j,pos2),
386                                         charset_id (nexus_name);
387 
388                                 if (!nexus_name.IsValidIdentifier(fIDAllowCompound)) {
389                                     charset_id = nexus_name.ConvertToAnIdent();
390                                 }
391 
392                                 charset_id = charSetIDs.GenerateUniqueNameForList(GenerateUniqueObjectIDByType (nexus_name, HY_BL_DATASET_FILTER), false);
393 
394                                 if (charset_id != nexus_name) {
395                                   ReportWarning(nexus_name.Enquote('\'') & " has been renamed to " & charset_id.Enquote('\'') & " to avoid naming conflicts and/or comply with HyPhy ID requirements");
396                                 }
397 
398 
399                                 //  now get the rest of the tree string
400                                 pos2 = buffer.FirstNonSpaceIndex(pos+1,kStringEnd);
401                                 pos  = buffer.FirstNonSpaceIndex(pos2,kStringEnd,kStringDirectionBackward);
402                                 buffer.Trim (pos2,pos);
403                                 buffer  = buffer.CompressSpaces () & " ";
404 
405                                 _StringBuffer hpSpec (buffer.length()+1UL);
406 
407                                 _String numberOne,
408                                         numberTwo,
409                                         numberThree;
410 
411                                 bool    spoolInto2nd = false,
412                                         spoolInto3rd = false,
413                                         okFlag         = true,
414                                         firstFlag  = true;
415 
416                                 for (long k=0; k<buffer.length(); k++) {
417                                     char ch = buffer.char_at(k);
418 
419                                     if ((ch>='0' && ch<='9') || ch=='.') {
420                                         if (spoolInto2nd) {
421                                             numberTwo = numberTwo & ch;
422                                         } else if (spoolInto3rd) {
423                                             numberThree = numberThree & ch;
424                                         } else {
425                                             numberOne = numberOne & ch;
426                                         }
427                                     }
428 
429                                     if (ch==' ') {
430                                         if (numberTwo.length() == 1 && numberTwo.char_at (0) == '.') {
431                                             numberTwo = (long)fState.totalSitesRead;
432                                         }
433 
434                                         if (spoolInto3rd) {
435                                             spoolInto3rd = false;
436                                             // handle 'every' n-th
437 
438 
439                                             long    from = numberOne.to_long()-1,
440                                                     upto = numberTwo.to_long()-1,
441                                                     step = numberThree.to_long();
442 
443                                             if ((upto>=from)&&(step>0)) {
444                                                 if (!firstFlag) {
445                                                     hpSpec << ',';
446                                                 }
447                                                 hpSpec << _String(from);
448                                                 for (long kk = from+step; kk<=upto; kk+=step) {
449                                                     hpSpec << ',' << (_String)(kk);
450                                                 }
451 
452                                                 numberOne.Clear();
453                                                 numberTwo.Clear();
454                                                 numberThree.Clear();
455 
456                                             } else {
457                                                  ReportWarning (_String("Invalid from-to\\step specification: ") & error_conext (buffer, k));
458                                                 okFlag = false;
459                                                 break;
460                                             }
461 
462                                             firstFlag = false;
463                                         } else {
464                                             if (spoolInto2nd) {
465                                                 spoolInto2nd = false;
466                                                 if (!firstFlag) {
467                                                     hpSpec << ',';
468                                                 }
469 
470                                                 numberOne = numberOne.to_long ()-1;
471                                                 hpSpec << numberOne;
472                                                 numberOne = ch;
473                                                 hpSpec << '-';
474                                                 numberTwo = numberTwo.to_long()-1;
475                                                 hpSpec << numberTwo;
476                                                 numberTwo.Clear();
477                                                 firstFlag = false;
478 
479                                             } else {
480                                               long n1;
481                                                 if (numberOne.nonempty() && (n1 = numberOne.to_long() > 0)) {
482                                                     numberOne = n1-1;
483                                                     if (!firstFlag) {
484                                                         hpSpec << ',';
485                                                     }
486                                                     hpSpec << numberOne;
487                                                 }
488                                                 numberOne.Clear();
489                                                 firstFlag = false;
490                                             }
491                                         }
492                                         //hitASpace = true;
493 
494                                     } else if (ch=='-') {
495                                         if (spoolInto2nd||spoolInto3rd) {
496                                             ReportWarning (_String("Misplaced '-' in CHARSET specification: ") & error_conext (buffer, k));
497                                             okFlag = false;
498                                             break;
499                                         }
500                                         spoolInto2nd = true;
501                                     } else if (ch=='\\') {
502                                         if ((!spoolInto2nd)||spoolInto3rd) {
503                                             ReportWarning (_String("Misplaced '\\' in CHARSET specification: ") & buffer.Enquote());
504                                             okFlag = false;
505                                             break;
506                                         }
507                                         spoolInto2nd = false;
508                                         spoolInto3rd = true;
509                                     }
510                                 }
511 
512 
513                                 if (okFlag) {
514                                     charSetIDs  && & charset_id;
515                                     charSetSpec && & hpSpec;
516                                 }
517                             }
518                         }
519                         if (j<0) {
520                             ReportWarning (_String("Could not find a charset identifier in: ")& buffer.Enquote());
521                         }
522                     } else {
523                         ReportWarning (buffer.Enquote() &" is not of the form CharSetID = char set string");
524                     }
525                 }
526             }
527         } else {
528             long offSet = 0L;
529 
530             ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,-1)) & " is not used by HYPHY");
531             while (!done) {
532                 _StringBuffer buffer (128UL);
533                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
534             }
535             done = false;
536         }
537 
538         if (!done) {
539             //ReadNextLine(f,&CurrentLine,&fState,false);
540             pos = 0;
541         }
542     }
543 
544     if (charSetIDs.lLength) {
545         _StringBuffer defineCharsets (256UL);
546 
547         defineCharsets << hy_env::data_file_partition_matrix << "={2," << _String ((long)charSetIDs.lLength) << "};\n";
548 
549         for (long id = 0; id < charSetIDs.lLength; id++) {
550             defineCharsets << hy_env::data_file_partition_matrix
551              << "[0]["
552              << _String (id)
553              << "]:=\""
554              << (_String*)charSetIDs(id)
555              << "\";\n"
556              << hy_env::data_file_partition_matrix
557              << "[1]["
558              << _String (id)
559              << "]:=\""
560              << (_String*)charSetSpec(id)
561              << "\";\n";
562         }
563          _ExecutionList defMx (defineCharsets);
564         defMx.Execute();
565         terminate_execution = false;
566     }
567 
568     SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
569 }
570 
571 //_________________________________________________________
572 
ProcessNexusTrees(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)573 void    ProcessNexusTrees (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
574     static _String const key1 = "TRANSLATE", key2 = "TREE", errMsg, keyEnd = "END";
575 
576     bool    done = false, readResult, good;
577     _List   translationsFrom, translationsTo;
578     _List   treeIdents, treeStrings;
579     long    treeSelected = 0, insertPos = 0;
580 
581     while (!done) {
582 
583         if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
584             break;
585         }
586         // now that we've got stuff to work with see what it is
587 
588         if (CurrentLine.BeginsWith (keyEnd, false)) {
589             pos = -1;
590             break;
591         }
592 
593         if (CurrentLine.BeginsWith (key1, false)) {
594             // set up translations between nodes and data labels
595             long offset = key1.length();
596             do {
597                 _StringBuffer buffer (128UL);
598                 readResult = ReadNextNexusStatement (fState, f, CurrentLine, offset, buffer, true, true,true,false,false);
599                 if (buffer.nonempty()) {
600                     if (translationsTo.lLength<translationsFrom.lLength) {
601                         good = (result.GetNames().FindObject(&buffer)>=0);
602                         if (good) {
603                             translationsTo.InsertElement (&buffer, insertPos);
604                         } else {
605                             ReportWarning (buffer.Enquote() & " is not a valid taxon name for TRANSLATE" );
606                             translationsFrom.Delete (insertPos);
607                         }
608 
609                     } else {
610                         if (!readResult) {
611                             insertPos = translationsFrom.BinaryInsert (&buffer);
612                         }
613                     }
614                 }
615                 if (readResult) {
616                     break;
617                 }
618                 if  ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) {
619                     break;
620                 }
621                 offset = 0;
622 
623             } while (1);
624         } else if (CurrentLine.BeginsWith (key2, false)) { // actual tree strings & idents
625             _StringBuffer buffer (128UL);
626             if (!ReadNextNexusStatement (fState, f, CurrentLine, key2.length(), buffer, false, false, false,false,false, true)) {
627                 ReportWarning ("TREE construct not followed by ';'.");
628                 break;
629             } else {
630                 // here goes the tree string in the form: treeID = treeString
631                 // pull the ID out first - check if it is a valid one
632                 // next crudely parse the tree string, extracting species names and
633                 pos = buffer.Find ('=',1,kStringEnd);
634                 if (pos==kNotFound) {
635                     ReportWarning (buffer.Enquote () &" is not of the form TreeID = TreeString");
636                 } else {
637                     long pos2 = buffer.FirstNonSpaceIndex (0,pos-1,kStringDirectionBackward);
638                     if (pos2 != kNotFound) {
639                         long j = buffer.FirstNonSpaceIndex (0,pos2-1,kStringDirectionForward);
640                         if (j != kNotFound ) {
641                             if (buffer.char_at (j) == '*') {
642                                 j = buffer.FirstNonSpaceIndex (j+1,pos2-1,kStringDirectionForward);
643                                 treeSelected = treeIdents.lLength;
644                             }
645                             if (j != kNotFound) {
646                                 _String nexus_tree_id (buffer,j,pos2),
647                                         tree_id (nexus_tree_id);
648 
649                                 if (!nexus_tree_id.IsValidIdentifier(fIDAllowCompound)) {
650                                   tree_id = nexus_tree_id.ConvertToAnIdent();
651                                 }
652 
653                                 tree_id = treeIdents.GenerateUniqueNameForList(GenerateUniqueObjectIDByType (nexus_tree_id, HY_BL_TREE) ,false);
654 
655                                 if (tree_id != nexus_tree_id) {
656                                   ReportWarning(nexus_tree_id.Enquote('\'') & " has been renamed to " & tree_id.Enquote('\'') & " to avoid naming conflicts and/or comply with HyPhy ID requirements");
657                                 }
658 
659 
660                                 treeIdents && & tree_id;
661                                 //  now get the rest of the tree string
662                                 pos2 = buffer.FirstNonSpaceIndex(pos2,pos+1, kStringDirectionBackward);
663                                 buffer.Trim (pos2,kStringEnd);
664                                 treeStrings && & buffer;
665                             }
666                         }
667                         if (j == kNotFound) {
668                              ReportWarning (_String("Could not find a tree identifier in:") & buffer.Enquote());
669                         }
670                     } else {
671                         ReportWarning (buffer.Enquote () &" is not of the form TreeID = TreeString");
672                     }
673                 }
674 
675             }
676         } else {
677 
678            long offSet = 0L;
679 
680            ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY in TREES block");
681            while (!done) {
682                 _StringBuffer buffer (128UL);
683                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, false, false,true,false,false);
684            }
685            done = false;
686         }
687 
688         if (!done) {
689             //ReadNextLine(f,&CurrentLine,&fState,false);
690             pos = 0;
691         }
692     }
693 
694     // now we shall check the string and match up node names with those present in the file
695 
696     for (long id = 0L; id<treeStrings.lLength; id++) {
697         _String const * file_tree_string = (_String const *) treeStrings (id);
698         long    treeLevel = 0L,
699                 lastNode,
700                 i = 0L;
701 
702         _StringBuffer revisedTreeString (128L);
703 
704       // TODO SLKP 20170621: looks like this is a generic Newick parser; why duplicate it here?
705         for (i=0; i<file_tree_string->length(); ++i) {
706             char    cc = file_tree_string->char_at (i);
707 
708             switch (cc) {
709                 case '(': { // creating a new internal node one level down
710                   treeLevel++;
711                   revisedTreeString<<'(';
712                   break;
713                 }
714 
715                 case ',':
716                 case ')': { // creating a new node on the same level and finishes updating the list of parameters
717                   if (cc==')') { // also create a new node on the same level
718                     treeLevel--;
719                   }
720                   revisedTreeString<<cc;
721                   break;
722                 }
723 
724                 case ':' : { // tree branch definition
725                   lastNode = i+1;
726                   revisedTreeString<<':';
727                   char c = file_tree_string->char_at (lastNode);
728 
729                   while (isdigit (c) || c=='.' || c=='-' || c=='e' || c=='E') {
730                     if (lastNode<file_tree_string->length()) {
731                       lastNode++;
732                       revisedTreeString<<c;
733                       c = file_tree_string->char_at (lastNode);
734                     } else {
735                       break;
736                     }
737                   }
738                   i = lastNode-1;
739                   break;
740                 }
741 
742                 default: { // node name
743                   lastNode = i;
744                   char c = file_tree_string->char_at (lastNode);
745                   if (isspace (c)) {
746                     break;
747                   }
748                   if (!(isalnum(c)||(c=='_'))) {
749                     ReportWarning (_String("Node names should begin with a letter, a number, or an underscore: ") & error_conext (*file_tree_string, i) );
750                     i = file_tree_string->length() +2;
751                     break;
752                   }
753                   while (isalnum(c)||(c=='_')) {
754                     if (lastNode<file_tree_string->length()) {
755                       lastNode++;
756                       c = file_tree_string->char_at (lastNode);
757                     } else {
758                       break;
759                     }
760                   }
761                   _String node_name (*file_tree_string, i, lastNode-1);
762                   i = lastNode-1;
763                   lastNode = translationsFrom.BinaryFindObject (&node_name);
764                   if (lastNode != kNotFound) {
765                     revisedTreeString<< (_String*)translationsTo.list_data[lastNode];
766                   } else {
767                     revisedTreeString<< node_name;
768                   }
769                   break;
770                 }
771           }
772         }
773         if (treeLevel) {
774             ReportWarning (_String("Unbalanced '(,)' in the tree string:") & revisedTreeString.Enquote());
775         } else if (i==file_tree_string->length()) {
776             *((_String*)treeStrings.list_data[id]) = revisedTreeString;
777         }
778     }
779 
780     if (treeSelected < treeStrings.lLength) {
781         hy_env :: EnvVariableSetNamespace(hy_env::data_file_tree, new HY_CONSTANT_TRUE,fState.theNamespace, false);
782         hy_env :: EnvVariableSetNamespace(hy_env::data_file_tree_string, new _FString(*(_String*)treeStrings.list_data[treeSelected], false),fState.theNamespace, false);
783      }
784 
785     if (treeStrings.lLength) {
786         _StringBuffer initTreeMatrix (1024UL);
787 
788         initTreeMatrix   << hy_env::nexus_file_tree_matrix
789                          << "={"
790                          << _String ((long)treeStrings.lLength)
791                          << ",2};\n";
792 
793 
794         for (long id = 0; id < treeStrings.lLength; id++) {
795             initTreeMatrix   << hy_env::nexus_file_tree_matrix
796                              << '['
797                              << _String (id)
798                              << "][0]=\""
799                              << (_String*)treeIdents(id)
800                              << "\";\n"
801                              << hy_env::nexus_file_tree_matrix
802                              << '['
803                              << _String (id)
804                              << "][1]=\""
805                              << (_String*)treeStrings(id)
806                              << "\";\n";
807         }
808 
809         _ExecutionList el (initTreeMatrix);
810         el.Execute();
811         terminate_execution = false;
812     }
813     SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
814 }
815 
816 //_________________________________________________________
817 
ProcessNexusHYPHY(FileState & fState,long pos,hyFile * file,_StringBuffer & CurrentLine,_DataSet &)818 void    ProcessNexusHYPHY (FileState& fState, long pos, hyFile*file, _StringBuffer& CurrentLine, _DataSet&) {
819     static _String const endMark ("END;");
820     _StringBuffer bfBody  (128UL);
821 
822     long      p2 = pos;
823     pos = CurrentLine.FindAnyCase (endMark,pos+1,kStringEnd);
824 
825     fState.fileType = 0;
826 
827     if (pos != kNotFound) {
828         bfBody << CurrentLine.Cut (p2,pos-1);
829         CurrentLine.Trim(pos+endMark.length(),-1);
830     } else {
831         bfBody << CurrentLine.Cut (p2,-1);
832         while (pos == kNotFound) {
833             ReadNextLine(file,&CurrentLine,&fState,false,false);
834             if (CurrentLine.empty()) {
835                 break;
836             }
837 
838             pos = CurrentLine.FindAnyCase (endMark,0,kStringEnd);
839             if (pos != kNotFound) {
840                 if (pos > 0) {
841                     bfBody << CurrentLine.Cut (0,pos-1);
842                 }
843 
844                 CurrentLine.Trim (pos+endMark.length(), -1);
845                 if (CurrentLine.empty()) {
846                     ReadNextLine(file,&CurrentLine,&fState,false,false);
847                 }
848 
849                 break;
850             } else {
851                 bfBody << CurrentLine;
852             }
853 
854         }
855     }
856      nexusBFBody = bfBody;
857 
858     fState.fileType = 3;
859 
860     CurrentLine = CurrentLine.ChangeCase(kStringUpperCase);
861 
862 }
863 
864 //_________________________________________________________
865 
ProcessNexusData(FileState & fState,long pos,hyFile * f,_StringBuffer & CurrentLine,_DataSet & result)866 bool    ProcessNexusData (FileState& fState, long pos, hyFile*f, _StringBuffer& CurrentLine, _DataSet& result) {
867     static const _String key1 ("DIMENSIONS"), key11 ("NTAX"), key12 ("NCHAR"),
868             key2 ("FORMAT"),key21 ("DATATYPE"), key22 ("MISSING"), key23 ("GAP"), key24 ("SYMBOLS"),
869             key25 ("EQUATE"), key26 ("MATCHCHAR"), key27 ("NOLABELS"), key28 ("INTERLEAVE"), key3 ("MATRIX"), keyEnd ("END");
870 
871     _String newAlph;
872 
873     bool    done = false,
874             labels = true;
875 
876     char    charState = 0;
877 
878     _List   translations;
879     char    missing = '?', gap = '-' , repeat = '.', charSwitcher;
880 
881     long    offSet = 0L, count, spExp = result.GetNames().lLength, sitesExp = 0;
882 
883     while (!done) {
884         if (!FindNextNexusToken (fState, f, CurrentLine, pos)) {
885             break;
886         }
887 
888         if (CurrentLine.BeginsWith (keyEnd, false)) {
889             pos = -1;
890             break;
891         }
892 
893         if (CurrentLine.BeginsWith (key1, false)) {
894           // DIMENSIONS
895             offSet = key1.length ();
896             while (!done) {
897                 _StringBuffer buffer (128UL);
898                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, true,true,false,false);
899 
900                 if (buffer.BeginsWith(key11, false)) {
901                     if (result.GetNames().lLength) {
902                         ReportWarning ("NTAX will override the definition of taxa names from the TAXA block");
903                     }
904                     if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
905                         ReportWarning ("NTAX is not followed by '= number-of-taxa'");
906                         done = true;
907                     } else {
908                         done = done||(count>1);
909                         spExp = buffer.to_long();
910                         if(spExp<=0L) {
911                             ReportWarning ("NTAX must be a positive number");
912                             done = true;
913                             spExp = result.GetNames().lLength?result.GetNames().lLength:1;
914                         }
915                     }
916                 } else if (buffer.BeginsWith(key12, false)) {
917                     if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
918                         ReportWarning ("NCHAR is not followed by '= number-of-charaters'");
919                         done = true;
920                     } else {
921                         done = done||(count>1);
922                         sitesExp = buffer.to_long();
923                     }
924                 }
925                 offSet = 0L;
926             }
927             done = false;
928         } else if (CurrentLine.BeginsWith (key2, false)) {
929             // FORMAT
930             offSet = key2.length();
931             while (!done) {
932                 charSwitcher = 0;
933                 _StringBuffer buffer (128UL);
934                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, true,true,false,false);
935                 offSet = 0L;
936                 buffer.Trim (buffer.FirstNonSpaceIndex(),kStringEnd);
937                 if (buffer.BeginsWith(key21)) { // datatype
938                     if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer))) {
939                         ReportWarning ("DATATYPE is not followed by '= DNA|RNA|NUCLEOTIDE|PROTEIN|BINARY'");
940                         done = true;
941                     } else {
942                         done = done||(count>1);
943                         if ( buffer == _String("DNA") || buffer == _String("RNA") || buffer ==  _String("NUCLEOTIDE" )) {
944                             if (newAlph.nonempty()) {
945                                ReportWarning (_String("DNA|RNA|NUCLEOTIDE datatype directive will over-ride the custom symbols definition: ") & newAlph.Enquote());
946                               newAlph.Clear();
947                             }
948                             if (done) {
949                                 done = false;
950                                 break;
951                             }
952                             continue;
953                         } else if (buffer==_String("PROTEIN") || buffer == _String ("BINARY")) {
954                             charState = 1+(buffer==_String("BINARY"));
955                             if (newAlph.nonempty()) {
956                                  newAlph = kEmptyString;
957                                  ReportWarning (_String("PROTEIN|BINARY datatype directive will override the custom symbols definition: ") & newAlph.Enquote());
958                                 newAlph.Clear();
959                             }
960                             if (done) {
961                                 done = false;
962                                 break;
963                             }
964                             continue;
965                         } else {
966                             ReportWarning (buffer.Enquote() &" is not a recognized data type (DNA|RNA|NUCLEOTIDE|PROTEIN|BINARY are allowed).");
967                             done = false;
968                         }
969                     }
970                 } else if (buffer.BeginsWith (key22, false)) { // MISSING
971                     charSwitcher = 1;
972                 } else if (buffer.BeginsWith (key23, false)) { // GAP
973                     charSwitcher = 2;
974                 } else if (buffer.BeginsWith (key26, false)) { // MATCHCHAR
975                     charSwitcher = 3;
976                 } else if (buffer.BeginsWith (key27, false)) { // NOLABELS
977                     labels = false;
978                 } else if (buffer.BeginsWith (key28, false)) { // INTERLEAVE
979                     fState.interleaved = true;
980                 } else if (buffer.BeginsWith(key24, false)) { // SYMBOLS
981                     count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer, true,false);
982                     if (buffer.empty()) {
983                         ReportWarning (buffer.Enquote() & _String("is not of the form SYMBOLS = \"sym1 sym2 ...\". The entire block is ignored."));
984                         done = true;
985                         break;
986                     }
987                     _StringBuffer tempNewAlpha (128UL);
988                     for (long pos1 = 0; pos1<buffer.length (); pos1++) {
989                         charSwitcher = buffer.char_at (pos1);
990                         if (!isspace(charSwitcher)) {
991                             tempNewAlpha<<charSwitcher;
992                         }
993 
994                     }
995                     if (done) {
996                         break;
997                     }
998                     newAlph = tempNewAlpha;
999                     charSwitcher = 0;
1000                     done = done||(count>1);
1001                 } else if (buffer.BeginsWith(key25, false)) { // EQUATE
1002                     buffer.Trim(key25.length(),kStringEnd);
1003                     if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0,buffer,true,false))) {
1004                         ReportWarning (buffer.Enquote ()&" is not followed by '=char'");
1005                         done = true;
1006                     }
1007                     done = done||(count>1);
1008                     // blank now contains a full list of the form token=(token)
1009                     _String symbol, meaning;
1010                     bool    symbolDefined = false, meaningDefined = false;
1011                     for (count=0; count<buffer.length(); count++) {
1012                         charSwitcher = buffer.char_at (count);
1013                         if (isspace(charSwitcher)) {
1014                             continue;
1015                         } else if (charSwitcher == '=') {
1016                             if (symbolDefined&&!meaningDefined) {
1017                                 meaningDefined = true;
1018                             }
1019                         } else
1020                             if (!symbolDefined) {
1021                                 symbolDefined = true;
1022                                 symbol = charSwitcher;
1023                                 continue;
1024                             }
1025                             if (!meaningDefined) {
1026                                 ReportWarning("EQUATE can only be used to define single-character tokens. Ignoring the EQUATE command.");
1027                                 translations.Clear();
1028                                 break;
1029                             }
1030                             meaning = meaning & charSwitcher;
1031                      }
1032                     if (symbol.length () && meaning.length () ) {
1033                         translations < new _String (symbol);
1034                         translations < new _String (meaning);
1035                     }
1036                     charSwitcher = 0;
1037                   buffer.Clear();
1038                 }
1039 
1040                 offSet = 0;
1041 
1042                 _String built_in;
1043 
1044                 if (charSwitcher) {
1045                     switch (charSwitcher) {
1046                       case 1:
1047                           built_in = "MISSING";
1048                           break;
1049                       case 2:
1050                           built_in = "GAP";
1051                           break;
1052                       case 3:
1053                           built_in = "MATCHCHAR";
1054                           break;
1055                     }
1056                     if (!(count=ReadNextNexusEquate (fState,f,CurrentLine, 0 ,buffer, true))) {
1057                         ReportWarning (buffer.Enquote() & " is not followed by '=char'");
1058                         done = true;
1059                     } else {
1060                         done = done||(count>1);
1061                         if (buffer.length () !=1) {
1062                             ReportWarning (buffer.Enquote() &" is not a valid " & built_in &" character.");
1063                         }
1064                     }
1065                     switch (charSwitcher) {
1066                       case 1:
1067                           missing = buffer.char_at (0);
1068                           if (gap == missing) {
1069                               gap = 0;
1070                           }
1071                           if (repeat == missing) {
1072                               repeat = 0;
1073                           }
1074 
1075                           break;
1076                       case 2:
1077                           gap = buffer.char_at (0);
1078                           if (missing == gap) {
1079                               missing = 0;
1080                           }
1081                           if (repeat == gap) {
1082                               repeat= 0;
1083                           }
1084 
1085                           break;
1086                       case 3:
1087                           repeat = buffer.char_at(0);
1088                           if (missing == repeat) {
1089                               missing = 0;
1090                           }
1091                           if (repeat == gap) {
1092                               gap = 0;
1093                           }
1094 
1095                           break;
1096                       }
1097                 }
1098 
1099                 if (done) {
1100                     done = false;
1101                     break;
1102                 }
1103                 done = false;
1104             }
1105         } else if (CurrentLine.BeginsWith (key3, false)) { // matrix instruction
1106             // if needed, set up a new symbol set
1107             offSet = key3.length();
1108             if (newAlph.length()>1) { // a valid new alphabet set
1109                 checkTTStatus (&fState);
1110                 fState.translationTable->AddBaseSet (newAlph);
1111             } else {
1112                 if (charState) {
1113                     checkTTStatus (&fState);
1114                     if (charState==1) {
1115                         newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_PROTEIN);
1116                         fState.translationTable->baseLength = 20;
1117                     } else {
1118                         newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_BINARY);
1119                         fState.translationTable->baseLength = 2;
1120                     }
1121                 } else {
1122                     newAlph = _TranslationTable::GetDefaultTable(HY_TRANSLATION_TABLE_DNA);
1123                 }
1124             }
1125             // set up translations
1126             if (translations.lLength) {
1127                 checkTTStatus (&fState);
1128             }
1129 
1130             for (long k = 0; k<translations.lLength; k+=2) {
1131                 char c = ((_String*)translations(k))->char_at (0);
1132                 fState.translationTable->AddTokenCode (c,*((_String*)translations(k+1)));
1133             }
1134 
1135             if (fState.translationTable->GetSkipChar()!=missing) {
1136                 checkTTStatus (&fState);
1137                 fState.translationTable->AddTokenCode (missing,newAlph);
1138             }
1139 
1140             if (fState.translationTable->GetGapChar()!=gap) {
1141                 checkTTStatus (&fState);
1142                 newAlph = "";
1143                 fState.translationTable->AddTokenCode (gap,newAlph);
1144             }
1145 
1146             if (repeat == missing) {
1147                 repeat = 0;
1148             }
1149 
1150             fState.repeat               = repeat;
1151             fState.skip                 = missing;
1152 
1153             //fState.totalSitesExpected   = sitesExp;
1154 
1155             // now proceed to read the data
1156 
1157             long loopIterations = 0;
1158             if (labels == true) {
1159                 result.ClearNames();
1160             }
1161 
1162 
1163             while (1) {
1164 
1165               _StringBuffer buffer   (128L),
1166                             buffer_2 (128L),
1167                             * source;
1168 
1169                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet?offSet+1:0, buffer, true, true,true,!labels,false);
1170                 offSet = 0;
1171                 // in each line that should produce first the name of the taxon
1172                 // and then the data string for the taxon
1173 
1174                 if (labels) {
1175                     if (result.GetNames().lLength<spExp) {
1176                         if (spExp>0 && buffer.empty ()) {
1177                             ReportWarning (_String("Could not find NTAX taxon names in the matrix. Read: ")&_String((long)result.GetNames().lLength) & " sequences.");
1178                             break;
1179                         }
1180 
1181                         if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1182                             result.AddName(buffer);
1183                             fState.totalSpeciesExpected++;
1184                         }
1185                     } else {
1186                         if (done) {
1187                             break;
1188                         }
1189                     }
1190 
1191                     if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1192                         done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer_2, true, true,true,true,false);
1193                         source = &buffer_2;
1194                     } else {
1195                         source = &buffer;
1196                     }
1197                 } else {
1198                     if (loopIterations<spExp) {
1199                         if (!(sitesExp&&fState.curSite&&(fState.curSite<sitesExp)&&(!fState.interleaved))) {
1200                             fState.totalSpeciesExpected++;
1201                         } else {
1202                             loopIterations --;
1203                         }
1204                     }
1205                     source = &buffer;
1206                 }
1207 
1208                 if (source->empty()) {
1209                     ReportWarning (_String("Could not find NTAX data strings in the matrix. Read: ")&_String((long)result.GetNames().lLength) & " sequences.");
1210                     break;
1211                 }
1212                 loopIterations++;
1213                 ISelector (fState, *source, result);
1214 
1215                 if (done)
1216                     if (loopIterations>=fState.totalSpeciesExpected) {
1217                         break;    // finished reading
1218                     }
1219 
1220                 if  ((f&&f->feof())||(fState.theSource&&(fState.theSource->length()<=fState.pInSrc))) {
1221                     break;
1222                 }
1223             }
1224 
1225 
1226             if (result.GetNames().lLength!=spExp) {
1227                 ReportWarning(_String ("Expected ")&spExp&" taxa, but found "&(long)result.GetNames().lLength);
1228             }
1229             if (result.lLength!=sitesExp && result.InternalStorageMode() == 0) {
1230                 ReportWarning(_String ("Expected ")&sitesExp&" sites, but found "&(long)result.lLength);
1231             }
1232             if (spExp && loopIterations%spExp) {
1233                 ReportWarning(_String ("There is an inconsistency between NTAX and the number of data strings in the matrix"));
1234             }
1235             done = true;
1236         } else {
1237             ReportWarning (CurrentLine.Cut (0, CurrentLine.FirstSpaceIndex(1,kStringEnd)) & " is not used by HYPHY");
1238             while (!done) {
1239                 _StringBuffer buffer (128L);
1240                 done = ReadNextNexusStatement (fState, f, CurrentLine, offSet, buffer, true, false,true,false,false);
1241             }
1242             done = false;
1243         }
1244         if (!done) {
1245             if (CurrentLine.empty ()) {
1246                 ReadNextLine(f,&CurrentLine,&fState,false);
1247             }
1248             pos = 0;
1249             if (CurrentLine.empty () ) {
1250                 done = true;
1251             }
1252         }
1253     }
1254 
1255     SkipUntilNexusBlockEnd (fState, f,CurrentLine, pos);
1256     return true;
1257 }
1258 
1259 //_________________________________________________________
1260 
ReadNexusFile(FileState & fState,hyFile * file,_DataSet & result)1261 void    ReadNexusFile (FileState& fState, hyFile*file, _DataSet& result) {
1262     bool   dataRead = false, lookForEnd = false;
1263     long   f,g, file_line = fState.currentFileLine;
1264 
1265     fState.fileType = 3; // NEXUS
1266     static const _String beginMark ("BEGIN"), endMark ("END"), data ("DATA"), chars ("CHARACTERS"),
1267             taxa ("TAXA"), trees ("TREES"), assumptions ("ASSUMPTIONS"), hyphy ("HYPHY"), sets ("SETS");
1268 
1269     _StringBuffer CurrentLine, blockName;
1270 
1271     ReadNextLine(file,&CurrentLine,&fState,false);
1272     while (CurrentLine.nonempty()) {
1273         f = 0;
1274         /** TODO SLKP 20180921 : if any of the commands loads a new CurrentLine, the marker 'f' needs to be reset
1275             but currently we have no way of knowing whether or not a new line was loaded.
1276             For the time-being fixing by adding a line # tracker for fState
1277          */
1278         while ((f = CurrentLine.FindAnyCase(beginMark,file_line == fState.currentFileLine ? f : 0L,kStringEnd ))>=0) {
1279            file_line = fState.currentFileLine;
1280 
1281             f = CurrentLine.FirstNonSpaceIndex (f+beginMark.length(),kStringEnd,kStringDirectionForward);
1282             if (f!=-1) { // process
1283                 g = CurrentLine.Find (';', f, -1);
1284                 if (g!=kNotFound) {
1285                     blockName = CurrentLine.Cut (f,g-1);
1286                     // dispatch to block readers
1287                     if (blockName.EqualIgnoringCase(data)) {
1288                         ReportWarning (blockName.Enquote() & " block is now deprecated in NEXUS and should not be used.");
1289 
1290                         if (!dataRead) {
1291                             dataRead = ProcessNexusData (fState, g+1, file, CurrentLine, result);
1292                         }
1293                         //SkipUntilNexusBlockEnd (fState,file,CurrentLine,f);
1294 
1295                         else {
1296                             ReportWarning ("Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.");
1297                         }
1298                     } else if (blockName.EqualIgnoringCase(taxa)) {
1299                         if (!dataRead) {
1300                             ProcessNexusTaxa (fState, g+1, file, CurrentLine, result);
1301                         } else {
1302                             ReportWarning ("The TAXA block was encountered after CHARACTER had been read and will be ignored.");
1303                         }
1304                     } else if (blockName.EqualIgnoringCase(trees)) {
1305                         ProcessNexusTrees (fState, g+1, file, CurrentLine, result);
1306                     } else if (blockName.EqualIgnoringCase(chars)) {
1307                         if (!dataRead) {
1308                             dataRead = ProcessNexusData (fState, g+1, file, CurrentLine, result);
1309                         } else {
1310                             ReportWarning ("Only one data set per NEXUS file is read by ReadDataSet - the 1st valid one.");
1311                         }
1312                     } else if (blockName.EqualIgnoringCase(assumptions)||blockName.EqualIgnoringCase(sets)) {
1313                         ProcessNexusAssumptions (fState, g+1, file, CurrentLine, result);
1314                     } else if (blockName.EqualIgnoringCase(hyphy)) {
1315                         ProcessNexusHYPHY (fState, g+1, file, CurrentLine, result);
1316                     } else {
1317                         ReportWarning (_String("NEXUS blocks ")&blockName.Enquote()&(" are not used by HYPHY."));
1318                         lookForEnd = true;
1319                         break;
1320                         // now look for the end of this block
1321                     }
1322 
1323                 } else {
1324                     break;
1325                 }
1326             } else {
1327                 ReportWarning (_String ("NEXUS BEGIN must be followed by the name of the block. Skipping until next BEGIN statement."));
1328                 break;
1329             }
1330         }
1331 
1332         if (lookForEnd) {
1333             lookForEnd = false;
1334             SkipUntilNexusBlockEnd (fState,file,CurrentLine,f);
1335         } else {
1336             ReadNextLine(file,&CurrentLine,&fState,false);
1337         }
1338 
1339     }
1340 
1341 }
1342