1 /*
2 
3  HyPhy - Hypothesis Testing Using Phylogenies.
4 
5  Copyright (C) 1997-now
6  Core Developers:
7  Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8  Art FY Poon    (apoon42@uwo.ca)
9  Steven Weaver (sweaver@temple.edu)
10 
11  Module Developers:
12  Lance Hepler (nlhepler@gmail.com)
13  Martin Smith (martin.audacis@gmail.com)
14 
15  Significant contributions from:
16  Spencer V Muse (muse@stat.ncsu.edu)
17  Simon DW Frost (sdf22@cam.ac.uk)
18 
19  Permission is hereby granted, free of charge, to any person obtaining a
20  copy of this software and associated documentation files (the
21  "Software"), to deal in the Software without restriction, including
22  without limitation the rights to use, copy, modify, merge, publish,
23  distribute, sublicense, and/or sell copies of the Software, and to
24  permit persons to whom the Software is furnished to do so, subject to
25  the following conditions:
26 
27  The above copyright notice and this permission notice shall be included
28  in all copies or substantial portions of the Software.
29 
30  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37 
38  */
39 
40 #include "function_templates.h"
41 
42 #include "topology.h"
43 #include "tree.h"
44 #include "vector.h"
45 
46 #include "global_things.h"
47 #include "hbl_env.h"
48 
49 #include <ctype.h>
50 
51 using namespace hy_global;
52 using namespace hy_env;
53 
54 const _String _TreeTopology::kCompareEqualWithReroot          = "Equal with reroot at ",
55               _TreeTopology::kCompareEqualWithoutReroot       = "Equal without rerooting",
56               _TreeTopology::kCompareUnequalToplogies         = "Unequal topologies (matching label sets).",
57               _TreeTopology::kCompareUnequalLabelSets         = "Unequal label sets.",
58               _TreeTopology::kMeta                            =  "__meta";
59 
60 //_______________________________________________________________________________________________
61 
_TreeTopology()62 _TreeTopology::_TreeTopology () {
63     rooted = UNROOTED;
64 }
65 
66 //_______________________________________________________________________________________________
_TreeTopology(_TheTree const * top)67 _TreeTopology::_TreeTopology (_TheTree const *top):_CalcNode (*top->GetName(), kEmptyString) {
68     _TreeTopology::PreTreeConstructor   (false);
69     if (top->theRoot) {
70         isDefiningATree         = kTreeIsBeingParsed;
71         theRoot                 = top->theRoot->duplicate_tree ();
72 
73         _TreeTopologyParseSettings parse_settings = CollectParseSettings ();
74 
75         ConditionalTraverser (
76               [&] (node<long>* iterator, node_iterator<long> const& ni) -> bool {
77                   _String              nodeVS   (top->GetBranchValue (iterator)),
78                                        *nodeSpec = map_node_to_calcnode(iterator)->GetBranchSpec();
79 
80                   FinalizeNode (iterator, 0, top->GetNodeName    (iterator), *nodeSpec, nodeVS, parse_settings);
81                   DeleteObject (nodeSpec);
82                   return false;
83               },
84         false);
85         isDefiningATree         = kTreeNotBeingDefined;
86     } else {
87         HandleApplicationError ("Can't create an empty tree");
88         return;
89     }
90 }
91 
92 //_______________________________________________________________________________________________
_TreeTopology(_TreeTopology const & top)93 _TreeTopology::_TreeTopology (_TreeTopology const &top) {
94     //PreTreeConstructor   (false);
95     *this = top;
96 }
97 
98 //_______________________________________________________________________________________________
operator =(_TreeTopology const & top)99 _TreeTopology const &  _TreeTopology::operator = (_TreeTopology const &top) {
100     if (top.theRoot) {
101         theRoot                 = top.theRoot->duplicate_tree ();
102         flatTree.Duplicate(&top.flatTree),
103         flatCLeaves.Duplicate(&top.flatCLeaves);
104         rooted = top.rooted;
105     } else {
106         HandleApplicationError ("Can't create an empty tree");
107     }
108     return *this;
109 }
110 
111 
112 //_______________________________________________________________________________________________
_TreeTopology(_String const & name,_String const & parms,bool dupMe,_AssociativeList * mapping)113 _TreeTopology::_TreeTopology    (_String const & name, _String const & parms, bool dupMe, _AssociativeList* mapping):_CalcNode (name,kEmptyString)
114 // builds a tree from a string
115 {
116     _TreeTopology::PreTreeConstructor   (dupMe);
117     _TreeTopologyParseSettings parse_settings = CollectParseSettings();
118 
119     if (_AssociativeList * meta = MainTreeConstructor  (parms, parse_settings, false, mapping)) {
120         _TreeTopology::PostTreeConstructor  (dupMe, meta);
121     } else {
122         DeleteObject     (compExp);
123         compExp = nil;
124     }
125 }
126 
127 //_______________________________________________________________________________________________
_TreeTopology(_String const * name)128 _TreeTopology::_TreeTopology    (_String const * name):_CalcNode (*name,kEmptyString) {}
129 
130 //_______________________________________________________________________________________________
131 
~_TreeTopology(void)132 _TreeTopology::~_TreeTopology (void) {
133     if (theRoot) {
134         theRoot->delete_tree();
135         delete (theRoot);
136         theRoot = nil;
137     }
138     if (compExp) {
139         DeleteAndZeroObject(compExp);
140     }
141 }
142 
143 //_______________________________________________________________________________________________
144 
PreTreeConstructor(bool)145 void    _TreeTopology::PreTreeConstructor (bool) {
146     rooted                  = UNROOTED;
147     compExp                 = new _Vector;
148 }
149 
150 //_______________________________________________________________________________________________
151 
PostTreeConstructor(bool make_copy,_AssociativeList * meta)152 void    _TreeTopology::PostTreeConstructor (bool make_copy, _AssociativeList * meta) {
153 
154     auto variable_handler = [&] (void) -> void {
155         /** TODO SLKP 20171211, make sure the semantics are unchanged */
156         variablePtrs.Replace (get_index(), make_copy ? this->makeDynamic() : this, false);
157         setParameter (WrapInNamespace (_TreeTopology::kMeta, GetName()), meta ? meta : new _MathObject, nil, false);
158 
159 
160         /*
161          BaseRef temp =  variablePtrs(theIndex);
162          variablePtrs[theIndex]=make_copy ? this->makeDynamic() : this;
163          DeleteObject(temp);
164          */
165     };
166 
167     bool accept_rooted = EnvVariableTrue(accept_rooted_trees);
168 
169     if (theRoot->get_num_nodes() <= 2) { // rooted tree - check
170         if (accept_rooted == false) {
171 
172             long node_index = theRoot->get_data();
173 
174             bool recurse = false;
175 
176             if (theRoot->get_num_nodes() == 2) {
177                 for (int i = 1; i<=2; i++) {
178                     node<long> *node_temp = theRoot->go_down(i);
179                     if (node_temp->get_num_nodes()) { // an internal node - make it a root
180                         node_temp->detach_parent();
181                         node_temp->add_node(*theRoot->go_down(3-i));
182                         delete theRoot;
183                         theRoot = node_temp;
184                         rooted = i == 1 ? ROOTED_LEFT : ROOTED_RIGHT;
185                         ReportWarning (_String("Rooted topology. Removing one branch - the ") & (i==1 ? "left" : "right") & " root child has been promoted to be the new root");
186                         break;
187                     }
188                 }
189 
190                 if (rooted==UNROOTED) {
191                     ReportWarning ("One branch topology supplied - hopefully this IS what you meant to do.");
192                     node<long> *node_temp = theRoot->go_down(1);
193                     node_temp->detach_parent();
194                     node_temp->add_node(*theRoot->go_down(2));
195                     delete theRoot;
196                     theRoot = node_temp;
197                     rooted = ROOTED_LEFT;
198                 }
199             } else {
200                 if (theRoot->get_num_nodes() == 0) {
201                     ReportWarning ("An empty topology has been constructed");
202                     variable_handler ();
203                     return;
204                 }
205                 node<long> *node_temp = theRoot->go_down(1);
206                 node_temp->detach_parent();
207                 delete theRoot;
208                 theRoot = node_temp;
209                 ReportWarning ("The root has a single child, which is be promoted to the root");
210                 recurse = false;
211             }
212 
213             flatTree.Delete (node_index);
214             flatCLeaves.Delete (node_index);
215             ((_Vector*)compExp)->Delete (node_index);
216 
217             node_iterator<long>  tree_iterator (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
218             while (node<long>*topTraverser = tree_iterator.Next()) {
219                 if (topTraverser->get_data () > node_index) {
220                     topTraverser->init (topTraverser->get_data () - 1);
221                 }
222             }
223 
224             if (recurse) {
225               PostTreeConstructor (make_copy, meta);
226               return;
227             }
228         }
229     }
230     variable_handler ();
231 
232 }
233 
234 //_______________________________________________________________________________________________
235 
236 struct _any_char_in_set {
237 
238     bool valid [256];
_any_char_in_set_any_char_in_set239     _any_char_in_set (char * accept) {
240         InitializeArray(valid, 256, false);
241         long c = strlen (accept);
242         for (unsigned long i = 0UL; i < c; i++) {
243             valid [accept[i]] = true;
244         }
245     }
246 
247 
invert_any_char_in_set248     void invert () {
249         ArrayForEach(valid, 256, [] (bool v, unsigned long) -> bool {return !v;});
250     }
251 
operator ==_any_char_in_set252     bool operator == (char c) {
253         return valid [(unsigned char)c];
254     }
255 
256 };
257 
258 //_______________________________________________________________________________________________
259 
CollectParseSettings(void)260 const _TreeTopologyParseSettings    _TreeTopology::CollectParseSettings (void) {
261     _String const kInternalNodePrefix("INTERNAL_NODE_PREFIX"),
262                   kIgnoreUserINames  ("IGNORE_INTERNAL_NODE_LABELS");
263 
264     _TreeTopologyParseSettings parse_settings;
265 
266     parse_settings.auto_convert_lengths = EnvVariableTrue(automatically_convert_branch_lengths);
267     parse_settings.accept_user_lengths  = EnvVariableTrue(accept_branch_lengths);
268     parse_settings.ingore_user_inode_names  = EnvVariableTrue(kIgnoreUserINames);
269 
270     HBLObjectRef user_node_name            = EnvVariableGet(kInternalNodePrefix, STRING);
271     if (user_node_name) {
272         parse_settings.inode_prefix = ((_FString*)user_node_name)->get_str();
273     }
274 
275     return parse_settings;
276 
277 }
278 
279 //_______________________________________________________________________________________________
MainTreeConstructor(_String const & parms,_TreeTopologyParseSettings & parse_settings,bool checkNames,_AssociativeList * mapping)280 _AssociativeList*    _TreeTopology::MainTreeConstructor  (_String const& parms, _TreeTopologyParseSettings & parse_settings, bool checkNames, _AssociativeList* mapping) {
281 
282     /** TODO SLKP 20171211 this parser needs to be checked
283      It admits invalid strings like
284      ( a (1,2) , d (4,5),   6)
285      strings like ( (,), (,))
286      are parsed incorrectly
287      */
288 
289 
290     long i,
291     nodeCount=0,
292     lastNode;
293 
294     static _String       kBootstrap ("bootstrap"),
295                          kComment   ("comment");
296 
297     static long          kBLLookahead = 128;
298 
299     _SimpleList nodeStack,
300     nodeNumbers;
301 
302     _String     nodeParameters,
303                 nodeValue,
304                 nodeComment,
305                 nodeBootstrap;
306 
307     _StringBuffer      nodeName;
308     _AssociativeList * node_comments = new _AssociativeList;
309 
310     _List              local_var_manager;
311     local_var_manager < node_comments;
312 
313     _any_char_in_set non_space        (" \t\r\n"),
314                      newick_delimiter (" \t\r\n(),{}[];:");
315     non_space.invert();
316     newick_delimiter.valid [0] = true;
317 
318 
319     char        lastChar    = '\0';
320     bool        isInLiteral = false;
321 
322     node<long>* currentNode = theRoot = nil,
323     * newNode     = nil,
324     * parentNode  = nil;
325 
326     isDefiningATree         = kTreeIsBeingParsed;
327 
328 
329     try {
330         auto       push_new_node = [&] (bool make_new) -> void {
331             currentNode = newNode;
332             nodeStack << (long) currentNode;
333             nodeNumbers << nodeCount;
334             nodeCount ++;
335             if (make_new) {
336                 newNode = new node <long>;
337             }
338         };
339 
340         auto pop_node = [&] () -> void {
341             nodeStack.Pop();
342             nodeNumbers.Pop();
343             //nodeName = kEmptyString;
344             nodeName.Reset();
345         };
346 
347         for (i=0; i<parms.length(); i++) {
348 
349             char   look_at_me = parms.char_at(i);
350 
351             if (isspace (look_at_me)) {
352                 continue;
353             }
354 
355             switch (look_at_me) {
356                 case '(': { // creating a new internal node one level down
357                             // a new node
358 
359                     newNode = new node<long>;
360 
361                     if (lastChar == '(' || lastChar == ',') {
362                         if (!currentNode) {
363                             delete newNode;
364                             throw _String ("Unexpected '") & lastChar & "'";
365                         }
366                         currentNode->add_node (*newNode);
367                     } else {
368                         if (theRoot) {
369                             parentNode = currentNode->get_parent();
370                             if (!parentNode) {
371                                 throw _String ("Unexpected '") & lastChar & "'";
372                             } else {
373                                 parentNode->add_node (*newNode);
374                             }
375                         } else {
376                             theRoot = newNode;
377                         }
378                         push_new_node (true);
379                         currentNode->add_node (*newNode);
380                     }
381                     push_new_node (false);
382                     break;
383                 }
384 
385                 case ',':
386                 case ')': { // creating a new node on the same level and finishes updating the list of parameters
387                     if (nodeStack.empty()) {
388                         throw _String ("Unexpected '") & look_at_me & "'";
389                     }
390                     lastNode = nodeStack.countitems ()-1;
391                     parentNode = (node<long>*)nodeStack.get (lastNode);
392                     if (mapping) {
393                         _FString * mapped_name = (_FString*)mapping->GetByKey (nodeName, STRING);
394                         if (!mapped_name) {
395                             mapped_name = (_FString*)mapping->GetByKey (nodeName.ChangeCase(kStringUpperCase), STRING);
396                         }
397                         if (mapped_name) {
398                             //printf ("%s => %s\n", nodeName.get_str(), mapped_name->get_str().get_str());
399                             nodeName = _String (mapped_name->get_str());
400                         }
401                     }
402 
403                     // handle bootstrap support for this node
404 
405                     if (parentNode->get_num_nodes()) {
406                         // internal node, check to see if the node name is a bootstrap value
407                         _SimpleList numerical_match (nodeName.RegExpMatch(hy_float_regex, 0));
408                         if (numerical_match.nonempty() && numerical_match(0) == 0) {
409                             nodeBootstrap = nodeName.Cut (0, numerical_match(1));
410                             nodeName.Reset();
411                         }
412                     }
413 
414                     nodeName = FinalizeNode (parentNode, nodeNumbers.get (lastNode), nodeName, nodeParameters, nodeValue, parse_settings);
415 
416                     if (nodeComment.nonempty() || nodeBootstrap.nonempty()) {
417                         _AssociativeList * node_info = new _AssociativeList;
418                         if (nodeComment.nonempty()) {
419                             if (node_info->MStore(&kComment, new _FString (nodeComment), false)) {
420                                 kComment.AddAReference();
421                                 // SLKP 20190507: this is not thread safe
422                             }
423                         }
424                         if (nodeBootstrap.nonempty()) {
425                             if (node_info->MStore(&kBootstrap, new _Constant (nodeBootstrap.to_float()), false)) {
426                                 kBootstrap.AddAReference();
427                                 // SLKP 20190507: this is not thread safe
428                             }
429                         }
430                         _String * dict_key = new _String (nodeName);
431                         if (!node_comments->MStore (dict_key, node_info, false)) {
432                             delete dict_key;
433                         }
434                     }
435 
436                     nodeParameters.Clear();
437                     nodeComment.Clear();
438                     nodeBootstrap.Clear();
439                     pop_node ();
440 
441                     if (look_at_me == ',') { // also create a new node on the same level
442                         newNode = new node<long>;
443                         if (!(parentNode = parentNode->get_parent())) {
444                             throw _String ("Unexpected '") & look_at_me & "'";
445                         }
446                         push_new_node (false);
447                         parentNode->add_node(*currentNode);
448                     }
449                     break;
450                 }
451 
452                 case '{' : { // parameter list definition
453                     long   closing_brace = parms.FindTerminator(i+1, '}');
454 
455                     if (closing_brace<0) {
456                         throw _String ("'{' has no matching '}'");
457                     }
458                     nodeParameters = parms.Cut (i+1,closing_brace-1);
459                     i = closing_brace;
460                     break;
461                 }
462 
463                 case '[' : { // hackish Newick annotation
464                     long   closing_bracket = parms.FindTerminator(i+1, ']');
465                     if (closing_bracket<0) {
466                         throw _String ("'[' has no matching ']'");
467                     }
468                     nodeComment = parms.Cut (i+1,closing_bracket-1);
469                     i = closing_bracket;
470                     break;
471                 }
472 
473                 case ':' : { // tree branch definition
474 
475                     // TODO SLKP 20171211: confirm that this will handle expressions with spaces like '..):  0.1'
476                     /*_SimpleList numerical_match (parms.RegExpMatch(hy_float_regex, i+1));
477                     if (numerical_match.empty()) {
478                         throw _String("Failed to read a number for the branch length following ':'");
479                     }
480                     nodeValue = parms.Cut (numerical_match(0), numerical_match(1));
481                     i = numerical_match(1);*/
482 
483                     /** SLKP 20200813
484                         sscanf is going to call strlen every time, which on long strings could be QUITE expensive!
485                         to limit the cost of this, we are going to assume that the float, if it's there is present within kBLLookahead characters of the current position, copy out the next kBLLookahead chars, and read from there.
486                     */
487 
488                     int end_at;
489                     hyFloat length = 0.;
490 
491                     if (i + kBLLookahead < parms.length()) {
492                         char buffer [kBLLookahead+1];
493                         memcpy (buffer, parms.get_str() + i + 1, kBLLookahead);
494                         buffer [kBLLookahead] = 0;
495                         if (sscanf (buffer, "%lf%n", &length, &end_at) != 1) {
496                             throw _String("Failed to read a number for the branch length following ':'");
497                         }
498                     } else {
499                         if (sscanf (parms.get_str()+i+1, "%lf%n", &length, &end_at) != 1) {
500                             throw _String("Failed to read a number for the branch length following ':'");
501                         }
502                     }
503                     nodeValue = parms.Cut (i+1, i+end_at);
504                     i += end_at;
505                     break;
506                 }
507 
508                 case ';' : { // done with tree definition
509                     break;
510                 }
511 
512                 default: { // node name
513 
514                     if (nodeName.nonempty()) {
515                         throw _String ("Unexpected node name");
516                     }
517 
518                     long end_of_id = parms.ExtractEnclosedExpression(i, non_space, newick_delimiter, fExtractRespectQuote | fExtractRespectEscape | fExtractOneLevelOnly);
519                     if (end_of_id == kNotFound) {
520                         throw _String ("Unexpected end of tree string while searching for a node name");
521                     } else {
522                         nodeName = parms.Cut (i, end_of_id-1);
523                         nodeName.StripQuotes("'\"","'\"");
524                     }
525 
526                     i = end_of_id - 1;
527                     break;
528                 }
529             }
530 
531             lastChar = look_at_me;
532         }
533     } catch (const _String& error) {
534         isDefiningATree = kTreeNotBeingDefined;
535         HandleApplicationError (   error & ", in the following string context " &
536                                 parms.Cut(i>31L?i-32L:0L,i)&
537                                 " <ERROR HERE> "&
538                                 parms.Cut(i+1L,parms.length()-i>32L?i+32L:-1L)
539                                 );
540 
541         return nil;
542     }
543 
544     if (!theRoot) {
545         isDefiningATree = kTreeNotBeingDefined;
546         HandleApplicationError ("Can't create empty trees.");
547         return nil;
548     }
549 
550     if (nodeStack.countitems() > 1) {
551         HandleApplicationError ("Unbalanced () in tree string");
552     }
553     if (nodeStack.countitems() == 1) {
554         parentNode = (node<long>*)nodeStack(0);
555         if (mapping) {
556             _FString * mapped_name = (_FString*)mapping->GetByKey (nodeName, STRING);
557             if (mapped_name) {
558                 nodeName = _String(mapped_name->get_str());
559             }
560         }
561         FinalizeNode (parentNode, nodeNumbers.get (lastNode), nodeName, nodeParameters, nodeValue, parse_settings);
562     }
563 
564     isDefiningATree = kTreeNotBeingDefined;
565 
566     //printf ("%s\n", _String ((_String*)node_comments->toStr()).get_str());
567     node_comments->AddAReference();
568     return node_comments;
569 
570 }
571 
572 //_______________________________________________________________________________________________
573 
FinalizeNode(node<long> * nodie,long number,_String nodeName,_String const & nodeParameters,_String & nodeValue,_TreeTopologyParseSettings const & settings)574 _String    _TreeTopology::FinalizeNode (node<long>* nodie, long number , _String nodeName, _String const& nodeParameters, _String& nodeValue, _TreeTopologyParseSettings const& settings) {
575 
576     if (nodeName.empty() || (settings.ingore_user_inode_names && nodie->get_num_nodes()>0)) {
577         nodeName = settings.inode_prefix & number;
578     }
579 
580     _String node_parameters = nodeParameters;
581 
582     if (nodie==theRoot) {
583         node_parameters = kEmptyString;
584         nodeValue = kEmptyString;
585     }
586 
587     nodie->in_object = flatTree.countitems();
588     flatTree          && & nodeName;
589     flatCLeaves.AppendNewInstance (new _String (&node_parameters, false));
590 
591     ((_Vector*)compExp)->Store (ProcessTreeBranchLength(nodeValue));
592 
593     node_parameters.Clear();// = kEmptyString;
594     nodeValue.Clear(); //      = kEmptyString;
595 
596     return nodeName;
597 }
598 
599 //_______________________________________________________________________________________________
600 
FindNodeByName(_String const * match) const601 node<long>* _TreeTopology::FindNodeByName (_String const* match) const {
602     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
603     while (node<long>* iterator = ni.Next()) {
604         if (*match == GetNodeName  (iterator)) {
605             return iterator;
606         }
607     }
608     return nil;
609 
610 }
611 
612 //_______________________________________________________________________________________________
613 
_RemoveNodeList(_SimpleList const & clean_indices)614 void    _TreeTopology::_RemoveNodeList (_SimpleList const& clean_indices) {
615     flatTree.DeleteList(clean_indices);
616     flatCLeaves.DeleteList(clean_indices);
617 
618 
619     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
620 
621     while (node<long>* iterator = ni.Next()) {
622         if ((iterator->in_object = clean_indices.CorrectForExclusions(iterator->in_object, -1)) < 0L) {
623             throw _String ("Internal error: index remap failed");
624         }
625     }
626 }
627 
628 //_______________________________________________________________________________________________
629 
RemoveANode(HBLObjectRef nodeName)630 void    _TreeTopology::RemoveANode (HBLObjectRef nodeName) {
631 
632     // TODO SLKP 20171213; why did the original implementation delete the entire path
633     // up to the root?
634 
635     try {
636         _SimpleList clean_indices;
637 
638         auto RemoveNodeByName = [this, &clean_indices] (_FString* remove_me) -> void {
639 
640             node<long>* remove_this_node = FindNodeByName (&remove_me->get_str()),
641             * parent_of_removed_node;
642 
643             if (!remove_this_node || ( parent_of_removed_node = remove_this_node->get_parent()) == nil) {
644                 throw _String ("Node ") & remove_me->get_str().Enquote() & " not found in the tree or is the root node";
645             }
646 
647 
648             while (parent_of_removed_node) {
649                 clean_indices << remove_this_node->in_object;
650                 //  printf ("Removing %s\n", GetNodeName(remove_this_node).get_str());
651                 parent_of_removed_node->detach_child(remove_this_node->get_child_num());
652 
653 
654                 for (int orphans = 1; orphans <= remove_this_node->get_num_nodes(); orphans++) {
655                     parent_of_removed_node->add_node(*remove_this_node->go_down(orphans));
656                 }
657 
658                 delete remove_this_node;
659                 remove_this_node = parent_of_removed_node;
660                 parent_of_removed_node = parent_of_removed_node->get_parent();
661 
662                 // SLKP 20171213: only delete nodes up the chain if they have a single child
663                 if (remove_this_node->get_num_nodes() == 1) {
664                     if (parent_of_removed_node == nil) {
665                         /* we are promoting the single remaining child of the current root to be the root */
666                         theRoot = remove_this_node->go_down (1);
667                         theRoot->detach_parent();
668                         clean_indices << remove_this_node->in_object;
669                         delete remove_this_node;
670 
671 
672                         //parent_of_removed_node = remove_this_node->get_parent();
673                         break;
674                     }
675                 } else {
676                     break;
677                 }
678             }
679         };
680 
681         if (nodeName->ObjectClass () == STRING) {
682             RemoveNodeByName ((_FString*)nodeName);
683             clean_indices.Sort();
684             _RemoveNodeList (clean_indices);
685 
686         } else if (nodeName->ObjectClass () == MATRIX) {
687             _Matrix * names = (_Matrix* ) nodeName;
688             if (names->IsAStringMatrix()) {
689                 names->ForEach([RemoveNodeByName] (HBLObjectRef n, unsigned long, unsigned long) -> void {if (n) RemoveNodeByName ((_FString*)n);},
690                                [names] (unsigned long i) -> HBLObjectRef {return names->GetFormula(i, -1)->Compute();});
691             } else {
692                 throw _String ("Matrix-valued argument was expected to contain strings");
693 
694             }
695         } else {
696            throw _String ("An invalid argument (not a string or a string matrix) supplied");
697         }
698     } catch (const _String& err) {
699         HandleApplicationError (err & " in " & __PRETTY_FUNCTION__);
700     }
701 
702 }
703 
704 
705 //_______________________________________________________________________________________________
706 
AddANode(HBLObjectRef newNode)707 void    _TreeTopology::AddANode (HBLObjectRef newNode) {
708 
709     static const _String                kNewNodeGraftName  ("NAME"),
710     kNewNodeGraftWhere      ("WHERE"),
711     kNewNodeGraftParent   ("PARENT"),
712     kNewNodeGraftLength   ("LENGTH"),
713     kNewNodeGraftParentLength ("PARENT_LENGTH");
714 
715     try {
716 
717         if (newNode->ObjectClass () == ASSOCIATIVE_LIST) {
718 
719             _AssociativeList * newNodeSpec = (_AssociativeList*)newNode;
720             _FString         * newName     = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftName, STRING),
721                              * newLocation = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftWhere, STRING),
722                              * newParent   = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftParent, STRING);
723 
724             _Constant        * branchLengthSelf = (_Constant*)newNodeSpec->GetByKey (kNewNodeGraftLength, NUMBER),
725                              * branchLengthParent = (_Constant*)newNodeSpec->GetByKey (kNewNodeGraftParentLength, NUMBER);
726 
727 
728             if (!newLocation) {
729                 throw _String("Missing/invalid mandatory argument ") & kNewNodeGraftWhere.Enquote();
730             }
731 
732 
733             if (! (newName || newParent)) {
734                 throw _String("Either ") & kNewNodeGraftName.Enquote() & " or " &  kNewNodeGraftParent.Enquote() & " (or both) must be defined";
735             }
736 
737             node<long>* graftAt = FindNodeByName (&newLocation->get_str());
738             if (!graftAt || graftAt->get_parent() == nil) {
739                 throw _String ("Attachment node must be an exiting non-root node ");
740             }
741 
742             node<long>* newp = newParent ? new node<long> : nil,
743             * curp = graftAt->get_parent();
744 
745             if (newp) {
746                 newp->set_parent  (*curp);
747                 newp->add_node    (*graftAt);
748                 curp->replace_node(graftAt,newp);
749             }
750 
751             if (newName && !newName->empty()) {
752                 node<long>* newt = new node<long>;
753                 if (newp) {
754                     newp->add_node(*newt);
755                 } else {
756                     graftAt->add_node (*newt);
757                 }
758                 _String bl (branchLengthSelf ? branchLengthSelf->Value() : kEmptyString);
759                 FinalizeNode (newt, 0, newName->get_str(),   kEmptyString, bl, CollectParseSettings());
760             }
761 
762             if (newp && ! newParent->empty()) {
763                 _String bl (branchLengthParent ? branchLengthParent->Value() : kEmptyString);
764                 FinalizeNode (newp, 0, newParent->get_str(), kEmptyString, bl, CollectParseSettings());
765             }
766 
767         } else {
768             throw _String ("An invalid argument (not an associative array) supplied");
769         }
770     } catch (const _String& err) {
771         HandleApplicationError (err & " in " & __PRETTY_FUNCTION__);
772     }
773 
774 }
775 //_______________________________________________________________________________________________
776 
makeDynamic(void) const777 BaseRef  _TreeTopology::makeDynamic (void) const
778 {
779     _TreeTopology* res = new _TreeTopology;
780     res->_CalcNode::Duplicate (this);
781     res->flatTree.Duplicate (&flatTree);
782     res->flatCLeaves.Duplicate (&flatCLeaves);
783     if (compExp) {
784         res->compExp = (_Matrix*)compExp->makeDynamic();
785     } else {
786         res->compExp = nil;
787     }
788 
789     res->rooted = rooted;
790     res->theRoot = CopyTreeStructure (theRoot,true);
791     return res;
792 }
793 
794 
795 //_______________________________________________________________________________________________
796 
CopyTreeStructure(node<long> * theNode,bool) const797 node<long>*  _TreeTopology::CopyTreeStructure (node <long>* theNode,  bool) const {
798     node<long>* locNode = new node<long>;
799     for (long i=1L; i<=theNode->get_num_nodes(); i++) {
800         locNode->add_node(*CopyTreeStructure (theNode->go_down(i), false));
801     }
802     locNode->init (theNode->in_object);
803     return locNode;
804 }
805 
806 //_______________________________________________________________________________________________
807 
GetNodeName(node<long> * n,bool fullName) const808 const _String   _TreeTopology::GetNodeName (node<long>* n, bool fullName) const {
809     if (fullName) {
810         return *GetName()&'.'&*(_String*)(flatTree.GetItem(n->in_object));
811     }
812     return *(_String*)(flatTree.GetItem (n->in_object));
813  }
814 
815 
816 //_______________________________________________________________________________________________
817 
GetNodeModel(node<long> * n) const818 _String const*    _TreeTopology::GetNodeModel (node<long>* n) const {
819   return (_String*)flatCLeaves.GetItem(n->in_object);
820 }
821 
822   //_______________________________________________________________________________________________
823 
MapNodesToModels(void)824 _List*     _TreeTopology::MapNodesToModels (void) {
825     _List* map = new _List;
826 
827     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
828 
829     while (node<long>* iterator = ni.Next()) {
830         if (iterator->is_root()) {
831             break;
832         }
833         _List * node_record = new _List;
834         (*node_record) < new _String (GetNodeName(iterator)) < new _String(*GetNodeModel(iterator));
835         (*map) < node_record;
836     }
837     return map;
838 }
839 
840   //_______________________________________________________________________________________________
841 
GetNodeStringForTree(node<long> * n,int flags) const842 _String const  _TreeTopology::GetNodeStringForTree  (node<long> * n , int flags) const {
843 
844   _String node_desc;
845 
846   if (flags & fGetNodeStringForTreeName) {
847     node_desc = GetNodeName (n);
848     // check to see if the node name has any chars that are a part of the Newick spec, and if so, enquote the string
849     _any_char_in_set newick_delimiter (" (),{}[];:'\"");
850     if (node_desc.Any ([&newick_delimiter](char c, unsigned long i) -> bool {
851           return newick_delimiter==c;
852       }
853     ) != kNotFound) {
854         node_desc = _StringBuffer ().SanitizeAndAppend(node_desc).Enquote('"');
855     }
856 
857   }
858 
859   if (flags & fGetNodeStringForTreeModel) {
860     _String const *mSpec = GetNodeModel (n);
861     if (mSpec->nonempty()) {
862         node_desc = node_desc & mSpec->Enquote('{','}');
863     }
864   }
865   return node_desc;
866 }
867 
868   //_______________________________________________________________________________________________
869 
toStr(unsigned long)870 BaseRef     _TreeTopology::toStr (unsigned long) {
871 
872   static const _String kNoInternalLabels ("NO_INTERNAL_LABELS");
873 
874   _StringBuffer     * res = new _StringBuffer((unsigned long)128),
875   num;
876 
877   bool    skip_internal_labels = EnvVariableTrue(kNoInternalLabels),
878            include_model_info = EnvVariableTrue(include_model_spec);
879 
880 
881   int            leaf_flag      = fGetNodeStringForTreeName,
882                  inode_flag     = skip_internal_labels ? 0 : fGetNodeStringForTreeName;
883 
884   if (include_model_info) {
885     leaf_flag   |= fGetNodeStringForTreeModel;
886     inode_flag  |= fGetNodeStringForTreeModel;
887   }
888 
889   if (IsDegenerate ()) {
890     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
891     (*res)<<'(';
892 
893     while (node<long>* iterator = ni.Next()) {
894       (*res) << GetNodeStringForTree (iterator,  leaf_flag )
895              << (iterator->is_root() ? ')' : ',');
896     }
897   } else {
898 
899     long        level       =   0L,
900                 lastLevel   =   0L;
901 
902     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
903 
904     node<long>*     curNode   =   ni.Next(),
905               *     nextNode;
906 
907     level                     = ni.Level();
908 
909     nextNode = ni.Next();
910 
911     while (nextNode) {
912       if (level>lastLevel) {
913         if (lastLevel) {
914           (*res)<<',';
915         }
916         res->AppendNCopies('(', level-lastLevel);
917       } else if (level<lastLevel) {
918         res->AppendNCopies(')', lastLevel-level);
919       } else {
920         (*res)<<',';
921       }
922 
923       (*res) << GetNodeStringForTree (curNode,  curNode->is_leaf() ? leaf_flag : inode_flag);
924 
925 
926       lastLevel = level;
927       curNode   = nextNode;
928       level     = ni.Level();
929       nextNode  = ni.Next();
930     }
931 
932     res->AppendNCopies(')', lastLevel-level);
933   }
934   (*res)<<';';
935   res->TrimSpace();
936   return res;
937 }
938 
939 //__________________________________________________________________________________
toFileStr(FILE * f,unsigned long padding)940 void _TreeTopology::toFileStr(FILE* f, unsigned long padding) {
941     _String * s = (_String*)toStr(padding);
942     fprintf (f, "%s", s->get_str());
943     DeleteObject(s);
944 }
945 
946 
947 //__________________________________________________________________________________
948 
ExecuteSingleOp(long opCode,_List * arguments,_hyExecutionContext * context,HBLObjectRef cache)949 HBLObjectRef _TreeTopology::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context,HBLObjectRef cache) {
950     const static _String kSplitNodeNames ("SPLIT_NODE_NAMES");
951 
952     switch (opCode) { // first check operations without arguments
953         case HY_OP_CODE_ABS: // Abs
954             return FlatRepresentation(cache);
955         case HY_OP_CODE_BRANCHCOUNT: //BranchCount
956             return BranchCount(cache);
957         case HY_OP_CODE_TIPCOUNT: // TipCount
958             return TipCount(cache);
959         case HY_OP_CODE_TYPE: // Type
960             return Type(cache);
961     }
962 
963     _MathObject * arg0 = _extract_argument (arguments, 0UL, false);
964 
965     try {
966         switch (opCode) { // next check operations without arguments or with one argument
967             case HY_OP_CODE_ADD: // +
968                 if (!arg0) {
969                     return Sum(cache);
970                 }
971                 AddANode (arg0);
972                 return _returnConstantOrUseCache(0.0, cache);
973 
974             case HY_OP_CODE_SUB:
975                 if (!arg0) {
976                     return new _MathObject;
977                 }
978                 RemoveANode (arg0);
979                 return _returnConstantOrUseCache(0.0, cache);
980         }
981 
982         if (arg0) {
983             switch (opCode) { // operations that require exactly one argument
984                 case HY_OP_CODE_MUL: // compute the strict consensus between T1 and T2
985                     return SplitsIdentity (arg0, cache);
986 
987                 case HY_OP_CODE_LEQ: { // MatchPattern (<=)
988 
989                     if (arg0->ObjectClass()!=TREE && arg0->ObjectClass()!=TOPOLOGY) {
990                         throw _String ("Invalid (not a tree/topology) 2nd argument is call to <= (MatchPattern) for trees/topologies.");
991                     }
992                     _String  res (((_TreeTopology*)arg0)->MatchTreePattern (this));
993                     return _returnConstantOrUseCache (!res.BeginsWith("Unequal"), cache);
994                 }
995                 case HY_OP_CODE_EQ: // ==
996                     return  Compare(arg0,cache);
997                 case HY_OP_CODE_BRANCHLENGTH: //BranchLength
998                     return BranchLength(arg0,cache);
999                 case HY_OP_CODE_BRANCHNAME: //BranchName
1000                     return TreeBranchName(arg0,false,nil,cache);
1001                 case HY_OP_CODE_TIPNAME: //BranchName
1002                     return TipName(arg0,cache);
1003                 case HY_OP_CODE_MIN: // COT (Min)
1004                     return FindCOT (arg0,cache);
1005                 case HY_OP_CODE_MAX: // Maximum parsimony
1006                     return MaximumParsimony (arg0,cache);
1007                case HY_OP_CODE_REROOTTREE: // RerootTree
1008                     return RerootTree(arg0, cache);
1009                 case HY_OP_CODE_POWER: //^
1010                     return AVLRepresentation (arg0, cache);
1011                 case HY_OP_CODE_RANDOM:
1012                     return RandomizeTips (arg0, cache);
1013                 case HY_OP_CODE_IDIV: { // Split ($) - 2nd argument
1014                     if (arg0->ObjectClass()!=NUMBER) {
1015                         throw _String ("Invalid (not a number) 2nd argument is call to $ (split)for trees.");
1016                     }
1017 
1018                     _List ref_manager;
1019 
1020                     _Constant*  cc     = (_Constant*)TipCount(nil);
1021 
1022                     ref_manager < cc;
1023 
1024                     long        size   = cc->Value()/arg0->Value();
1025 
1026                     if  (size<=4 || size * 2 >cc->Value()) {
1027                         throw _String("Poor choice of the 2nd numeric argument in to $ (split) for tree. Either the resulting cluster size is too big(>half of the tree), or too small (<4)!");
1028                     }
1029 
1030                     long        checkSize = 1L,
1031                     tol       = 0L;
1032 
1033                     while (tol<size-2) {
1034                         _List*      resL   = SplitTreeIntoClusters (size,tol);
1035 
1036                         ref_manager < resL;
1037 
1038                         checkSize = cc->Value();
1039 
1040                         if (resL->nonempty()) {
1041                             _Matrix*    mRes   = new _Matrix (resL->lLength, 2, false, true);
1042                             resL->ForEach(
1043                                   [&] (BaseRefConst cluster, unsigned long index) -> void {
1044                                       _List* cluster_list = (_List*)cluster;
1045                                       long node_count = ((_Constant*)cluster_list->GetItem(1))->Value();
1046                                       mRes->Store (index,0, node_count);
1047                                       mRes->Store (index,1, -2L + cluster_list->countitems());
1048                                   }
1049                             );
1050 
1051 
1052 
1053                             if (checkSize == 0UL) {
1054                                 _List sorted_list;
1055                                 resL->ForEach ([&] (BaseRefConst list, unsigned long) -> void {
1056                                     sorted_list << ((const _List*)list)->GetItem(0L);
1057                                 });
1058                                 sorted_list.Sort();
1059                                 EnvVariableSet (kSplitNodeNames, new _Matrix (sorted_list, false), false);
1060                                 return mRes;
1061                             }
1062 
1063                             DeleteObject (mRes);
1064                         }
1065                         tol ++;
1066                     }
1067                     return new _Matrix (1,1,false, true);
1068                 }
1069             }
1070 
1071             _MathObject * arg1 = _extract_argument (arguments, 1UL, false);
1072 
1073             switch (opCode) {
1074                 case HY_OP_CODE_MACCESS: // MAccess
1075                     return TreeBranchName (arg0,true, arg1, cache);
1076             }
1077 
1078 
1079             if (arg1) {
1080                 switch (opCode) {
1081                     case HY_OP_CODE_FORMAT: { // Format
1082                         _StringBuffer  *tStr = new _StringBuffer  (1024UL);
1083                         SubTreeString (theRoot, *tStr, CollectParseSettings(), arg0->Compute()->Value() > 0.1 , arg1->Compute()->Value() > 0.1 ? kTopologyBranchLengthExpectedSubs : kTopologyBranchLengthNone, -1, nil);
1084                         tStr->TrimSpace();
1085                         return _returnStringOrUseCache(tStr,cache);
1086                     }
1087 
1088                 }
1089             }
1090 
1091         }
1092 
1093         switch (opCode) {
1094             case HY_OP_CODE_MUL: // compute the strict consensus between T1 and T2
1095             case HY_OP_CODE_LEQ: // MatchPattern (<=)
1096             case HY_OP_CODE_EQ: // ==
1097             case HY_OP_CODE_BRANCHLENGTH: //BranchLength
1098             case HY_OP_CODE_BRANCHNAME: //BranchName
1099             case HY_OP_CODE_MIN: // COT (Min)
1100             case HY_OP_CODE_REROOTTREE: // RerootTree
1101             case HY_OP_CODE_POWER: //^
1102             case HY_OP_CODE_IDIV:  // Split ($) - 2nd argument
1103             case HY_OP_CODE_MACCESS: // MAccess
1104             case HY_OP_CODE_FORMAT:  // Format
1105                 WarnWrongNumberOfArguments (this, opCode,context, arguments);
1106                 break;
1107             default:
1108                 WarnNotDefined (this, opCode,context);
1109         }
1110     } catch (const _String& err) {
1111         context->ReportError(err);
1112     }
1113 
1114     return new _MathObject;
1115 }
1116 
1117 
1118   //_______________________________________________________________________________________________
1119 
RetrieveNodeNames(bool doTips,bool doInternals,int traversalType) const1120 const _List     _TreeTopology::RetrieveNodeNames (bool doTips, bool doInternals, int traversalType) const {
1121   _List result;
1122 
1123   node_iterator<long> ni (theRoot, traversalType);
1124 
1125   while (node<long> * iterator = ni.Next()) {
1126     bool is_leaf = iterator->is_leaf();
1127     if (is_leaf && doTips || doInternals && !is_leaf) {
1128       result < new _String (GetNodeName(iterator));
1129     }
1130   }
1131 
1132   return result;
1133 }
1134 
1135 //__________________________________________________________________________________
1136 
FindCOTHelper(node<long> * aNode,long parentIndex,_Matrix & distances,_Matrix & rootDistances,_Matrix & branchLengths,_List & childLists,_AVLListX & addressToIndexMap2,hyFloat d)1137 void _TreeTopology::FindCOTHelper (node<long>* aNode, long parentIndex, _Matrix& distances, _Matrix& rootDistances, _Matrix& branchLengths, _List& childLists, _AVLListX& addressToIndexMap2, hyFloat d)
1138 {
1139 
1140     /** TODO: SLKP 20171218, the whole FindCOT stack  needs review **/
1141 
1142     long          myIndex     = addressToIndexMap2.GetDataByKey((BaseRef)aNode),
1143                   leafCount   = distances.GetVDim();
1144 
1145 
1146     _SimpleList * childLeaves = (_SimpleList*)childLists(myIndex);
1147 
1148     _Matrix     * lookup = parentIndex>=0?&distances:&rootDistances;
1149 
1150     if (parentIndex < 0L) {
1151         parentIndex = 0L;
1152     }
1153 
1154     long ci2 = 0L;
1155 
1156     hyFloat myLength = branchLengths.get_direct(myIndex);
1157 
1158     for (long ci = 0L; ci < leafCount; ci++) {
1159         if (ci == childLeaves->list_data[ci2]) {
1160             if (ci2 < childLeaves->lLength - 1) {
1161                 ci2++;
1162             }
1163         } else {
1164             distances.Store (myIndex, ci, (*lookup)(parentIndex,ci) + myLength);
1165         }
1166     }
1167 
1168 
1169     for (long ci3 = aNode->get_num_nodes(); ci3; ci3--) {
1170         FindCOTHelper (aNode->go_down (ci3), myIndex, distances, rootDistances, branchLengths, childLists, addressToIndexMap2, myLength);
1171     }
1172 }
1173 
1174 //__________________________________________________________________________________
1175 
FindCOTHelper2(node<long> * aNode,_Matrix & branchSpans,_Matrix & branchLengths,_AVLListX & addressToIndexMap2,node<long> * referrer,hyFloat d)1176 void _TreeTopology::FindCOTHelper2 (node<long>* aNode, _Matrix& branchSpans, _Matrix& branchLengths, _AVLListX& addressToIndexMap2, node<long>* referrer, hyFloat d)
1177 {
1178     /** TODO: SLKP 20171218, the whole FindCOT stack  needs review **/
1179     long          myIndex     = aNode->parent?addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)aNode)):-1;
1180     hyFloat    myLength    = myIndex>=0?branchLengths.theData [myIndex]:0.0;
1181 
1182     for (long ci = aNode->get_num_nodes(); ci; ci--) {
1183         node <long>* daChild = aNode->go_down (ci);
1184         if (daChild != referrer) {
1185             FindCOTHelper2 (daChild,  branchSpans, branchLengths, addressToIndexMap2, nil, d+myLength);
1186         }
1187     }
1188     if (aNode->parent) { // not the root
1189         if (d>=0.0) {
1190             branchSpans.Store (myIndex,0,d);
1191         } else {
1192             branchSpans.Store (myIndex,0,0.);
1193         }
1194 
1195         branchSpans.Store (myIndex,1,d+myLength);
1196         if (referrer) {
1197             FindCOTHelper2 (aNode->parent, branchSpans, branchLengths, addressToIndexMap2, aNode, d+myLength);
1198         }
1199     }
1200 }
1201 
1202 //__________________________________________________________________________________
1203 
MaximumParsimony(HBLObjectRef parameters,HBLObjectRef cache)1204 HBLObjectRef _TreeTopology::MaximumParsimony (HBLObjectRef parameters,HBLObjectRef cache) {
1205     static const _String
1206         kMPScore                ("score"),
1207         kMPLabels               ("labels"),
1208         kMPOutNodeScore         ("node-scores"),
1209         kMPOutSubstitutions     ("substitutions");
1210 
1211     try {
1212         CheckArgumentType(parameters, ASSOCIATIVE_LIST, true);
1213         _AssociativeList * arguments = (_AssociativeList *)parameters;
1214 
1215         _AssociativeList * labels    = (_AssociativeList *)arguments->GetByKeyException(kMPLabels, ASSOCIATIVE_LIST);
1216                          //* scores    = (_AssociativeList *)arguments->GetByKey(kMPScore, ASSOCIATIVE_LIST);
1217 
1218         _List           id2name; // integer label -> string label
1219 
1220         _Trie           unique_labels, // label -> unique integer ID
1221                         mapped_labels; // node_name -> integer label
1222 
1223         _Matrix         * score_matrix = nil;
1224 
1225         auto            score_pair = [&] (unsigned long from, unsigned long to) -> hyFloat {
1226             if (score_matrix) {
1227                 return (*score_matrix)(from,to);
1228             }
1229             return from == to ? 0.0 : 1.0;
1230         };
1231 
1232         for (AVLListXLIteratorKeyValue key_value : labels->ListIterator()) {
1233             HBLObjectRef node_label = (HBLObjectRef)key_value.get_object();
1234             _String const * node_name = key_value.get_key();
1235             if (node_label->ObjectClass() != STRING) {
1236                 throw _String ("Not a string-valued label for node ") & node_name->Enquote();
1237             }
1238             _String const * label_value = &((_FString*)node_label)->get_str();
1239             bool did_insert = false;
1240             long label_index  = unique_labels.GetValue (unique_labels.InsertExtended (*label_value, unique_labels.countitems(), false, &did_insert));
1241             if (did_insert) {
1242                 id2name < new _String (*label_value);
1243             }
1244             mapped_labels.Insert (*node_name, label_index);
1245         }
1246 
1247         unsigned long unique_label_count = unique_labels.countitems();
1248 
1249         _Vector     conditional_scores;
1250         _SimpleList conditional_labels;
1251 
1252         /*
1253             for each node, in post-order traversal, these lists store K (== unique_label_count)
1254             in a serialized vector (for scores), and list (for integer labels)
1255         */
1256 
1257         _SimpleList has_labels;
1258         /*
1259             for each node, in post-order traversal
1260             stores whether or not the subtree starting at this node can be fully labeled
1261          (-1 : not labeled)
1262             only such trees are used for labeling
1263         */
1264 
1265         _SimpleList store_values;
1266         /*
1267             store node<long> values, because they will be overwritten
1268             temporarily during MP inference
1269         */
1270 
1271         node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
1272 
1273         // perform the first pass to
1274 
1275         long post_order_index = 0L;
1276 
1277         while (node<long>* iterator = ni.Next()) {
1278 
1279             if (iterator->is_leaf()) {
1280 
1281                 long index = mapped_labels.FindKey (GetNodeName (iterator));
1282                 if (index >= 0) {
1283                     index = mapped_labels.GetValue(index);
1284                     for (unsigned long parent_state = 0UL; parent_state < unique_label_count; parent_state++) {
1285                         conditional_scores << score_pair (parent_state, index);
1286                     }
1287                 } else {
1288                     conditional_scores.AppendRange(unique_label_count, 0., 0.);
1289                 }
1290                 has_labels << index;
1291                 conditional_labels.AppendRange(unique_label_count, index, 0);
1292             } else {
1293                 conditional_labels.AppendRange(unique_label_count, 0, 0);
1294                 conditional_scores.AppendRange(unique_label_count, 0., 0.);
1295                 bool node_has_labels = true;
1296                 for (unsigned long i = 1UL; i <= iterator->get_num_nodes(); i++) {
1297                     node_has_labels = node_has_labels && (has_labels.get (iterator->go_down(i)->in_object) >= 0);
1298                 }
1299                 has_labels << (node_has_labels ? 1 : -1);
1300             }
1301             store_values << iterator->in_object;
1302             iterator->in_object = post_order_index++;
1303         }
1304 
1305         ni.Reset(theRoot);
1306 
1307         while (node<long>* iterator = ni.Next()) {
1308             long my_index = iterator->in_object;
1309             long label = has_labels.get (my_index);
1310             if (label >= 0) {
1311                 if (!iterator->is_leaf()) {
1312                     long offset = my_index * unique_label_count;
1313                     for (unsigned long parent_state = 0UL; parent_state < unique_label_count; parent_state++) {
1314                         hyFloat best_score = 1.e100;
1315                         long best_state = -1L;
1316 
1317                         for (unsigned long my_state = 0UL; my_state < unique_label_count; my_state++) {
1318                             hyFloat running_score = 0.;
1319                             for (unsigned long i = 1UL; i <= iterator->get_num_nodes(); i++) {
1320                                 long child_postorder_offset = iterator->go_down(i)->in_object * unique_label_count;
1321                                 running_score += conditional_scores.get(0, child_postorder_offset + my_state);
1322                             }
1323                             if (StoreIfLess (best_score, running_score + score_pair (parent_state, my_state))) {
1324                                 best_state = my_state;
1325                             }
1326                         }
1327                         conditional_labels [offset+parent_state] = best_state;
1328                         conditional_scores [offset+parent_state] = best_score;
1329                     }
1330                 }
1331             }
1332         }
1333 
1334         node_iterator<long> ni_pre (theRoot, _HY_TREE_TRAVERSAL_PREORDER);
1335 
1336         hyFloat            total_score = 0.;
1337         _AssociativeList * inferred_labels          = new _AssociativeList;
1338         _AssociativeList * inferred_substitutions   = new _AssociativeList;
1339         _AssociativeList * node_scores              = new _AssociativeList;
1340 
1341 
1342 
1343         while (node<long>* iterator = ni_pre.Next()) {
1344             long my_index = iterator->in_object;
1345             long label = has_labels.get (my_index);
1346 
1347 
1348             if (!iterator->is_leaf () && label >= 0) { // labeled node
1349                 long my_label = 0L;
1350                 hyFloat best_score = 1.e100;
1351                 if (!iterator->parent || has_labels.get (iterator->parent->in_object) < 0) {
1352                     // either a root or not a part of the labeled tree
1353 
1354                     long offset = my_index * unique_label_count;
1355                     for (unsigned long k = 0UL; k < unique_label_count; k++) {
1356                         if (StoreIfLess (best_score, conditional_scores.get (0,offset + k))) {
1357                             my_label = k;
1358                         }
1359                     }
1360 
1361                     total_score +=best_score;
1362 
1363                 } else {
1364                     // read off the label for this node based on the state of the parent
1365                     my_label     = conditional_labels.get (my_index * unique_label_count + has_labels.get (iterator->parent->in_object));
1366                     best_score   = conditional_scores.get (my_index * unique_label_count + has_labels.get (iterator->parent->in_object),0);
1367 
1368                 }
1369                 has_labels[my_index] = my_label;
1370                 iterator->in_object = store_values.get (iterator->in_object);
1371 
1372                 inferred_labels->MStore (GetNodeName(iterator), (*(_String*)id2name.GetItem(my_label)));
1373                 iterator->in_object = my_index;
1374                 node_scores->MStore (GetNodeName (iterator), new _Constant (best_score), false);
1375             }
1376 
1377             if (label >= 0 && iterator->parent) {
1378                 long parent_label = has_labels.get (iterator->parent->in_object);
1379                 if (parent_label >= 0) {
1380                     // not root, has labeled parent
1381                     long my_label = has_labels.get (my_index);
1382                     if (my_label != parent_label) {
1383                         iterator->in_object = store_values.get (iterator->in_object);
1384                         _AssociativeList * substitution_record = new _AssociativeList;
1385                         *substitution_record < (_associative_list_key_value){"from", new _FString  ((*(_String*)id2name.GetItem(parent_label)))}
1386                                              < (_associative_list_key_value){"to", new _FString  ((*(_String*)id2name.GetItem(my_label)))};
1387                         inferred_substitutions->MStore (GetNodeName(iterator), substitution_record, false);
1388                         iterator->in_object = my_index;
1389                     }
1390                 }
1391             }
1392 
1393 
1394             //iterator->in_object = store_values.get (iterator->in_object);
1395         }
1396 
1397         _AssociativeList * result = new _AssociativeList;
1398         result->MStore (kMPScore, new _Constant (total_score), false);
1399         result->MStore (kMPLabels, inferred_labels, false);
1400         result->MStore (kMPOutSubstitutions, inferred_substitutions, false);
1401         result->MStore (kMPOutNodeScore, node_scores, false);
1402 
1403         //printf ("%s\n", _String ((_String*)conditional_scores.toStr(0)).get_str());
1404 
1405         ni.Reset(theRoot);
1406 
1407         while (node<long>* iterator = ni.Next()) {
1408             iterator->in_object = store_values.get (iterator->in_object);
1409             // restore node values
1410         }
1411 
1412         return result;
1413 
1414     } catch (const _String& err) {
1415         HandleApplicationError(err);
1416     }
1417     return new  _MathObject;
1418 
1419 
1420 }
1421 //__________________________________________________________________________________
1422 
FindCOT(HBLObjectRef p,HBLObjectRef cache)1423 _AssociativeList* _TreeTopology::FindCOT (HBLObjectRef p, HBLObjectRef cache) {
1424     // Find the Center of the Tree (COT) location
1425     // using an L_p metric (L_2 works well)
1426 
1427     /** TODO: SLKP 20171218, the whole FindCOT stack  needs review **/
1428 
1429     static const _String
1430     kCOTNode               ( "COT_NODE"),
1431     kCOTSplit             ("COT_SPLIT"),
1432     kCOTBranchLength         ( "COT_BRANCH_LENGTH"),
1433     kCOTDistance             ("COT_DISTANCE"),
1434     kCOTCDF                 ("COT_CDF"),
1435     kCOTSamples           ("COT_SAMPLES"),
1436     kCOTSampler            ("COT_SAMPLER"),
1437     kCOTToNode              ("COT_TO_NODE");
1438 
1439 
1440     hyFloat         power           = p->Compute()->Value(),
1441                        totalTreeLength = 0.0;
1442 
1443     _AssociativeList * resList = new _AssociativeList;
1444 
1445     if (power<=0.) {
1446         HandleApplicationError (_String("Invalid power argument in call to COT finder (Min on trees). Must be positive, had :") & power);
1447         return resList;
1448     }
1449 
1450     _SimpleList        avlSL,
1451                        avlSL2,
1452                        listOfNodes;
1453 
1454     _List              avlSL3,
1455                        childLists;
1456 
1457     _AVLListX          addressToIndexMap  (&avlSL),         // leaves only
1458                        addressToIndexMap2 (&avlSL2),        // complete index
1459                        lengthToIndexMap   (&avlSL3);
1460 
1461     long               leafCount   = 0,
1462                        branchCount = 0,
1463                        tIndex      = 0;
1464 
1465     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
1466 
1467     while (node<long>* iterator = ni.Next()) {
1468         if(iterator->is_root()) {
1469             break;
1470         } else if (iterator->is_leaf()) {
1471             addressToIndexMap.Insert ((BaseRef)iterator, leafCount++);
1472         } else {
1473             branchCount++;
1474         }
1475 
1476         addressToIndexMap2.Insert ((BaseRef)iterator, tIndex++);
1477     }
1478 
1479     // allocate the matrix of path lengths with hardwired (traversal order) indices
1480     // also allocate a list of sorted lists to store children nodes
1481     // and a map of (longed) node addresses to post order traversal indices
1482 
1483     _Matrix      distances          (branchCount+leafCount, leafCount, false, true),
1484                  rootDistances        (1, leafCount,  false, true),
1485                  branchLengths        (1, branchCount+leafCount,  false, true),
1486                  branchSpans      (branchCount+leafCount+1,2,false,true);
1487 
1488      // pass 1: fill up the nodes up to the root (i.e. below any internal node)
1489 
1490     ni.Reset (theRoot);
1491     tIndex            = 0;
1492 
1493     while (node<long>* iterator = ni.Next()) {
1494       if (iterator->is_root()) {
1495         break;
1496       }
1497       hyFloat          myLength = GetBranchLength     (iterator);
1498       lengthToIndexMap.Insert (new _String(totalTreeLength), tIndex, false, true);
1499       totalTreeLength      += myLength;
1500 
1501       branchLengths.Store (0, tIndex++, myLength);
1502       listOfNodes << (long)iterator;
1503       _SimpleList         *childIndices = new _SimpleList;
1504 
1505       if (iterator->is_leaf()) {
1506           (*childIndices) << addressToIndexMap.GetXtra(addressToIndexMap.Find((BaseRef)iterator));
1507       } else {
1508           long           myIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)iterator));
1509           _SimpleList    mappedLeaves (leafCount,0,0);
1510 
1511           for (long ci = iterator->get_num_nodes(); ci; ci--) {
1512               long          childIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)iterator->go_down (ci)));
1513               _SimpleList * childLeaves = (_SimpleList*)childLists(childIndex);
1514 
1515               myLength = branchLengths.theData[childIndex];
1516 
1517               for (long ci2 = 0; ci2 < childLeaves->lLength; ci2++) {
1518                   long ttIndex = childLeaves->list_data[ci2];
1519                   mappedLeaves.list_data[ttIndex] = 1;
1520                   distances.Store (myIndex, ttIndex, distances (childIndex, ttIndex) + myLength);
1521               }
1522           }
1523 
1524           for (long ci2 = 0; ci2 < leafCount; ci2++) {
1525                   if (mappedLeaves.list_data[ci2]) {
1526                       (*childIndices) << ci2;
1527                   }
1528           }
1529       }
1530 
1531         childLists.AppendNewInstance(childIndices);
1532     }
1533 
1534     // pass 2: fill the root vector
1535 
1536     //nodeName = "COT_DM1";
1537     //setParameter (nodeName, &distances);
1538 
1539     for (long ci = theRoot->get_num_nodes(); ci; ci--) {
1540         long          childIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)theRoot->go_down (ci)));
1541         _SimpleList * childLeaves = (_SimpleList*)childLists(childIndex);
1542         hyFloat       myLength = branchLengths.theData[childIndex];
1543         for (long ci2 = 0; ci2 < childLeaves->lLength; ci2++) {
1544             tIndex = childLeaves->list_data[ci2];
1545             rootDistances.Store (0, tIndex, distances (childIndex, tIndex) + myLength);
1546             //printf ("root->%s = %g\n", ((_String*)leafNames(tIndex))->sData, distances (childIndex, tIndex) + myLength);
1547         }
1548     }
1549 
1550 
1551     // pass 3: fill in the "other site" branch lengths
1552 
1553     for (long ci3 = theRoot->get_num_nodes(); ci3; ci3--) {
1554         FindCOTHelper (theRoot->go_down (ci3), -1, distances, rootDistances, branchLengths, childLists, addressToIndexMap2, 0);
1555     }
1556 
1557     //nodeName = "COT_DM2";
1558     //setParameter (nodeName, &distances);
1559 
1560 
1561 
1562     // now traverse the tree and look for the maximum
1563 
1564     tIndex                         = 0;         // stores the index of current min
1565 
1566     hyFloat  currentMin         = 1e100,
1567                 currentBranchSplit = 0;
1568 
1569 
1570     for (long ci = distances.GetHDim()-1; ci>=0; ci--) {
1571         hyFloat    T           = branchLengths.theData[ci];
1572         _SimpleList * childLeaves = (_SimpleList*)childLists(ci);
1573         long          ci2         = 0;
1574 
1575         if (CheckEqual (power,2.0)) {
1576             hyFloat    sumbT  = 0.,
1577                           sumbT2 = 0.,
1578                           suma   = 0.,
1579                           suma2  = 0.;
1580 
1581 
1582 
1583             for (long ci3 = 0; ci3 < leafCount; ci3++) {
1584                 hyFloat tt = distances(ci,ci3);
1585 
1586                 /*
1587                 printf ("%s->%s = %g\n", ttt2.sData, ((_String*)leafNames(ci3))->sData, tt);
1588                 */
1589 
1590                 if (ci3 == childLeaves->list_data[ci2]) {
1591                     if (ci2 < childLeaves->lLength-1) {
1592                         ci2++;
1593                     }
1594 
1595                     suma   += tt;
1596                     suma2  += tt*tt;
1597                 } else {
1598                     sumbT  += tt;
1599                     sumbT2 += tt*tt;
1600                 }
1601             }
1602 
1603 
1604             hyFloat tt = (sumbT-suma)/leafCount;/*(sumbT-suma)/leafCount*/;
1605             if (tt < 0.0) {
1606                 tt = 0.;
1607             } else if (tt > T) {
1608                 tt = T;
1609             }
1610 
1611             sumbT = tt*tt*leafCount + 2*tt*(suma-sumbT) + suma2 + sumbT2;
1612 
1613             if (sumbT < currentMin) {
1614                 tIndex             = ci;
1615                 currentBranchSplit = tt;
1616                 currentMin         = sumbT;
1617             }
1618         } else {
1619             hyFloat  step        = T>0.0?T*0.0001:0.1,
1620                         currentT    = 0.;
1621 
1622             while (currentT<T) {
1623                 hyFloat dTT = 0.0;
1624 
1625                 ci2 = 0;
1626 
1627                 for (long ci3 = 0; ci3 < leafCount; ci3++) {
1628                     hyFloat tt = distances(ci,ci3);
1629                     if (ci3 == childLeaves->get (ci2)) {
1630                         if (ci2 < childLeaves->lLength-1) {
1631                             ci2++;
1632                         }
1633 
1634                         dTT   += pow(tt+currentT,power);
1635                     } else {
1636                         dTT   += pow(tt-currentT,power);
1637                     }
1638                 }
1639 
1640                 if (dTT < currentMin) {
1641                     tIndex             = ci;
1642                     currentBranchSplit = currentT;
1643                     currentMin         = dTT;
1644                 }
1645                 currentT += step;
1646             }
1647         }
1648     }
1649 
1650     node <long>*    cotBranch = (node<long>*)listOfNodes.list_data[tIndex];
1651 
1652 
1653     resList->MStore (kCOTNode,  new _FString( GetNodeName     (cotBranch),false),false);
1654     resList->MStore (kCOTSplit, new _Constant (currentBranchSplit), false);
1655     resList->MStore (kCOTDistance, new _Constant (currentMin), false);
1656     resList->MStore (kCOTBranchLength, new _Constant (branchLengths.directIndex(tIndex)), false);
1657 
1658     //  compute the distribution of lengths away from COT
1659 
1660     FindCOTHelper2  (cotBranch, branchSpans, branchLengths, addressToIndexMap2, nil, currentBranchSplit-branchLengths.theData [tIndex]);
1661     if (cotBranch->parent) {
1662         hyFloat adjuster = branchLengths.theData [tIndex]-currentBranchSplit;
1663         branchSpans.Store (branchCount+leafCount,1,adjuster);
1664         node <long>* cotParent = cotBranch->parent;
1665         if (cotParent->parent) {
1666             adjuster -= branchLengths.theData[addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)cotParent))];
1667         }
1668         FindCOTHelper2  (cotParent, branchSpans, branchLengths, addressToIndexMap2, cotBranch, adjuster);
1669     }
1670 
1671     _List        timeSplits;
1672     _AVLListX    timeSplitsAVL (&timeSplits);
1673 
1674     for (long sc=0; sc <= branchCount+leafCount; sc++) {
1675         char       buffer[256];
1676         snprintf  (buffer, 255, "%.15f", branchSpans(sc,0));
1677         _String nodeName = buffer;
1678         branchSpans.Store(sc,0,nodeName.to_float());
1679         timeSplitsAVL.Insert (nodeName.makeDynamic(),0,false,true);
1680         snprintf  (buffer, 255, "%.15f", branchSpans(sc,1));
1681         nodeName = buffer;
1682         branchSpans.Store(sc,1,nodeName.to_float());
1683         timeSplitsAVL.Insert (nodeName.makeDynamic(),0,false,true);
1684     }
1685 
1686     _Matrix       cotCDFPoints (timeSplitsAVL.countitems(),3,false,true);
1687 
1688     _AssociativeList  * ctl = new _AssociativeList ();
1689 
1690     _SimpleList tcache;
1691 
1692     long        iv,
1693                 k = timeSplitsAVL.Traverser (tcache, iv, timeSplitsAVL.GetRoot());
1694 
1695     for (long pc = 0; k>=0; k = timeSplitsAVL.Traverser (tcache, iv), pc++) {
1696         timeSplitsAVL.SetXtra (k, pc);
1697         cotCDFPoints.Store (pc,0,((_String*)(*((_List*)timeSplitsAVL.dataList))(k))->to_float());
1698     }
1699 
1700 
1701     for (long mxc=0; mxc <= branchCount+leafCount; mxc++) {
1702         hyFloat T0 =  branchSpans(mxc,0),
1703                    T1 =  branchSpans(mxc,1);
1704 
1705         if (mxc<branchCount+leafCount) {
1706             _String nodeName = GetNodeName     ((node<long>*)listOfNodes(mxc));
1707             ctl->MStore (nodeName, new _Constant (T1), false);
1708         }
1709 
1710         char       buffer[256];
1711         snprintf  (buffer, 256, "%.15f", T0);
1712         _String nn = buffer;
1713         tcache.Clear();
1714         long       startingPos =  timeSplitsAVL.Find (&nn,tcache),
1715                    k           = timeSplitsAVL.Next (startingPos,tcache);
1716 
1717         for (long pc = timeSplitsAVL.GetXtra (k); k>=0; k = timeSplitsAVL.Next (k,tcache), pc++) {
1718             hyFloat ub = ((_String*)(*((_List*)timeSplitsAVL.dataList))(k))->to_float();
1719             if (ub < T1 || CheckEqual (T1, ub)) {
1720                 cotCDFPoints.Store (pc,1,cotCDFPoints(pc,1)+1);
1721             } else {
1722                 break;
1723             }
1724         }
1725     }
1726 
1727     for (long mxc=1; mxc<cotCDFPoints.GetHDim() ; mxc++) {
1728         cotCDFPoints.Store (mxc,1,cotCDFPoints(mxc,1)*(cotCDFPoints(mxc,0)-cotCDFPoints(mxc-1,0))/totalTreeLength);
1729         cotCDFPoints.Store (mxc,2,cotCDFPoints(mxc,1)+cotCDFPoints(mxc-1,2));
1730     }
1731     timeSplitsAVL.Clear(true);
1732 
1733 
1734     resList->MStore (kCOTCDF, &cotCDFPoints, true);
1735     resList->MStore (kCOTToNode, ctl, false);
1736 
1737     //  sample  random branch placement
1738     if (totalTreeLength > 0.0) {
1739         hyFloat     sampler = EnvVariableGetNumber(kCOTSamples,0.0);
1740 
1741         tIndex = sampler;
1742         if (tIndex >= 1) {
1743             _Matrix sampledDs  (tIndex, 1, false, true);
1744 
1745             for (long its = 0; its < tIndex; its ++) {
1746                 hyFloat tSample = (genrand_real2 () * totalTreeLength);
1747                 _String nn = tSample;
1748                 long branchIndex = 0;
1749                 if (lengthToIndexMap.FindBest (&nn,branchIndex)<=0) {
1750                     branchIndex --;
1751                 }
1752 
1753                 hyFloat    T         = branchLengths.theData[branchIndex];
1754                 _SimpleList * childLeaves = (_SimpleList*)childLists(branchIndex);
1755 
1756                 hyFloat dTT = 0.0;
1757                 tSample -= ((_String*)(*(_List*)lengthToIndexMap.dataList)(branchIndex))->to_float();
1758 
1759                 long          ci2 = 0;
1760                 for (long ci3 = 0; ci3 < leafCount; ci3++) {
1761                     hyFloat tt = distances(branchIndex,ci3);
1762                     if (ci3 == childLeaves->list_data[ci2]) {
1763                         if (ci2 < childLeaves->lLength-1) {
1764                             ci2++;
1765                         }
1766 
1767                         dTT   += pow(tt+tSample,power);
1768                     } else {
1769                         dTT   += pow(tt+T-tSample,power);
1770                     }
1771                 }
1772 
1773                 sampledDs.Store (its,0,dTT);
1774             }
1775             EnvVariableSet(kCOTSampler, &sampledDs, true);
1776         }
1777 
1778     }
1779     return resList;
1780 }
1781 
1782 //__________________________________________________________________________________
1783 
Compare(HBLObjectRefConst p,HBLObjectRef cache) const1784 _FString*    _TreeTopology::Compare (HBLObjectRefConst p, HBLObjectRef cache) const {
1785 // compare tree topologies
1786     long objClass = p->ObjectClass();
1787 
1788     if (objClass==TREE || objClass==TOPOLOGY) {
1789         _String cmp = CompareTrees ((_TreeTopology*)p);
1790         if (cmp.BeginsWith(kCompareEqualWithReroot)) {
1791             return (_FString*)_returnStringOrUseCache(cmp.Cut(kCompareEqualWithReroot.length() + ((_TreeTopology*)p)->GetName()->length() + 1, cmp.length()-2), cache);
1792         } else if (cmp == kCompareEqualWithoutReroot) {
1793             return (_FString*)_returnStringOrUseCache (_String (' '), cache);
1794         }
1795     }
1796     return (_FString*)_returnStringOrUseCache (kEmptyString, cache);
1797 }
1798 
1799 
1800 //__________________________________________________________________________________
1801 
internalNodeCompare(node<long> * n1,node<long> * n2,_SimpleList & subTreeMap,_SimpleList * reindexer,bool cangoup,long totalSize,node<long> * n22,_TreeTopology const * tree2,bool isPattern) const1802 bool     _TreeTopology::internalNodeCompare (node<long>* n1, node<long>* n2, _SimpleList& subTreeMap, _SimpleList* reindexer, bool cangoup, long totalSize, node<long>* n22, _TreeTopology const* tree2, bool isPattern) const {
1803     // compares whether the nodes create the same subpartition
1804 
1805     // TODO SLKP 20171224: this needs review
1806 
1807     // count the number of children
1808     long  nc1 = n1->get_num_nodes(),
1809           nc2 = n2->get_num_nodes() + (cangoup&&n2->parent) - (n22&&(!n2->parent));
1810 
1811 
1812     //printf ("Comparing nodes %ld %ld\n", nc1, nc2);
1813 
1814     if ( nc1 == nc2 || isPattern && nc2<nc1 ) {
1815         // now see if the descendants in all directions can be matched
1816         // first prepare the list of subtrees in the 2nd tree
1817 
1818         _List           nodeMap;
1819 
1820         _SimpleList*    complement = nil;
1821         _SimpleList     stSizes;
1822 
1823         if ((cangoup||n22)&&n2->parent) {
1824             complement = new _SimpleList ((unsigned long)totalSize, 1L, 0L);
1825         }
1826 
1827         nc2 = n2->get_num_nodes();
1828 
1829         long  skipped_child = -1,
1830         complementCorrection = 0;
1831 
1832 
1833         for (long k=1; k <= nc2; k++) {
1834             node<long>* kth_child = n2->go_down(k);
1835             _SimpleList * childLeaves   = (_SimpleList*)kth_child->in_object;
1836             if (kth_child == n22) {
1837                 if (complement) {
1838                     if (reindexer) {
1839                          childLeaves->Each ([&] (long value, unsigned long index) -> void {
1840                             long lookup = reindexer->get (value);
1841                             if (lookup >= 0) {
1842                                 (*complement) [lookup] = 0L;
1843                             } else {
1844                                 complementCorrection++;
1845                             }
1846                         });
1847                    }
1848                    else {
1849                         childLeaves->Each ([&] (long value, unsigned long index) -> void {
1850                             (*complement) [value] = 0L;
1851                         });
1852                    }
1853                 }
1854 
1855                 skipped_child = k-1;
1856             } else {
1857                 _SimpleList * all_descendants = new _SimpleList ((unsigned long)totalSize, 0L, 0L);
1858 
1859                 try {
1860                     childLeaves->Each ([&] (long value, unsigned long) -> void {
1861                         long lookup = reindexer ? reindexer->get (value) : value;
1862                         if (lookup >= 0L) {
1863                             (*all_descendants) [lookup] = 1L;
1864                             if (complement) {
1865                                 (*complement) [lookup] = 0L;
1866                             }
1867                         } else {
1868                             throw 0;
1869                         }
1870                     });
1871                 } catch (int e) {
1872                     if (complement) delete complement;
1873                     delete all_descendants;
1874                     return 0;
1875                 }
1876 
1877 
1878                 stSizes << childLeaves->countitems();
1879                 nodeMap < all_descendants;
1880             }
1881         }
1882 
1883         if (complement) {
1884             nodeMap < complement;
1885             stSizes << totalSize;
1886 
1887             for (long k4 = 0; k4 < stSizes.lLength-1; k4++) {
1888                 stSizes.list_data[stSizes.lLength-1] -= stSizes.list_data[k4];
1889             }
1890 
1891             if (n22) {
1892                 stSizes.list_data[stSizes.lLength-1] -= ((_SimpleList*)n22->in_object)->lLength-complementCorrection;
1893             }
1894         }
1895 
1896         // now go through the subtrees of the first tree and match up (if we can)
1897         // with the 2nd 1
1898 
1899         _SimpleList     unmatchedPatterns;
1900 
1901         for (long k5=1; k5 <= nc1; k5++) {
1902             _SimpleList * childNodes = (_SimpleList*) n1->go_down(k5)->in_object;
1903             long k6;
1904             for (k6=0; k6 < stSizes.lLength; k6++)
1905                 if (stSizes.list_data[k6] == childNodes->lLength)
1906                     // potential subtree match
1907                 {
1908                     _SimpleList* potMap = (_SimpleList*)nodeMap(k6);
1909                     long k7;
1910                     for (k7=0; k7<childNodes->lLength; k7++)
1911                         if (potMap->list_data[childNodes->list_data[k7]] == 0) {
1912                             break;
1913                         }
1914 
1915                     if (k7 == childNodes->lLength) {
1916                         stSizes.list_data[k6] = -1;
1917 
1918                         if (complement && (k6 == stSizes.lLength-1)) {
1919                             subTreeMap << -1;
1920                         } else {
1921                             if (n22&&(k6>=skipped_child)) {
1922                                 subTreeMap << k6+1;
1923                             } else {
1924                                 subTreeMap << k6;
1925                             }
1926                         }
1927                         break;
1928                     }
1929                 }
1930 
1931             if (k6 == stSizes.lLength) {
1932                 if (isPattern) {
1933                     unmatchedPatterns << k5;
1934                     subTreeMap << -2;
1935                 } else {
1936                     return 0;
1937                 }
1938             }
1939         }
1940 
1941         if (isPattern&&unmatchedPatterns.lLength) {
1942             _SimpleList rematchedPatterns;
1943             for (long k7 = 0; k7 < unmatchedPatterns.lLength; k7++) {
1944                 rematchedPatterns << -1;
1945             }
1946 
1947             for (long k8 = 0; k8 < stSizes.lLength; k8++) {
1948                 if (stSizes.list_data[k8]>0) {
1949                     _SimpleList* potMap = (_SimpleList*)nodeMap(k8);
1950                     for (long k9 = 0; k9 < unmatchedPatterns.lLength; k9++) {
1951                         if (rematchedPatterns.list_data[k9] < 0) {
1952                             _SimpleList * childNodes = (_SimpleList*) n1->go_down(unmatchedPatterns.list_data[k9])->in_object;
1953                             long k10 = 0;
1954                             for (; k10 < childNodes->lLength; k10++)
1955                                 if (potMap->list_data[childNodes->list_data[k10]] == 0) {
1956                                     break;
1957                                 }
1958                             if (k10 == childNodes->lLength) {
1959                                 rematchedPatterns.list_data[k9] = k8;
1960                                 if (complement && (k8 == stSizes.lLength-1)) {
1961                                     subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -(0xfffffff);
1962                                 } else {
1963                                     if (n22&&(k8>=skipped_child)) {
1964                                         subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -k8-3;
1965                                     } else {
1966                                         subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -k8-2;
1967                                     }
1968                                 }
1969                             }
1970                         }
1971                     }
1972                 } else {
1973                     stSizes.list_data[k8] = 0;
1974                 }
1975             }
1976 
1977             if (rematchedPatterns.Find(-1)>=0) {
1978                 return 0;
1979             }
1980 
1981             for (long k11 = 0; k11 < rematchedPatterns.lLength; k11++) {
1982                 stSizes.list_data[rematchedPatterns.list_data[k11]] -= ((_SimpleList*) n1->go_down(unmatchedPatterns.list_data[k11])->in_object)->lLength;
1983             }
1984 
1985             for (long k12 = 0; k12 < stSizes.lLength; k12++)
1986                 if (stSizes.list_data[k12]) {
1987                     return 0;
1988                 }
1989         }
1990         return 1;
1991     }
1992 
1993     return 0;
1994 }
1995 
1996 
1997 //long   itcCount = 0;
1998 //__________________________________________________________________________________
1999 
internalTreeCompare(node<long> * n1,node<long> * n2,_SimpleList * reindexer,char compMode,long totalSize,node<long> * n22,_TreeTopology const * tree2,bool isPattern) const2000 char     _TreeTopology::internalTreeCompare (node<long>* n1, node<long>* n2, _SimpleList* reindexer, char compMode, long totalSize, node<long>* n22, _TreeTopology const* tree2, bool isPattern) const
2001 // compare tree topologies
2002 // return values mean
2003 {
2004     if (n1->get_num_nodes() == 0) {
2005         return 1;
2006     } else {
2007         _SimpleList mapper;
2008         if (internalNodeCompare (n1,n2, mapper, reindexer, compMode, totalSize, n22, tree2, isPattern)) {
2009             long nc1 = n1->get_num_nodes();
2010 
2011             _SimpleList     furtherMatchedPatterns;
2012             _List           patternList;
2013 
2014             for (long k1=0; k1 < nc1; k1++) {
2015                 if (mapper.list_data[k1]>=0) {
2016                     if (internalTreeCompare (n1->go_down(k1+1),n2->go_down(mapper.list_data[k1]+1), reindexer, 0, totalSize, nil, tree2, isPattern)<1) {
2017                         return -1;
2018                     }
2019                 } else if (mapper.list_data[k1] == -1) {
2020                     if (internalTreeCompare (n1->go_down(k1+1),n2->parent, reindexer, 0, totalSize, n2, tree2, isPattern)<1) {
2021                         return -1;
2022                     }
2023                 } else {
2024                     long idx = -mapper.list_data[k1]-2,
2025                          k;
2026 
2027                     _SimpleList * patched;
2028                     if ((k=furtherMatchedPatterns.Find(idx))<0) {
2029                         k = furtherMatchedPatterns.lLength;
2030                         furtherMatchedPatterns << idx;
2031                         patternList < new _SimpleList;
2032                     }
2033 
2034                     patched = (_SimpleList*)patternList(k);
2035                     (*patched) << k1;
2036                 }
2037             }
2038             for (long k2=0; k2 < furtherMatchedPatterns.lLength; k2++) {
2039                 node <long>* dummy = new node<long>;
2040                 dummy->parent = n1->parent;
2041                 _SimpleList * children = (_SimpleList*)patternList (k2),
2042                               * newLeaves = new _SimpleList;
2043 
2044                 dummy->in_object = (long)newLeaves;
2045 
2046 
2047                 for (long k3 = 0; k3 < children->lLength; k3++) {
2048                     node<long>* aChild = n1->go_down(children->list_data[k3]+1);
2049                     dummy->add_node(*aChild);
2050                     _SimpleList  t;
2051                     t.Union (*newLeaves, *(_SimpleList*)aChild->in_object);
2052                     newLeaves->Clear();
2053                     newLeaves->Duplicate(&t);
2054                 }
2055 
2056 
2057                 long k4 = furtherMatchedPatterns.list_data[k2];
2058                 char res = 1;
2059 
2060                 if (newLeaves->lLength > 1) {
2061                     if (k4<n2->get_num_nodes()) {
2062                         res = internalTreeCompare(dummy, n2->go_down(k4+1),  reindexer, 0, totalSize, nil, tree2, isPattern);
2063                     } else {
2064                         res = internalTreeCompare (dummy,n2->parent, reindexer, 0, totalSize, n2, tree2, isPattern);
2065                     }
2066                 }
2067 
2068                 DeleteObject (newLeaves);
2069                 delete (dummy);
2070 
2071                 if (res<1) {
2072                     return -1;
2073                 }
2074 
2075             }
2076         } else {
2077             return 0;
2078         }
2079     }
2080     return 1;
2081 }
2082 
2083 
2084 //__________________________________________________________________________________
EdgeCount(long & leaves,long & internals) const2085 void _TreeTopology::EdgeCount (long& leaves, long& internals) const {
2086     leaves    = 0L;
2087     internals = 0L;
2088 
2089     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2090     while (node<long>* iterator = ni.Next()) {
2091         if (iterator->is_leaf()) {
2092             leaves ++;
2093         } else {
2094             internals ++;
2095         }
2096     }
2097 
2098 }
2099 
2100 
2101 //__________________________________________________________________________________
TipCount(HBLObjectRef cache)2102 HBLObjectRef _TreeTopology::TipCount (HBLObjectRef cache) {
2103     long leaves, ints;
2104     EdgeCount (leaves, ints);
2105     return _returnConstantOrUseCache(leaves, cache);
2106 }
2107 
2108 //__________________________________________________________________________________
BranchCount(HBLObjectRef cache)2109 HBLObjectRef _TreeTopology::BranchCount (HBLObjectRef cache) {
2110     long leaves, ints;
2111     EdgeCount (leaves, ints);
2112     return _returnConstantOrUseCache(ints-1., cache);
2113 }
2114 
2115 //__________________________________________________________________________________
FlatRepresentation(HBLObjectRef cache)2116 HBLObjectRef _TreeTopology::FlatRepresentation (HBLObjectRef cache) {
2117     _SimpleList     flatTree;
2118 
2119     unsigned long      count = 0UL;
2120     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2121 
2122     while     (node<long>* iterator = ni.Next()) {
2123         flatTree << iterator->in_object;
2124         iterator->in_object = count++;
2125     }
2126 
2127 
2128     _Matrix * res = (_Matrix*)_returnMatrixOrUseCache(1,count, _NUMERICAL_TYPE, false, cache);
2129 
2130     ni.Reset (theRoot);
2131     count = 0UL;
2132 
2133     while     (node<long>* iterator = ni.Next()) {
2134         if (iterator->is_root()) {
2135            res->theData[count] = -1;
2136         } else {
2137            res->theData[count] = iterator->parent->in_object;
2138         }
2139 
2140         iterator->in_object = flatTree.list_data[count++];
2141     }
2142     return res;
2143 }
2144 
2145 //__________________________________________________________________________________
AVLRepresentation(HBLObjectRef layoutOption,HBLObjectRef cache)2146 HBLObjectRef _TreeTopology::AVLRepresentation (HBLObjectRef layoutOption, HBLObjectRef cache) {
2147 
2148     if (layoutOption->ObjectClass () == NUMBER) {
2149         bool               preOrder = layoutOption->Compute()->Value()>0.5;
2150 
2151         _AssociativeList * masterList = new _AssociativeList ();
2152         //             arrayKey;
2153 
2154 
2155         long         rootIndex = 0;
2156 
2157         _SimpleList  nodeList;
2158         _AVLListX    nodeIndexList (&nodeList);
2159 
2160         node_iterator<long> ni (theRoot, preOrder ? _HY_TREE_TRAVERSAL_PREORDER : _HY_TREE_TRAVERSAL_POSTORDER);
2161 
2162         while     (node<long>* iterator = ni.Next()) {
2163             nodeIndexList.Insert ((BaseObj*)iterator, nodeIndexList.countitems()+1L);
2164 
2165             if (iterator->is_root()) {
2166                 rootIndex = nodeIndexList.countitems();
2167             }
2168          }
2169 
2170         ni.Reset (theRoot);
2171 
2172         while     (node<long>* iterator = ni.Next()) {
2173             _AssociativeList * nodeList = new _AssociativeList ();
2174             nodeList->MStore ("Name", new _FString (GetNodeName (iterator)), false);
2175             nodeList->MStore ("Length", new _Constant (GetBranchLength (iterator)), false);
2176             nodeList->MStore ("Depth", new _Constant (ni.Level()), false);
2177             if (! iterator->is_root()) {
2178                 nodeList->MStore ("Parent", new _Constant(nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator->parent))), false);
2179             }
2180 
2181             long nCount = iterator->get_num_nodes();
2182             if (nCount) {
2183                 _AssociativeList * childList = new _AssociativeList ();
2184                 for (long k = 1; k<=nCount; k++ ) {
2185                     childList->MStore (_String((long)(k-1)),new _Constant(nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator->go_down(k)))) , false);
2186                 }
2187                 nodeList->MStore ("Children", childList, false);
2188             }
2189             masterList->MStore (_String((long)nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator))), nodeList, false);
2190         }
2191 
2192         _AssociativeList * headerList = new _AssociativeList ();
2193 
2194         headerList->MStore ("Name", new _FString (*GetName()), false);
2195         headerList->MStore ("Root", new _Constant(rootIndex), false);
2196         masterList->MStore ("0", headerList, false);
2197 
2198         return masterList;
2199     }
2200     return new _MathObject;
2201 }
2202 
2203 //__________________________________________________________________________________
TipName(HBLObjectRef p,HBLObjectRef cache)2204 HBLObjectRef _TreeTopology::TipName (HBLObjectRef p, HBLObjectRef cache) {
2205 
2206     if (p&& p->ObjectClass()==NUMBER) {
2207         long tip_index        = p->Value(),
2208              count            = -1L;
2209 
2210         if (tip_index < 0L) {
2211           return new _Matrix (RetrieveNodeNames(true, false, _HY_TREE_TRAVERSAL_POSTORDER));
2212         }
2213 
2214         node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2215         while (node<long>* iterator = ni.Next()) {
2216           if (iterator->is_leaf()) {
2217             count++;
2218             if (count == tip_index) {
2219               return _returnStringOrUseCache (GetNodeName (iterator), cache);
2220             }
2221           }
2222         }
2223     }
2224     return _returnStringOrUseCache(kEmptyString, cache);
2225 }
2226 
2227 //__________________________________________________________________________________
2228 
_recurse_and_reshuffle(node<long> * root,long & from,long & to,long & leaf_index,_SimpleList & leaf_order,_TreeTopology const & T,hyFloat def_rate,_AssociativeList * node_rates)2229 bool _recurse_and_reshuffle (node<long>* root, long& from, long &to, long &leaf_index, _SimpleList& leaf_order, _TreeTopology const& T, hyFloat def_rate, _AssociativeList* node_rates) {
2230     int children = root->get_num_nodes();
2231     if (children) {
2232 
2233         hyFloat shuffle_rate = def_rate;
2234 
2235         if (node_rates) {
2236             try {
2237                 shuffle_rate = node_rates->GetNumberByKey(T.GetNodeName(root));
2238             } catch (const _String& e) {
2239             }
2240         }
2241 
2242         long my_start = leaf_index;
2243         for (int k = 1; k <= root->get_num_nodes(); k ++) {
2244             long start, end;
2245             node<long>* kth_child = root->go_down (k);
2246             if (_recurse_and_reshuffle (kth_child, start, end, leaf_index, leaf_order,T, def_rate, node_rates)) {
2247                 // reshuffled this child, lock the leaves
2248                 for (long element = start; element <= end; element ++) {
2249                     if (leaf_order.get (element) >= 0) {
2250                         leaf_order[element] = -leaf_order.get (element) - 1;
2251                     }
2252                 }
2253             }
2254         }
2255         from = my_start;
2256         to = leaf_index-1;
2257 
2258         if (to-from >= 2 && genrand_real1() <= shuffle_rate) { // not a cherry and selected
2259             //printf ("Shuffling at %s with rate %g\n", T.GetNodeName(root).get_str(), shuffle_rate);
2260             _SimpleList reshuffled_subset;
2261             for (long element = from; element <= to; element ++) {
2262                 if (leaf_order.get (element) >= 0L) {
2263                     reshuffled_subset << leaf_order.get (element);
2264                 }
2265             }
2266             reshuffled_subset.Permute(1L);
2267             long hits = 0;
2268             for (long element = from; element <= to; element ++) {
2269                 if (leaf_order.get (element) >= 0L) {
2270                     leaf_order[element] = reshuffled_subset.get (hits++);
2271                 }
2272             }
2273             return true;
2274         }
2275         return false;
2276     }
2277     leaf_index ++;
2278     return false;
2279 }
2280 //__________________________________________________________________________________
RandomizeTips(HBLObjectRef rate,HBLObjectRef cache)2281 HBLObjectRef _TreeTopology::RandomizeTips (HBLObjectRef rate, HBLObjectRef cache) {
2282     _TreeTopology * reshuffled = nil;
2283 
2284     try {
2285         if (CheckArgumentType (rate, NUMBER | ASSOCIATIVE_LIST, true)) {
2286 
2287             hyFloat default_shuffle_rate = 0.1;
2288             _AssociativeList* node_level_shuffle_rates = nil;
2289             if (rate->ObjectClass() == NUMBER) {
2290                 default_shuffle_rate = rate->Compute()->Value();
2291             } else {
2292                 node_level_shuffle_rates = (_AssociativeList*)rate;
2293                 try {
2294                     default_shuffle_rate = node_level_shuffle_rates->GetNumberByKey("default");
2295                 } catch (const _String& err) { // no default shuffle rate
2296 
2297                 }
2298             }
2299 
2300 
2301 
2302 
2303             if (CheckRange (default_shuffle_rate, 0, 1, true)) {
2304                 reshuffled = new _TreeTopology (*this);
2305 
2306                 long leaves, ints;
2307                 EdgeCount (leaves, ints);
2308 
2309                 _SimpleList leaf_order;
2310 
2311                 /* this will store the reshuffled order of leaves
2312                  leaf_order[i] = the index of the original leaf i (in post-order traversal) in the reshuffled tree
2313                  a negative value indicates that the particular leaf has been permuted already, and is locked for subsequent permutations
2314                  */
2315 
2316                 node_iterator<long> ni (reshuffled->theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2317                 _Vector node_shuffle_rates;
2318 
2319                 while (node<long>* iterator = ni.Next()) {
2320                     if (iterator->is_leaf()) {
2321                         leaf_order << iterator->in_object;
2322                     }
2323                 }
2324 
2325                 //node_shuffle_rates.Trim();
2326                 //printf ("%s\n", _String((_String*)node_shuffle_rates.toStr()).get_str());
2327 
2328                 long leaf_index = 0;
2329                 long f,t;
2330                 _recurse_and_reshuffle (&reshuffled->GetRoot(),f,t,leaf_index,leaf_order,*this,default_shuffle_rate,node_level_shuffle_rates);
2331                 ni.Reset(reshuffled->theRoot);
2332                 f = 0;
2333                 //node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2334                 while (node<long>* iterator = ni.Next()) {
2335                     if (iterator->is_leaf()){
2336                         iterator->in_object = leaf_order.get (f++);
2337                         if (iterator->in_object < 0) {
2338                             iterator->in_object = -iterator->in_object - 1;
2339                         }
2340                     }
2341                 }
2342 
2343 
2344 
2345                 return reshuffled;
2346             }
2347         }
2348 
2349     } catch (const _String& e) {
2350         HandleApplicationError(e);
2351     }
2352     return new _MathObject;
2353 }
2354 
2355 
2356 //__________________________________________________________________________________
BranchLength(HBLObjectRef p,HBLObjectRef cache)2357 HBLObjectRef _TreeTopology::BranchLength (HBLObjectRef p, HBLObjectRef cache) {
2358   hyFloat branch_length = HY_INVALID_RETURN_VALUE;
2359 
2360   if (p) {
2361     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2362     if (p->ObjectClass()==NUMBER) {
2363       long res        = p->Value();
2364 
2365       if (res < 0L) {
2366           // get ALL branch lengths
2367         _Vector * branch_lengths = new _Vector;
2368 
2369         bool two = IsDegenerate();
2370 
2371         while (node<long>* iterator = ni.Next()) {
2372           if (!iterator->is_root() || two){
2373             branch_lengths->Store(GetBranchLength (iterator));
2374           }
2375         }
2376         branch_lengths->Store (0.0); // backward compatibility with storing 0 at the root
2377         branch_lengths->Trim();
2378         branch_lengths->Transpose();
2379         return branch_lengths;
2380       } else {
2381           // get a specific branch length
2382         long count = -1L;
2383         while (node<long>* iterator = ni.Next()) {
2384           if (!iterator->is_root()) {
2385             if (++count == res) {
2386               branch_length = GetBranchLength(iterator);
2387               break;
2388             }
2389           }
2390         }
2391       }
2392     } else {
2393       if (p->ObjectClass()==STRING) {
2394         _List twoIDs = ((_FString*)p->Compute())->get_str().Tokenize(";");
2395 
2396         if (twoIDs.countitems() == 2UL || twoIDs.countitems() == 1UL) {
2397 
2398           _String * nodes[2] = {(_String*)twoIDs.GetItem (0L),
2399                                  twoIDs.countitems()>1UL?(_String*)twoIDs.GetItem (1L):nil};
2400 
2401 
2402           node<long>* node_objects[2] = {nil,nil};
2403           long levels[2] = {0L,0L};
2404 
2405 
2406           while (node<long>* iterator = ni.Next()) {
2407             _String nn = GetNodeName(iterator);
2408             for (unsigned long i = 0L; i < 2UL; i++) {
2409               if (nodes[i] && nn == *nodes[i]) {
2410                 node_objects[i] = iterator;
2411                 levels[i] = ni.Level();
2412                 if (node_objects[1-i]) {
2413                   break;
2414                 }
2415               }
2416             }
2417           }
2418 
2419           if (node_objects[0] && node_objects[1]) {
2420             branch_length = 0.;
2421 
2422             for (long i = 0L; i < 2L; i++) { // walk up to the same depth
2423               while (levels[1-i] < levels[i]) {
2424                 branch_length      += GetBranchLength(node_objects[i]);
2425                 node_objects[i] = node_objects[i]->get_parent();
2426                 levels[i]--;
2427               }
2428             }
2429 
2430             while (node_objects[0] != node_objects[1]) {
2431               branch_length += GetBranchLength (node_objects[0]) + GetBranchLength (node_objects[1]);
2432               node_objects[0] = node_objects[0]->parent;
2433               node_objects[1] = node_objects[1]->parent;
2434             }
2435           } else if (node_objects[0]) {
2436             if (nodes[1]) {
2437               if (*nodes[0] == *nodes[1]) {
2438                 branch_length = 0.0;
2439               } else if (*nodes[1] == kExpectedNumberOfSubstitutions) {
2440                 _String bl (GetBranchLengthString (node_objects[0], true));
2441                 if (bl.nonempty()) {
2442                   return new _FString (bl);
2443                 }
2444               }
2445             } else {
2446               branch_length = GetBranchLength(node_objects[0]);
2447             }
2448           }
2449         }
2450       }
2451     }
2452   }
2453 
2454   if (isnan (branch_length)) {
2455     return new _MathObject ();
2456   }
2457 
2458   return _returnConstantOrUseCache(branch_length, cache);
2459 }
2460 
2461 //__________________________________________________________________________________
2462 
TreeBranchName(HBLObjectRef node_ref,bool get_subtree,HBLObjectRef mapping_mode,HBLObjectRef cache)2463 HBLObjectRef _TreeTopology::TreeBranchName (HBLObjectRef node_ref, bool get_subtree, HBLObjectRef mapping_mode, HBLObjectRef cache) {
2464   _StringBuffer branch_name;
2465 
2466 
2467   if (node_ref) {
2468     if (node_ref->ObjectClass()==NUMBER) { // get by index
2469 
2470       long argument      = node_ref->Value(),
2471            count         = -1L;
2472 
2473       if (argument>=0L) { // get a specific internal node name/subtree
2474 
2475         node<long>* ith_internal_node = ConditionalTraverser (
2476               [&] (node <long>* iterator, node_iterator<long>& ni) -> bool {
2477                   if (iterator->is_leaf () == false) {
2478                       return (++count == argument);
2479                   }
2480                   return false;
2481               },
2482         false);
2483 
2484         if (!ith_internal_node) {
2485           ith_internal_node = theRoot;
2486         }
2487 
2488         if (ith_internal_node) {
2489             if (get_subtree) {
2490               hyTopologyBranchLengthMode  mapMode  = kTopologyBranchLengthNone;
2491               if (mapping_mode) {
2492                 _String * t = (_String*)mapping_mode->Compute()->toStr();
2493                 DetermineBranchLengthMappingMode (t,mapMode);
2494                 DeleteObject (t);
2495               }
2496               SubTreeString         (ith_internal_node, branch_name,CollectParseSettings(), true,mapMode);
2497             } else {
2498               branch_name = GetNodeName (ith_internal_node);
2499             }
2500         }
2501       } else {
2502         _List branch_lengths;
2503          node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2504          while (node<long>* iterator = ni.Next()) {
2505            branch_lengths.AppendNewInstance(new _String (GetNodeName (iterator)));
2506          }
2507         return new _Matrix (branch_lengths);
2508       }
2509     } else {
2510       if (node_ref->ObjectClass()==STRING) {
2511         _List twoIDs = ((_FString*)node_ref->Compute())->get_str().Tokenize(";");
2512 
2513         if (twoIDs.countitems() == 2UL || twoIDs.countitems() == 1UL) {
2514 
2515           _String * nodes[2] = {(_String*) twoIDs.GetItem(0),
2516                                 (_String*)(twoIDs.countitems() > 1L?twoIDs.GetItem(1):nil)};
2517 
2518 
2519 
2520           if (twoIDs.countitems() == 1UL) {
2521             _AssociativeList * resList = new _AssociativeList;
2522             long              masterLevel = 0L;
2523 
2524             node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_PREORDER);
2525             while (node<long>* iterator = ni.Next()) {
2526               _String node_name = GetNodeName   (iterator);
2527               if (node_name == *nodes[0]) {
2528                 masterLevel = ni.Level();
2529                   //resList->MStore(node_name,new _Constant (iterator->get_num_nodes()));
2530                 do {
2531                   // 20151203: SLKP this will store the root name twice; commented the line above
2532                   resList->MStore(GetNodeName   (iterator),new _Constant (1L+(iterator->get_num_nodes()>0L)), false);
2533                   iterator = ni.Next();
2534                 } while (iterator && ni.Level() > masterLevel);
2535                 break;
2536               }
2537             }
2538             if (resList->countitems() == 0L) {
2539               // did not find the target node
2540               DeleteObject (resList);
2541               return new _MathObject;
2542             }
2543             return resList;
2544           } else {
2545               // this returns the sequence of nodes between node 1 and node 2
2546             node<long>* node_objects[2]= {nil, nil};
2547             long levels[2] = {0L, 0L};
2548 
2549             node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2550 
2551             while (node<long>* iterator = ni.Next()) {
2552               _String nn = GetNodeName(iterator);
2553               for (long i = 0L; i < 2L; i++) {
2554                 if (nodes[i] && nn == *nodes[i]) {
2555                   node_objects[i] = iterator;
2556                   levels[i] = ni.Level();
2557                   if (node_objects[1-i]) {
2558                     break;
2559                   }
2560                 }
2561               }
2562             }
2563 
2564             if (node_objects [0] && node_objects [1]) {
2565               _List partial_paths [2];
2566 
2567               for (long i = 0L; i < 2L; i++) { // walk up to the same depth
2568                 while (levels[1-i] < levels[i]) {
2569                   partial_paths[i] < new _String (GetNodeName(node_objects[i]));
2570                   node_objects[i] = node_objects[i]->get_parent();
2571                   levels[i]--;
2572                 }
2573               }
2574 
2575               while (node_objects[0] != node_objects[1]) {
2576                 for (long i = 0L; i < 2L; i++) {
2577                   partial_paths[i] < new _String (GetNodeName(node_objects[i]));
2578                   node_objects[i] = node_objects[i]->parent;
2579                }
2580              }
2581               partial_paths[1].Flip();
2582               partial_paths[0] << partial_paths[1];
2583               return new _Matrix(partial_paths[0]);
2584             } else if (node_objects[0]) {
2585               return new _Matrix();
2586             }
2587             return new _MathObject();
2588           }
2589         }
2590       }
2591     }
2592   }
2593   //return new _FString (branch_name, false);
2594   return _returnStringOrUseCache(branch_name, cache);
2595 }
2596 
2597   //__________________________________________________________________________________
SubTreeString(node<long> * root,_StringBuffer & result,_TreeTopologyParseSettings const & settings,bool all_names,hyTopologyBranchLengthMode mode,long branch_length_variable,_AVLListXL * substitutions) const2598 void _TreeTopology::SubTreeString (node<long>* root, _StringBuffer &result, _TreeTopologyParseSettings const& settings, bool all_names, hyTopologyBranchLengthMode mode, long branch_length_variable, _AVLListXL* substitutions) const {
2599 
2600 
2601   auto handle_node = [&] (node<long>*  iterator, bool is_degenerate) -> void {
2602     _String node_name = GetNodeName (iterator);
2603       if (substitutions) {
2604             if (BaseRefConst replacement = substitutions->GetDataByKey(&node_name)) {
2605                 node_name = *(_String*)replacement;
2606             }
2607         }
2608 
2609         if (is_degenerate || !iterator->is_root()) {
2610           if (all_names || !node_name.BeginsWith(settings.inode_prefix)) {
2611             result << node_name;
2612           }
2613           PasteBranchLength (iterator,result,mode,branch_length_variable);
2614         }
2615   };
2616 
2617   if (root->parent == NULL && IsDegenerate()) {
2618     node_iterator<long> ni (root, _HY_TREE_TRAVERSAL_POSTORDER);
2619     result << '(';
2620 
2621     while (node<long>* iterator = ni.Next()) {
2622         handle_node (iterator, true);
2623         result << (iterator->is_root() ? ')' : ',');
2624     }
2625   } else {
2626       long    last_level        = 0L;
2627 
2628       node_iterator<long> ni (root, _HY_TREE_TRAVERSAL_POSTORDER);
2629 
2630       while (node <long>* iterator = ni.Next()) {
2631 
2632         if (ni.Level() > last_level) {
2633           if (last_level) {
2634             result<<',';
2635           }
2636           result.AppendNCopies('(', ni.Level()-last_level);
2637         } else if (ni.Level() < last_level) {
2638           result.AppendNCopies(')', last_level-ni.Level());
2639         } else if (last_level) {
2640           result<<',';
2641         }
2642 
2643         last_level = ni.Level();
2644         handle_node (iterator, false);
2645       }
2646   }
2647 }
2648 
2649 
2650 //__________________________________________________________________________________
RerootTreeInternalTraverser(node<long> * iterator,long originator,bool passedRoot,_StringBuffer & res,_TreeTopologyParseSettings const & settings,hyTopologyBranchLengthMode branch_length_mode,long variable_ref,bool first_time) const2651 void _TreeTopology::RerootTreeInternalTraverser (node<long>* iterator, long originator, bool passedRoot, _StringBuffer &res, _TreeTopologyParseSettings const& settings, hyTopologyBranchLengthMode branch_length_mode, long variable_ref, bool first_time) const {
2652 
2653   //printf ("[RerootTreeInternalTraverser]%s %ld %ld\n", GetNodeName(iterator).sData, originator, passedRoot);
2654 
2655     if (passedRoot) {
2656         SubTreeString (iterator, res, settings, false, branch_length_mode, variable_ref);
2657     } else {
2658         // move to parent now
2659         node<long>*     iterator_parent = iterator->get_parent();
2660 
2661         /*
2662         StringToConsole(GetNodeName(iterator)); NLToConsole();
2663         if (iterator_parent) {
2664             StringToConsole(GetNodeName(iterator_parent)); NLToConsole();
2665         }
2666         */
2667 
2668         if (iterator_parent) { // not root yet
2669             bool is_root_next = iterator_parent->get_parent() == NULL;
2670             if (!is_root_next) {
2671                 res<<'(';
2672             }
2673             long the_index_of_this_child = iterator->get_child_num();
2674             RerootTreeInternalTraverser (iterator_parent, the_index_of_this_child ,false,res,settings,branch_length_mode,variable_ref,first_time);
2675 
2676             if (iterator_parent->get_parent()) {
2677 
2678               for (long i = 1L; i<=iterator_parent->get_num_nodes(); i++) {
2679                   if (i!=the_index_of_this_child) {
2680                     res<<',';
2681                     SubTreeString (iterator_parent->go_down(i),res, settings, false, branch_length_mode,variable_ref);
2682                   }
2683               }
2684              }
2685 
2686             if (!is_root_next) {
2687                 res<<')';
2688 
2689                 if (!first_time) {
2690                   _String node_name = GetNodeName (iterator);
2691                   if (!node_name.BeginsWith(settings.inode_prefix)) {
2692                     res<<node_name;
2693                   }
2694                 }
2695                 PasteBranchLength (iterator,res,branch_length_mode, variable_ref);
2696             }
2697         } else {
2698             /* passing old root
2699                create a new root with >=2 children nodes - this node,
2700                and one more containing all other children (>=2 of them)
2701             */
2702 
2703             long count               = 0L,
2704                  root_children_count = theRoot->get_num_nodes();
2705 
2706             if (root_children_count > 2) {
2707                 res << '(';
2708             }
2709 
2710             node<long>* stash_originator = nil;
2711 
2712             for (long k = 1; k<=theRoot->get_num_nodes(); k++) {
2713                 if (k==originator) {
2714                     stash_originator = theRoot->go_down(k);
2715                     continue;
2716                 }
2717                 if (count) {
2718                     res<<',';
2719                 }
2720                 count++;
2721                 SubTreeString (theRoot->go_down(k), res,settings, false, branch_length_mode,variable_ref);
2722             }
2723 
2724             if (!stash_originator) {
2725               HandleApplicationError ("Internal error in RerootTreeInternalTraverser");
2726               return;
2727             }
2728 
2729             if (root_children_count > 2) {
2730                 res<<')';
2731             }
2732 
2733             PasteBranchLength (stash_originator,res, branch_length_mode, variable_ref);
2734         }
2735     }
2736 }
2737 
2738 
2739 //__________________________________________________________________________________
PasteBranchLength(node<long> * node,_StringBuffer & result,hyTopologyBranchLengthMode const mode,long variable_reference,hyFloat factor) const2740 void            _TreeTopology::PasteBranchLength (node<long>* node, _StringBuffer& result,
2741                                                   hyTopologyBranchLengthMode const mode, long variable_reference , hyFloat factor) const {
2742 
2743     auto decorate_string = [&] (_String const& value) -> void {
2744         if (value.nonempty()) {
2745             result << ':' << _String (value.to_float()*factor);
2746         }
2747     };
2748 
2749     //printf ("_TreeTopology::PasteBranchLength %d %g %s\n", mode, GetNodeName(node).get_str(), GetBranchLength(node));
2750 
2751     switch (mode) {
2752         case kTopologyBranchLengthExpectedSubs: {
2753             result << ':' << _String(GetBranchLength (node)*factor);
2754             break;
2755         }
2756         case kTopologyBranchLengthUserLengths: {
2757             decorate_string (GetBranchValue (node));
2758             break;
2759         }
2760         case kTopologyBranchLengthLocalParameter: {
2761             decorate_string (GetBranchVarValue (node, variable_reference));
2762         }
2763     }
2764 }
2765 
2766 //__________________________________________________________________________________
GetBranchLengthString(node<long> * n,bool get_expression) const2767 const _String            _TreeTopology::GetBranchLengthString (node<long> * n, bool get_expression) const {
2768     if (get_expression) {
2769         return kEmptyString;
2770     } else {
2771         return compExp->theData[n->in_object];
2772     }
2773 }
2774 
2775 //__________________________________________________________________________________
GetBranchLength(node<long> * n) const2776 hyFloat            _TreeTopology::GetBranchLength (node<long> * n) const {
2777     return compExp->theData[n->in_object];
2778 }
2779 
2780 
2781 //__________________________________________________________________________________
GetBranchValue(node<long> *) const2782 const _String            _TreeTopology::GetBranchValue     (node<long> *) const{
2783     return kEmptyString;
2784 }
2785 //__________________________________________________________________________________
GetBranchVarValue(node<long> *,long) const2786 const _String            _TreeTopology::GetBranchVarValue  (node<long> *, long) const {
2787     return kEmptyString;
2788 }
2789 
2790 
2791 //__________________________________________________________________________________
RerootTree(HBLObjectRef new_root,HBLObjectRef cache)2792 HBLObjectRef _TreeTopology::RerootTree (HBLObjectRef new_root, HBLObjectRef cache) {
2793     _StringBuffer * res = new _StringBuffer (256UL);
2794 
2795     _TreeTopologyParseSettings settings = CollectParseSettings();
2796 
2797     if (new_root && new_root->ObjectClass()==STRING) {
2798         if (rooted == UNROOTED) {
2799             ReportWarning ("Reroot was called with an unrooted tree. Rerooting was still performed.");
2800         }
2801 
2802         _String * new_root_name = (_String*)new_root->toStr();
2803         node<long>* reroot_at = FindNodeByName (new_root_name);
2804         DeleteObject (new_root_name);
2805 
2806         if (reroot_at) { // good node name, can reroot
2807             if (reroot_at->is_root()) {
2808                 SubTreeString (theRoot, *res,settings,kTopologyBranchLengthUserLengths);
2809             } else {
2810               (*res)<<'('; // opening tree (
2811               RerootTreeInternalTraverser (reroot_at, reroot_at->get_child_num(),false,*res,settings,kTopologyBranchLengthUserLengths,-1,true);
2812               (*res)<<',';
2813               SubTreeString (reroot_at, *res,settings,false, kTopologyBranchLengthUserLengths);
2814               (*res)<<')';
2815             }
2816         }
2817     } else {
2818         HandleApplicationError ("Reroot Tree was passed an invalid branch argument.");
2819     }
2820 
2821     res->TrimSpace();
2822     return _returnStringOrUseCache(res, cache);
2823 }
2824 
2825 
2826 //__________________________________________________________________________________
2827 
DetermineBranchLengthMappingMode(_String const * param,hyTopologyBranchLengthMode & map_mode) const2828 _String  const    _TreeTopology::DetermineBranchLengthMappingMode (_String const * param, hyTopologyBranchLengthMode& map_mode) const {
2829     map_mode = kTopologyBranchLengthNone;
2830     if (param) {
2831         if (*param == hy_env::kExpectedNumberOfSubstitutions) {
2832             map_mode = kTopologyBranchLengthExpectedSubs;
2833         } else if (*param == hy_env::kStringSuppliedLengths) {
2834             map_mode = kTopologyBranchLengthUserLengths;
2835         } else {
2836             map_mode = kTopologyBranchLengthLocalParameter;
2837             return _String('.') & *param;
2838         }
2839     }
2840     return kEmptyString;
2841 }
2842 
2843 //_______________________________________________________________________________________________
2844 
prepTree4Comparison(_List & leafNames,_SimpleList & mapping,node<long> * topNode) const2845 node<long>* _TreeTopology::prepTree4Comparison (_List& leafNames, _SimpleList& mapping, node<long>* topNode) const {
2846 
2847     /**
2848         Creates a tree structure which facilitates comparison to other trees
2849 
2850         The in_object of each node is a numeric list of indices of all the leaves that are at or below this node
2851         The indices are in post-order traversal order
2852 
2853         `leafNames` stores lexicographically sorted leaf names
2854         `mapping` stores the mapping between the original post-order index of a leaf and its location in `leafNames`
2855 
2856      */
2857 
2858     node<long>* res     = topNode?topNode->duplicate_tree():theRoot->duplicate_tree();
2859 
2860     node_iterator<long> ni (res, _HY_TREE_TRAVERSAL_POSTORDER);
2861 
2862     _SimpleList     indexer;
2863 
2864     while (node<long> * iterator = ni.Next()) {
2865 
2866         _SimpleList * descendants = new _SimpleList;
2867 
2868         if (!iterator->is_leaf()) {
2869             for (unsigned long k = 1UL; k <= iterator->get_num_nodes(); k++) {
2870                 (*descendants) << *(_SimpleList*)iterator->go_down(k)->in_object;
2871             }
2872         } else {
2873             (*descendants) << leafNames.countitems();
2874             indexer        << leafNames.countitems();
2875             leafNames < new _String (GetNodeName (iterator));
2876         }
2877 
2878         iterator->in_object = (long)descendants;
2879     }
2880 
2881     // now sort leaf names and build the indexer
2882 
2883     mapping.Clear();
2884     mapping.Duplicate (&indexer);
2885 
2886     SortLists (&leafNames, &indexer);
2887     SortLists (&indexer, &mapping);
2888 
2889     return res;
2890 }
2891 
2892 //_______________________________________________________________________________________________
2893 
destroyCompTree(node<long> * compRoot) const2894 void    _TreeTopology::destroyCompTree (node<long>* compRoot) const {
2895     for (unsigned long i=1UL; i<=compRoot->get_num_nodes(); i++) {
2896         destroyCompTree (compRoot->go_down(i));
2897     }
2898     DeleteObject ((BaseRef)compRoot->in_object);
2899     delete (compRoot);
2900 }
2901 
2902 //_______________________________________________________________________________________________
2903 
CompareTrees(_TreeTopology * compareTo) const2904 _String             _TreeTopology::CompareTrees      (_TreeTopology* compareTo) const {
2905 
2906     _List           myLeaves,
2907                     otherLeaves;
2908 
2909     _SimpleList     indexer,
2910                     otherIndexer;
2911 
2912     node<long>      *myCT,
2913                     *otherCT;
2914 
2915     _String         rerootAt;
2916 
2917     myCT            = prepTree4Comparison(myLeaves, indexer);
2918     otherCT         = compareTo->prepTree4Comparison(otherLeaves, otherIndexer);
2919 
2920     // first compare the set of leaf labels
2921 
2922     if (myLeaves.Equal (otherLeaves)) {
2923         _SimpleList * reindexer = nil;
2924 
2925         if (indexer.Equal (otherIndexer)) {
2926             otherIndexer.Populate (_SimpleList ((unsigned long)myLeaves.countitems()).Populate (indexer, _SimpleList::action_invert),
2927                                    _SimpleList::action_compose);
2928             reindexer = &otherIndexer;
2929         }
2930 
2931         char compRes;
2932 
2933         if (internalTreeCompare (myCT, otherCT, reindexer, 1, myLeaves.lLength, nil, compareTo)>0) {
2934             rerootAt = kCompareEqualWithoutReroot;
2935         } else {
2936             long   tCount = 0;
2937 
2938             node_iterator<long> ni (otherCT, _HY_TREE_TRAVERSAL_POSTORDER);
2939             node<long> * meNode;
2940 
2941             while (meNode = ni.Next()) {
2942               if (meNode == otherCT) {
2943                 break;
2944               }
2945               if (meNode->get_num_nodes()) {
2946                   compRes = internalTreeCompare (myCT, meNode, reindexer, 1, myLeaves.lLength, nil, compareTo);
2947                   if (compRes>0) {
2948                       break;
2949                   } else if (compRes) {
2950                       meNode = otherCT;
2951                       break;
2952                   }
2953               }
2954 
2955               tCount ++;
2956             }
2957 
2958             if (meNode!=otherCT) {
2959                 node_iterator<long> ni (compareTo->theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2960                 while (node<long> * meNode = ni.Next()) {
2961                     if (meNode == theRoot) {
2962                       break;
2963                     }
2964                     if (tCount==1) {
2965                         rerootAt = kCompareEqualWithReroot & *LocateVar (meNode->in_object)->GetName() & '.';
2966                         break;
2967                     } else {
2968                         tCount --;
2969                     }
2970                 }
2971             }
2972         }
2973         if (rerootAt.empty()) {
2974             rerootAt = kCompareUnequalToplogies;
2975         }
2976     } else {
2977         rerootAt = kCompareUnequalLabelSets;
2978     }
2979 
2980     destroyCompTree (myCT);
2981     destroyCompTree (otherCT);
2982 
2983     return          rerootAt;
2984 }
2985 
2986 
2987 
2988 
2989 //_______________________________________________________________________________________________
2990 
SplitTreeIntoClustersInt(node<long> *,_List *,_AVLListX &,long,long) const2991 _List*  _TreeTopology::SplitTreeIntoClustersInt (node<long>* , _List* , _AVLListX& , long , long ) const
2992 // TODO SLKP 20180202: clearly this needs work (or needs to be deprecated)
2993 // returns the list of nodes included in the splits BELOW trav1
2994 {
2995     /*_List * dependancies = nil;
2996 
2997     for (long k = trav1->get_num_nodes(); k; k--)
2998     {
2999         _List * me = SplitTreeIntoClustersInt(trav1->go_down(k), res, counts, size, tol);
3000         if (me)
3001         {
3002             if (!dependancies)
3003                 checkPointer (dependancies = new _List);
3004 
3005             (*dependancies) << (*me);
3006             DeleteObject (me);
3007         }
3008     }
3009 
3010     long distinctElements = counts.GetXtra (counts.Find((BaseRef)trav1->in_object)),
3011          dLength = dependancies?dependancies->lLength:0;
3012 
3013     if (((trav1->parent==nil)||(distinctElements+dLength>=size-tol))&&(distinctElements+dLength<=size+tol))
3014     {
3015         _List   entry;
3016         _String nName;
3017         GetNodeName (trav1, nName);
3018         entry && & nName;
3019         entry && & _Constant (distinctElements);
3020         if (dependancies)
3021             entry << (*dependancies);
3022         (*res) && & entry;
3023 
3024         trav1 = trav1->parent;
3025         while (trav1)
3026         {
3027             long aidx = counts.Find((BaseRef)trav1->in_object);
3028             counts.SetXtra(aidx,counts.GetXtra (aidx) - distinctElements);
3029             trav1 = trav1->parent;
3030         }
3031 
3032         if (dependancies)
3033             dependancies->Clear();
3034         else
3035             checkPointer (dependancies = new _List);
3036 
3037         (*dependancies) && & nName;
3038     }
3039 
3040     return dependancies;*/
3041 
3042     // decide if child or i-node
3043 
3044     /*if (trav1->get_num_nodes())
3045     {
3046 
3047     }
3048     else
3049     {
3050         node <long>* pn = trav1->parent;
3051 
3052         while (pn)
3053         {
3054             long pidx = counts.Find((BaseRef)pn->in_object);
3055 
3056         }
3057     }*/
3058     // TBI
3059     return new _List;
3060 }
3061 
3062 //_______________________________________________________________________________________________
3063 
SplitTreeIntoClusters(unsigned long size,unsigned long tol) const3064 _List*  _TreeTopology::SplitTreeIntoClusters (unsigned long size, unsigned long tol) const {
3065 // TODO SLKP 20180202: clearly this needs work (or needs to be deprecated)
3066 // assume fixed rooting for now
3067 // returns the list of list speccing pointers to calcnodes rooting the splits
3068 // the member list contains 2 or more entries for each node:
3069 // itself, the number of (independent)leaves encompassed by that node,
3070 // and an optional references to the node whose
3071 // results it depends on.
3072 
3073 // assumes that size-tol>=2
3074 
3075     _SimpleList     counts;
3076     _AVLListX       cavl (&counts);
3077 
3078     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3079     while (node<long> * iterator = ni.Next()) {
3080         long nC = iterator->get_num_nodes();
3081         if (nC) {
3082             long c = 0;
3083             for (long k=1; k<=nC; k++) {
3084                 c += counts.list_data[iterator->go_down(k)->in_object];
3085             }
3086             cavl.Insert((BaseRef)iterator->in_object,c);
3087         } else {
3088             cavl.Insert((BaseRef)iterator->in_object,1);
3089         }
3090     }
3091 
3092     _List*                    result = new _List;
3093     DeleteObject(SplitTreeIntoClustersInt (theRoot, result, cavl, size, tol));
3094     return           result;
3095 }
3096 
3097   //_______________________________________________________________________________________________
3098 
MatchTreePattern(_TreeTopology const * compareTo) const3099 const _String _TreeTopology::MatchTreePattern (_TreeTopology const* compareTo) const {
3100     // TODO: 20180202, SLKP, this needs to be confirmed as working
3101     // the pattern is this
3102     // compare to is the potential match
3103   _List           myLeaves,
3104                   otherLeaves,
3105                   overlappedLeaves;
3106 
3107   _SimpleList     indexer,
3108                   otherIndexer;
3109 
3110   node<long>      *myCT,
3111                   *otherCT;
3112 
3113   _String         rerootAt;
3114 
3115   myCT            = prepTree4Comparison(myLeaves, indexer);
3116   otherCT         = compareTo->prepTree4Comparison(otherLeaves, otherIndexer);
3117 
3118 
3119   _SimpleList      matchedLeaves;
3120   overlappedLeaves.Intersect (myLeaves, otherLeaves,&matchedLeaves);
3121 
3122   unsigned long my_leaf_count = myLeaves.countitems();
3123 
3124   if ((my_leaf_count >=otherLeaves.countitems())&&(overlappedLeaves.countitems() == otherLeaves.countitems())) {
3125     if (my_leaf_count >otherLeaves.countitems()) {
3126         // map leaves back to ordering space
3127 
3128         // trim the pattern tree
3129       _SimpleList     allowedLeaves  (my_leaf_count, 0, 0),
3130                       recordTransfer (my_leaf_count),
3131                       inverted (my_leaf_count);
3132 
3133       inverted.Populate (indexer, _SimpleList::action_invert);
3134 
3135       for (long padresSuck = 0; padresSuck < matchedLeaves.lLength; padresSuck ++) {
3136         allowedLeaves.list_data[inverted.get(matchedLeaves.get(padresSuck))] = 1;
3137       }
3138 
3139       long slider = 0L;
3140 
3141       for (long dodgersSuck = 0L; dodgersSuck < my_leaf_count; dodgersSuck ++)
3142         if (allowedLeaves.get (dodgersSuck)) {
3143           recordTransfer.list_data[dodgersSuck] = slider++;
3144         }
3145 
3146         // pass 1 - delete all the superfluous leaves
3147 
3148       node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3149       while (node<long>* iterator = ni.Next()) {
3150         if (iterator != myCT && !iterator->is_leaf()) {
3151           _SimpleList*  descendants = ((_SimpleList*)iterator->in_object);
3152 
3153           if ((descendants&&(allowedLeaves.list_data[descendants->list_data[0]] == 0))||(!descendants)) {
3154               // mark this node for deletion
3155             if (descendants) {
3156               DeleteObject(descendants);
3157             }
3158 
3159             node<long>* sacLamb = iterator;
3160             ni.Next();
3161 
3162             if (sacLamb->parent->get_num_nodes()==1) {
3163               DeleteObject((BaseRef)sacLamb->parent->in_object);
3164               sacLamb->parent->in_object = 0L;
3165             }
3166 
3167             sacLamb->parent->detach_child (sacLamb->get_child_num());
3168             delete (sacLamb);
3169             continue;
3170           }
3171         }
3172       }
3173 
3174         // pass 2 - prune internal nodes with exactly one child
3175 
3176         // >O
3177       ni.Reset (myCT);
3178 
3179       while (node<long>* iterator = ni.Next()) {
3180         long nn = iterator->get_num_nodes();
3181         if (nn == 1) {
3182           node<long>* loneChild = iterator->go_down(1);
3183           DeleteObject((_SimpleList*)iterator->in_object);
3184           iterator->in_object = loneChild->in_object;
3185           iterator->detach_child (1);
3186           delete (loneChild);
3187         } else {
3188           if (nn > 1) {
3189             _SimpleList * myDescs = (_SimpleList*)iterator->in_object;
3190             myDescs->Clear();
3191 
3192             for (long cc = 1; cc <= nn; cc++) {
3193               _SimpleList temp;
3194 
3195               temp.Union (*myDescs, *(_SimpleList*)iterator->go_down(cc)->in_object);
3196 
3197               myDescs->Clear();
3198               myDescs->Duplicate (&temp);
3199             }
3200           } else {
3201             ((_SimpleList*)iterator->in_object)->list_data[0] = recordTransfer.list_data[((_SimpleList*)iterator->in_object)->list_data[0]];
3202           }
3203 
3204         }
3205       }
3206 
3207       _List   newLeaves;
3208       recordTransfer.Clear();
3209       inverted.Clear();
3210 
3211       for (long lc = 0; lc < allowedLeaves.lLength; lc++)
3212         if (allowedLeaves.get(lc)) {
3213           recordTransfer << newLeaves.lLength;
3214           inverted << newLeaves.lLength;
3215           newLeaves << myLeaves (indexer.get(lc));
3216         }
3217 
3218       SortLists (&newLeaves, &recordTransfer);
3219       SortLists (&recordTransfer,&inverted);
3220       indexer.Clear();
3221       indexer.Duplicate (&inverted);
3222       myLeaves.Clear();
3223       myLeaves.Duplicate (&newLeaves);
3224 
3225         // finally check whether the root still has 3 or more children
3226 
3227       if (myCT->get_num_nodes()==2) {
3228         node <long>* promoteMe = nil;
3229 
3230         if (myCT->go_down(1)->get_num_nodes ()) {
3231           promoteMe = myCT->go_down(1);
3232         } else {
3233           promoteMe = myCT->go_down(2);
3234         }
3235 
3236         long nn = promoteMe->get_num_nodes();
3237         if (nn) {
3238           for (long cc = 1; cc <= nn; cc++) {
3239             myCT->add_node (*promoteMe->go_down(cc));
3240           }
3241 
3242           myCT->detach_child (promoteMe->get_child_num());
3243           DeleteObject ((BaseRef)promoteMe->in_object);
3244           delete promoteMe;
3245         } else {
3246           HandleApplicationError ("Internal tree pattern error in MatchTreePattern");
3247           return   "Unequal: Error";
3248         }
3249       }
3250     }
3251 
3252     _SimpleList * reindexer = nil;
3253 
3254     if (!indexer.Equal (otherIndexer)) {
3255       otherIndexer.Populate (_SimpleList ((unsigned long)myLeaves.countitems()).Populate (indexer, _SimpleList::action_invert),
3256                              _SimpleList::action_compose);
3257       reindexer = &otherIndexer;
3258     }
3259 
3260     char compRes;
3261 
3262     if (internalTreeCompare (myCT, otherCT, reindexer, 1, myLeaves.lLength, nil, compareTo, true)>0) {
3263       rerootAt = kCompareEqualWithoutReroot;
3264     } else {
3265       long   tCount = 0;
3266 
3267 
3268       node_iterator <long> ni (otherCT, _HY_TREE_TRAVERSAL_POSTORDER);
3269       node<long> * iterator = ni.Next();
3270 
3271       while (iterator!=otherCT) {
3272         if (!iterator->is_leaf()) {
3273           compRes = internalTreeCompare (myCT, iterator, reindexer, 1, myLeaves.lLength, nil, compareTo, true);
3274           if (compRes>0) {
3275             break;
3276           } else if (compRes) {
3277             iterator = otherCT;
3278             break;
3279           }
3280         }
3281         iterator = ni.Next();
3282 
3283         tCount ++;
3284       }
3285 
3286       if (iterator!=otherCT) {
3287         ni.Reset (compareTo->theRoot);
3288         iterator = ni.Next();
3289         while (iterator!=theRoot) {
3290           if (tCount==1) {
3291             rerootAt = kCompareEqualWithReroot & *map_node_to_calcnode (iterator)->GetName() & '.';
3292             break;
3293           } else {
3294             tCount --;
3295           }
3296 
3297           iterator = ni.Next();
3298         }
3299       }
3300     }
3301     if (rerootAt.empty()) {
3302       rerootAt = kCompareUnequalToplogies;
3303     }
3304   } else {
3305     rerootAt = kCompareUnequalLabelSets;
3306   }
3307 
3308   destroyCompTree (myCT);
3309   destroyCompTree (otherCT);
3310   return          rerootAt;
3311 }
3312 
3313 //__________________________________________________________________________________
3314 
ComputeClusterTable(_SimpleList & result,_SimpleList & pswRepresentation) const3315 void        _TreeTopology::ComputeClusterTable (_SimpleList& result, _SimpleList& pswRepresentation) const {
3316 
3317     long            leaf_count = pswRepresentation.Element(-2L),
3318     upper_bound = pswRepresentation.countitems()-2L,
3319     leafCode  = 0L,
3320     L = 0L,
3321     R = 0L;
3322 
3323     result.Clear    ();
3324     result.Populate (3*leaf_count,-1,0);
3325 
3326 
3327     for (long k = 0; k < upper_bound; k+=2L) {
3328         if (pswRepresentation.get(k) < leaf_count) { // is a leaf
3329             R = leafCode++;
3330         } else {
3331             long row;
3332             L = pswRepresentation.get (k-2*pswRepresentation.get(k+1));
3333             if (k + 2L == upper_bound /* root */
3334                 || pswRepresentation.get(k+3L) == 0L) {
3335                 row = R;
3336             } else {
3337                 row = L;
3338             }
3339 
3340             result[row*3]   = L;
3341             result[row*3+1L] = R;
3342         }
3343     }
3344 }
3345 //__________________________________________________________________________________
ConvertFromPSW(_AVLListX & nodeMap,_SimpleList & pswRepresentation) const3346 _String*        _TreeTopology::ConvertFromPSW                       (_AVLListX& nodeMap,_SimpleList& pswRepresentation) const {
3347     _StringBuffer * result = new _StringBuffer (128L);
3348 
3349     if (pswRepresentation.countitems() > 4L) {
3350         long leafCount = pswRepresentation.Element (-2);
3351         // traverse backwards
3352         bool lastLeaf = false;
3353         _SimpleList     bounds;
3354 
3355         for (long k = pswRepresentation.countitems()-4L; k>=0L; k-=2L) {
3356             if (lastLeaf) {
3357                 (*result) << ',';
3358             }
3359             if (pswRepresentation.list_data[k] >= leafCount) { //
3360                 (*result) << ')';
3361                 lastLeaf = false;
3362                 bounds   << k-2*pswRepresentation.get(k+1);
3363             } else {
3364                 _String nodeName (*(_String*)nodeMap.Retrieve(pswRepresentation.get(k)));
3365                 nodeName.Flip();
3366                 (*result) << nodeName;
3367                 while (bounds.Element(-1) == k && bounds.nonempty()) {
3368                     (*result) << '(';
3369                     bounds.Pop();
3370                 }
3371                 lastLeaf = true;
3372             }
3373         }
3374     }
3375     result->TrimSpace();
3376     result->Flip();
3377     return result;
3378 }
3379 /*
3380  if (reference == false) {
3381  nodeMap.Clear();
3382  }
3383 
3384  pswRepresentation.Clear();
3385 
3386  long    leafIndex  = 0,
3387  iNodeCount = -1;
3388 
3389  _SimpleList levelBuffer;
3390 
3391  node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3392 
3393  while (node<long> * currentNode = ni.Next (&levelBuffer)) {
3394  _String nodeName = GetNodeName (currentNode);
3395 
3396  while (levelBuffer.countitems() <= ni.Level()) {
3397  levelBuffer << 0;
3398  }
3399 
3400  if (currentNode->is_leaf()) {
3401  pswRepresentation << leafIndex;
3402  pswRepresentation << 0;
3403  if (reference) {
3404  long remapped = nodeMap.Find(&nodeName);
3405  if (remapped < 0) {
3406  return false;
3407  } else {
3408  remapped = nodeMap.GetXtra (remapped);
3409  if (remapped >= 0) {
3410  pswRepresentation << remapped;
3411  } else {
3412  return false;
3413  }
3414  }
3415 
3416  leafIndex++;
3417  } else {
3418  nodeMap.Insert(nodeName.makeDynamic(), leafIndex++, false);
3419  }
3420  } else {
3421  pswRepresentation << iNodeCount;
3422  pswRepresentation << levelBuffer.list_data[ni.Level()];
3423  if (reference) {
3424  pswRepresentation << 0;
3425  } else {
3426  (*inames) && &nodeName;
3427  }
3428 
3429  iNodeCount--;
3430  }
3431  if (ni.Level()) {
3432  levelBuffer.list_data[ni.Level()-1] += levelBuffer.list_data[ni.Level()]+1;
3433  }
3434  levelBuffer.list_data[ni.Level()]   = 0;
3435  }
3436 
3437  for (long k = 0; k < pswRepresentation.lLength; k+=(reference?3:2))
3438  if (pswRepresentation.list_data[k] < 0) {
3439  pswRepresentation.list_data[k] = leafIndex-pswRepresentation.list_data[k]-1;
3440  }
3441 
3442  pswRepresentation << leafIndex;
3443  pswRepresentation << (-iNodeCount-1);
3444 
3445  return true;
3446 
3447 */
3448 //__________________________________________________________________________________
ConvertToPSW(_AVLListX & nodeMap,_List * inames,_SimpleList & pswRepresentation,bool reference) const3449 bool        _TreeTopology::ConvertToPSW (_AVLListX& nodeMap, _List* inames, _SimpleList& pswRepresentation, bool reference) const {
3450 
3451     if (reference == false) {
3452         nodeMap.Clear();
3453     }
3454 
3455     pswRepresentation.Clear();
3456 
3457     long    leafIndex  = 0L,
3458             iNodeCount = -1L;
3459 
3460     _SimpleList levelBuffer;
3461 
3462     node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3463 
3464     while (node<long> * currentNode = ni.Next ()) {
3465         _String nodeName = GetNodeName (currentNode);
3466 
3467 
3468         if (levelBuffer.countitems() <= ni.Level()) {
3469             levelBuffer.AppendRange(ni.Level() - levelBuffer.countitems() + 1 , 0, 0);
3470         }
3471 
3472         if (currentNode->is_leaf()) {
3473             pswRepresentation << leafIndex << 0;
3474             if (reference) {
3475                 long remapped = nodeMap.Find(&nodeName);
3476                 if (remapped == kNotFound) {
3477                     return false;
3478                 } else {
3479                     remapped = nodeMap.GetXtra (remapped);
3480                     if (remapped >= 0) {
3481                         pswRepresentation << remapped;
3482                     } else {
3483                         return false;
3484                     }
3485                 }
3486 
3487                 leafIndex++;
3488             } else {
3489                 nodeMap.Insert(nodeName.makeDynamic(), leafIndex++, false);
3490             }
3491         } else {
3492             pswRepresentation << iNodeCount << levelBuffer.get(ni.Level());
3493             if (reference) {
3494                 pswRepresentation << 0L;
3495             } else {
3496                 (*inames) && &nodeName;
3497             }
3498             iNodeCount--;
3499         }
3500         if (ni.Level()) {
3501             levelBuffer[ni.Level()-1] += levelBuffer.get(ni.Level())+1;
3502         }
3503         levelBuffer[ni.Level()]   = 0;
3504     }
3505 
3506     for (long k = 0; k < pswRepresentation.lLength; k+=(reference?3:2)) {
3507         if (pswRepresentation.get(k) < 0L) {
3508             pswRepresentation[k] = leafIndex-pswRepresentation.get(k)-1;
3509         }
3510     }
3511 
3512     pswRepresentation << leafIndex << (-iNodeCount-1);
3513 
3514     return true;
3515 }
3516 
3517 
3518 //__________________________________________________________________________________
3519 
SplitsIdentity(HBLObjectRef p,HBLObjectRef cache) const3520 _AssociativeList *   _TreeTopology::SplitsIdentity (HBLObjectRef p, HBLObjectRef cache)  const {
3521     // compare tree topologies
3522     _Matrix     * result  = new _Matrix (2,1,false,true),
3523                 * result2 = nil;
3524 
3525     _String    * tree_repr  = nil;
3526 
3527     long leaves, internals;
3528     EdgeCount (leaves, internals);
3529 
3530     result->theData[0] = internals - 1.;
3531     result->theData[1] = -1.;
3532 
3533     if (p && (p->ObjectClass() == TOPOLOGY || p->ObjectClass() == TREE)) {
3534         _List           avlSupport,
3535         iNames;
3536 
3537         _AVLListX       nameMap  (&avlSupport);
3538 
3539         _SimpleList     psw, psw2, clusters, inodeList;
3540 
3541 
3542         ConvertToPSW            (nameMap, &iNames, psw);
3543         ComputeClusterTable     (clusters, psw);
3544 
3545         if (((_TreeTopology*)p)->ConvertToPSW           (nameMap, nil, psw2, true)) {
3546             _SimpleList           workSpace;
3547             long leafCount      = psw.Element (-2),
3548                  upper_bound    = psw2.countitems() - 3L;
3549 
3550             for (long k = 0L; k < upper_bound; k+=3L) {
3551 
3552                 if (psw2.list_data[k] < leafCount) {
3553                     workSpace << 1L
3554                               << 1L
3555                               << psw2.get(k+2L)
3556                               << psw2.get(k+2L);
3557                 } else {
3558                     _SimpleList quad;
3559 
3560                     quad << leafCount+1 << 0 << 0 << 1;
3561 
3562                     long  w = psw2.get (k+1);
3563                     while (w > 0) {
3564                         _SimpleList quad2;
3565                         quad2 << workSpace.Pop() << workSpace.Pop() << workSpace.Pop() << workSpace.Pop();
3566                         w -= quad2.get (3);
3567                         quad.list_data[0] = Minimum (quad2.get(0),quad.get(0));
3568                         quad.list_data[1] = Maximum (quad2.get(1),quad.get(1));
3569                         quad.list_data[2] += quad2.get(2);
3570                         quad.list_data[3] += quad2.get(3);
3571                     }
3572 
3573                     if (quad.list_data[2] == quad.list_data[1] - quad.list_data[0] + 1) {
3574                         if (clusters.list_data[3*quad.list_data[0]] == quad.list_data[0] && clusters.list_data[3*quad.list_data[0]+1] == quad.list_data[1]) {
3575                             clusters.list_data[3*quad.list_data[0]+2] = 1;
3576                         } else if (clusters.list_data[3*quad.list_data[1]] == quad.list_data[0] && clusters.list_data[3*quad.list_data[1]+1] == quad.list_data[1]) {
3577                             clusters.list_data[3*quad.list_data[1]+2] = 1;
3578                         }
3579                     }
3580                     quad.Flip();
3581                     workSpace << quad;
3582                 }
3583             }
3584 
3585             psw2.Clear();
3586             long matchCount = 0,
3587             iNodeCount = 0;
3588 
3589             long L, R = -1L;
3590 
3591             _SimpleList leafSpans (leafCount,0,0),
3592             iNodesTouched;
3593 
3594             for (unsigned long k = 0; k < psw.lLength-2; k+=2) {
3595                 if (psw.list_data[k] < leafCount) {
3596                     R = psw.list_data[k];
3597                     psw2 << R;
3598                     psw2 << 0;
3599                     leafSpans.list_data[R] = (psw2.lLength>>1);
3600                 } else {
3601                     long ll = k-2*psw.list_data[k+1];
3602                     L       = psw.list_data[ll];
3603                     if ((clusters.list_data[3*L] == L && clusters.list_data[3*L+1] == R && clusters.list_data[3*L+2] > 0)
3604                         || (clusters.list_data[3*R] == L && clusters.list_data[3*R+1] == R && clusters.list_data[3*R+2] > 0)) {
3605                         L = (psw2.lLength>>1) - leafSpans.list_data[L] + 1;
3606                         psw2 << leafCount+iNodeCount++;
3607                         psw2 << L;
3608 
3609                         iNodesTouched << psw.list_data[k];
3610                     }
3611                 }
3612             }
3613 
3614             for (unsigned long k = 0; k < psw2.lLength; k+=2)
3615                 if (psw2.list_data[k] < leafCount) {
3616                     psw2.list_data[k+1] = 0;
3617                 } else {
3618                     matchCount++;
3619                 }
3620 
3621             psw2 << leafCount;
3622             psw2 << iNodeCount;
3623 
3624             result->theData[0] = psw.Element (-1);
3625             result->theData[1] = matchCount;
3626 
3627 
3628             tree_repr  = ConvertFromPSW (nameMap, psw2);
3629 
3630             _List sharedNames;
3631             for (long k = 0; k < iNodesTouched.lLength; k++) {
3632                 sharedNames << iNames (iNodesTouched(k)-leafCount);
3633             }
3634 
3635             result2 = new _Matrix (sharedNames);
3636         }
3637 
3638     }
3639 
3640     //DeleteObject (bc);
3641 
3642     _AssociativeList * resultList = new _AssociativeList;
3643     resultList->MStore ("CLUSTERS", result, false);
3644     if (result2) {
3645         resultList->MStore ("NODES",    result2, false);
3646     }
3647     resultList->MStore ("CONSENSUS", tree_repr ? new _FString (tree_repr) : new _FString(), false);
3648     return resultList;
3649 }
3650 
3651 //__________________________________________________________________________________
3652 
FindMaxCommonSubTree(_TreeTopology const * compare_to,long & size_tracker,_List * forest) const3653 const _String  _TreeTopology::FindMaxCommonSubTree (_TreeTopology const*  compare_to, long& size_tracker, _List* forest) const{
3654     _List           myLeaves,
3655                     otherLeaves,
3656                     sharedLeaves;
3657 
3658     _SimpleList     indexer,
3659                     otherIndexer,
3660                     sharedLeavesIDin1,
3661                     sharedLeavesIDin2;
3662 
3663 
3664     _String         rerootAt;
3665 
3666     node<long>      *myCT    = prepTree4Comparison(myLeaves, indexer),
3667                     *otherCT = compare_to->prepTree4Comparison(otherLeaves, otherIndexer);
3668 
3669     sharedLeaves.Intersect (otherLeaves,myLeaves,&sharedLeavesIDin1,&sharedLeavesIDin2);
3670 
3671     if (sharedLeaves.countitems() > 1UL) { // more than one common leaf
3672                                            // now we need to map shared leaves to a common indexing space
3673 
3674         _SimpleList     my_inverted_index, // was lidx1
3675                         other_inverted_index; // was lidx2
3676 
3677         my_inverted_index.Populate(indexer, _SimpleList::action_invert);
3678         other_inverted_index.Populate(otherIndexer, _SimpleList::action_invert);
3679 
3680         _SimpleList     reindexer (otherLeaves.countitems(),-1L,0L),
3681                         ldx1,
3682                         ldx2;
3683 
3684 
3685         for (long k2=0L; k2<sharedLeaves.countitems(); k2++) {
3686             reindexer [other_inverted_index.get (sharedLeavesIDin2.get (k2))] = my_inverted_index.get (sharedLeavesIDin1.get (k2));
3687             //          the original index of shared leaf k2  -> the original index of shared lead k1 (in post-order traveral)
3688         }
3689 
3690         // now we map actual leaf structures to their respective leaf indices
3691 
3692         node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3693 
3694         while (node<long>* iterator = ni.Next()) {
3695             if (iterator->is_leaf()) {
3696                 ldx1 << (long)iterator;
3697             }
3698         }
3699 
3700         ni.Reset (otherCT);
3701         while (node<long>* iterator = ni.Next()) {
3702             if (iterator->is_leaf()) {
3703                 ldx2 << (long)iterator;
3704             }
3705         }
3706 
3707         // now we loop through the list of leaves and try to match them all up
3708 
3709         _SimpleList     matchedTops,
3710                         matchedSize;
3711 
3712         for (long k3=0UL; k3<sharedLeaves.countitems()-1L; k3++) {
3713             long try_leaf_index = other_inverted_index.get (sharedLeavesIDin2.get (k3));
3714 
3715             if (reindexer.get (try_leaf_index) >= 0L) {
3716                 // leaf still available
3717                 node<long>*      ln1 = (node<long>*)ldx1.get (my_inverted_index.get (sharedLeavesIDin1.get (k3))),
3718                          *       ln2 = (node<long>*)ldx2.get (try_leaf_index),
3719                          *       p1  = ln1->parent,
3720                          *       p2  = ln2->parent;
3721 
3722                 bool             comparison_result = false;
3723 
3724                 while ((internalTreeCompare (p1,p2,&reindexer,0,myLeaves.lLength,p2->parent?nil:ln2,compare_to) == 1)&&p1&&p2) {
3725                     ln1 = p1;
3726                     ln2 = p2;
3727                     p1=p1->parent;
3728                     p2=p2->parent;
3729 
3730                     comparison_result = true;
3731                 }
3732 
3733                 if (comparison_result) {
3734                     _SimpleList* matchedLeaves = (_SimpleList*)ln2->in_object;
3735 
3736                     matchedTops << (long)ln1;
3737                     matchedSize << matchedLeaves->countitems();
3738 
3739                     matchedLeaves->Each ([&] (long value, unsigned long) -> void {
3740                         reindexer[value] = -1;
3741                     });
3742                 }
3743             }
3744         }
3745 
3746         if (matchedSize.nonempty()) {
3747 
3748             auto handle_subtree = [&] (node<long> const * top_node) -> _String * {
3749                 ni.Reset(myCT);
3750 
3751                 long internal_node_count = 0L;
3752 
3753                 while (node<long>* iterator = ni.Next()) {
3754                     if (!iterator->is_leaf()) {
3755                         if (iterator == top_node) {
3756                             break;
3757                         }
3758                         internal_node_count ++;
3759                     }
3760                 }
3761                 ni.Reset(theRoot);
3762                 while (node<long>* iterator = ni.Next()) {
3763                     if (!iterator->is_leaf()) {
3764                         if (internal_node_count == 0) {
3765                             return LocateVar(iterator->in_object)->GetName();
3766                         }
3767                         internal_node_count--;
3768                     }
3769                 }
3770                 return nil;
3771             };
3772 
3773             if (forest) {
3774                 size_tracker = 0L;
3775                 for (long k6=0L; k6<matchedSize.lLength; k6++) {
3776                     (*forest) << handle_subtree ((node<long>*)matchedTops.get (k6));
3777                     size_tracker += matchedSize.get (k6);
3778                 }
3779             } else {
3780                 long maxSz   = -1L,
3781                      maxIdx  =  0L;
3782 
3783                 matchedSize.Each ([&] (long value, unsigned long index) -> void {
3784                     if (StoreIfGreater(maxSz, value)) {
3785                         maxIdx = index;
3786                     }
3787                 });
3788 
3789                 size_tracker = maxSz;
3790                 _String const * res = handle_subtree ((node<long>*)matchedTops.list_data[maxIdx]);
3791                 destroyCompTree(myCT);
3792                 destroyCompTree(otherCT);
3793                 return *res;
3794             }
3795         }
3796     }
3797     destroyCompTree(myCT);
3798     destroyCompTree(otherCT);
3799     return kEmptyString;
3800 }
3801 
3802 
3803 //__________________________________________________________________________________
3804 
CompareSubTrees(_TreeTopology const * compareTo,node<long> * topNode) const3805 const _String  _TreeTopology::CompareSubTrees (_TreeTopology const* compareTo, node<long>* topNode) const {
3806     // compare tree topologies
3807 
3808     _List           myLeaves,
3809     otherLeaves,
3810     sharedLeaves;
3811 
3812     _SimpleList     indexer,
3813     otherIndexer,
3814     sharedLeavesID;
3815 
3816     node<long>      *myCT,
3817     *otherCT;
3818 
3819     _String         rerootAt;
3820 
3821     myCT            = prepTree4Comparison(myLeaves, indexer);
3822     otherCT         = compareTo->prepTree4Comparison(otherLeaves, otherIndexer, topNode);
3823 
3824     sharedLeaves.Intersect (myLeaves, otherLeaves,&sharedLeavesID);
3825 
3826     // first compare the inclusion for the set of leaf labels
3827 
3828     if (sharedLeavesID.countitems() == otherLeaves.countitems()) {
3829         _SimpleList ilist;
3830 
3831         ilist.Populate(myLeaves.countitems(), -1L, 0L);
3832 
3833         for (long k2 = 0L; k2 < otherIndexer.countitems(); k2++) {
3834             ilist [sharedLeavesID.get(otherIndexer.get(k2))] = k2;
3835         }
3836 
3837         for (long k2 = 0L; k2<indexer.countitems(); k2++) {
3838             long         lidx      = ilist.list_data[indexer.get(k2)];
3839             indexer[k2] = lidx >=0L ? lidx : -1L;
3840         }
3841 
3842         _SimpleList *reindexer = &indexer;
3843 
3844         // now compare explore possible subtree matchings
3845         // for all internal nodes of this tree except the root
3846 
3847         char   compRes = 0;
3848 
3849         long   tCount = 1L,
3850         nc2 = topNode->get_num_nodes();
3851 
3852         node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3853         ni.Next();
3854         node<long>* iterator = ni.Next();
3855 
3856         while (iterator!=myCT) {
3857             long nc = iterator->get_num_nodes();
3858             if (nc == nc2) {
3859                 long kk;
3860                 for (kk = 1; kk <= nc; kk++) {
3861                     compRes = internalTreeCompare (otherCT,iterator, reindexer, 0, otherLeaves.lLength, iterator->go_down(kk),compareTo);
3862                     if (compRes) {
3863                         if (compRes == -1) {
3864                             iterator = myCT;
3865                         }
3866                         break;
3867                     }
3868                 }
3869                 if (kk>nc) {
3870                     compRes = internalTreeCompare (otherCT,iterator, reindexer, 0, otherLeaves.lLength, nil, compareTo);
3871                     if (compRes) {
3872                         if (compRes == -1) {
3873                             iterator = myCT;
3874                         }
3875                         break;
3876                     }
3877                 } else {
3878                     break;
3879                 }
3880             }
3881             tCount ++;
3882             iterator = ni.Next();
3883         }
3884 
3885         if (iterator != myCT) {
3886             ni.Reset (theRoot);
3887             iterator = ni.Next ();
3888             while (iterator != theRoot) {
3889                 if (tCount==0) {
3890                     rerootAt = _String("Matched at the ") & *map_node_to_calcnode (iterator)->GetName() & '.';
3891                     break;
3892                 } else {
3893                     tCount --;
3894                 }
3895 
3896                 iterator = ni.Next();
3897             }
3898         } else {
3899             long nc = myCT->get_num_nodes();
3900 
3901             if (nc == nc2+1)
3902                 for (long kk = 1; kk <= nc; kk++) {
3903                     compRes = internalTreeCompare (otherCT,myCT, reindexer, 0, otherLeaves.lLength, myCT->go_down(kk),compareTo);
3904                     if (compRes==1) {
3905                         break;
3906                     }
3907                 }
3908 
3909             if (compRes == 1) {
3910                 rerootAt = "Matched at the root.";
3911             }
3912         }
3913 
3914         if (rerootAt.empty()) {
3915             rerootAt = "No match: Different topologies (matching label sets).";
3916         }
3917     } else {
3918         rerootAt = "No match: Unequal label sets.";
3919     }
3920 
3921     destroyCompTree (myCT);
3922     destroyCompTree (otherCT);
3923 
3924     return          rerootAt;
3925 }
3926 
3927 
3928 
3929 
3930 
3931 
3932