1 /*
2
3 HyPhy - Hypothesis Testing Using Phylogenies.
4
5 Copyright (C) 1997-now
6 Core Developers:
7 Sergei L Kosakovsky Pond (sergeilkp@icloud.com)
8 Art FY Poon (apoon42@uwo.ca)
9 Steven Weaver (sweaver@temple.edu)
10
11 Module Developers:
12 Lance Hepler (nlhepler@gmail.com)
13 Martin Smith (martin.audacis@gmail.com)
14
15 Significant contributions from:
16 Spencer V Muse (muse@stat.ncsu.edu)
17 Simon DW Frost (sdf22@cam.ac.uk)
18
19 Permission is hereby granted, free of charge, to any person obtaining a
20 copy of this software and associated documentation files (the
21 "Software"), to deal in the Software without restriction, including
22 without limitation the rights to use, copy, modify, merge, publish,
23 distribute, sublicense, and/or sell copies of the Software, and to
24 permit persons to whom the Software is furnished to do so, subject to
25 the following conditions:
26
27 The above copyright notice and this permission notice shall be included
28 in all copies or substantial portions of the Software.
29
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
31 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
32 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
33 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
34 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
35 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
36 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
37
38 */
39
40 #include "function_templates.h"
41
42 #include "topology.h"
43 #include "tree.h"
44 #include "vector.h"
45
46 #include "global_things.h"
47 #include "hbl_env.h"
48
49 #include <ctype.h>
50
51 using namespace hy_global;
52 using namespace hy_env;
53
54 const _String _TreeTopology::kCompareEqualWithReroot = "Equal with reroot at ",
55 _TreeTopology::kCompareEqualWithoutReroot = "Equal without rerooting",
56 _TreeTopology::kCompareUnequalToplogies = "Unequal topologies (matching label sets).",
57 _TreeTopology::kCompareUnequalLabelSets = "Unequal label sets.",
58 _TreeTopology::kMeta = "__meta";
59
60 //_______________________________________________________________________________________________
61
_TreeTopology()62 _TreeTopology::_TreeTopology () {
63 rooted = UNROOTED;
64 }
65
66 //_______________________________________________________________________________________________
_TreeTopology(_TheTree const * top)67 _TreeTopology::_TreeTopology (_TheTree const *top):_CalcNode (*top->GetName(), kEmptyString) {
68 _TreeTopology::PreTreeConstructor (false);
69 if (top->theRoot) {
70 isDefiningATree = kTreeIsBeingParsed;
71 theRoot = top->theRoot->duplicate_tree ();
72
73 _TreeTopologyParseSettings parse_settings = CollectParseSettings ();
74
75 ConditionalTraverser (
76 [&] (node<long>* iterator, node_iterator<long> const& ni) -> bool {
77 _String nodeVS (top->GetBranchValue (iterator)),
78 *nodeSpec = map_node_to_calcnode(iterator)->GetBranchSpec();
79
80 FinalizeNode (iterator, 0, top->GetNodeName (iterator), *nodeSpec, nodeVS, parse_settings);
81 DeleteObject (nodeSpec);
82 return false;
83 },
84 false);
85 isDefiningATree = kTreeNotBeingDefined;
86 } else {
87 HandleApplicationError ("Can't create an empty tree");
88 return;
89 }
90 }
91
92 //_______________________________________________________________________________________________
_TreeTopology(_TreeTopology const & top)93 _TreeTopology::_TreeTopology (_TreeTopology const &top) {
94 //PreTreeConstructor (false);
95 *this = top;
96 }
97
98 //_______________________________________________________________________________________________
operator =(_TreeTopology const & top)99 _TreeTopology const & _TreeTopology::operator = (_TreeTopology const &top) {
100 if (top.theRoot) {
101 theRoot = top.theRoot->duplicate_tree ();
102 flatTree.Duplicate(&top.flatTree),
103 flatCLeaves.Duplicate(&top.flatCLeaves);
104 rooted = top.rooted;
105 } else {
106 HandleApplicationError ("Can't create an empty tree");
107 }
108 return *this;
109 }
110
111
112 //_______________________________________________________________________________________________
_TreeTopology(_String const & name,_String const & parms,bool dupMe,_AssociativeList * mapping)113 _TreeTopology::_TreeTopology (_String const & name, _String const & parms, bool dupMe, _AssociativeList* mapping):_CalcNode (name,kEmptyString)
114 // builds a tree from a string
115 {
116 _TreeTopology::PreTreeConstructor (dupMe);
117 _TreeTopologyParseSettings parse_settings = CollectParseSettings();
118
119 if (_AssociativeList * meta = MainTreeConstructor (parms, parse_settings, false, mapping)) {
120 _TreeTopology::PostTreeConstructor (dupMe, meta);
121 } else {
122 DeleteObject (compExp);
123 compExp = nil;
124 }
125 }
126
127 //_______________________________________________________________________________________________
_TreeTopology(_String const * name)128 _TreeTopology::_TreeTopology (_String const * name):_CalcNode (*name,kEmptyString) {}
129
130 //_______________________________________________________________________________________________
131
~_TreeTopology(void)132 _TreeTopology::~_TreeTopology (void) {
133 if (theRoot) {
134 theRoot->delete_tree();
135 delete (theRoot);
136 theRoot = nil;
137 }
138 if (compExp) {
139 DeleteAndZeroObject(compExp);
140 }
141 }
142
143 //_______________________________________________________________________________________________
144
PreTreeConstructor(bool)145 void _TreeTopology::PreTreeConstructor (bool) {
146 rooted = UNROOTED;
147 compExp = new _Vector;
148 }
149
150 //_______________________________________________________________________________________________
151
PostTreeConstructor(bool make_copy,_AssociativeList * meta)152 void _TreeTopology::PostTreeConstructor (bool make_copy, _AssociativeList * meta) {
153
154 auto variable_handler = [&] (void) -> void {
155 /** TODO SLKP 20171211, make sure the semantics are unchanged */
156 variablePtrs.Replace (get_index(), make_copy ? this->makeDynamic() : this, false);
157 setParameter (WrapInNamespace (_TreeTopology::kMeta, GetName()), meta ? meta : new _MathObject, nil, false);
158
159
160 /*
161 BaseRef temp = variablePtrs(theIndex);
162 variablePtrs[theIndex]=make_copy ? this->makeDynamic() : this;
163 DeleteObject(temp);
164 */
165 };
166
167 bool accept_rooted = EnvVariableTrue(accept_rooted_trees);
168
169 if (theRoot->get_num_nodes() <= 2) { // rooted tree - check
170 if (accept_rooted == false) {
171
172 long node_index = theRoot->get_data();
173
174 bool recurse = false;
175
176 if (theRoot->get_num_nodes() == 2) {
177 for (int i = 1; i<=2; i++) {
178 node<long> *node_temp = theRoot->go_down(i);
179 if (node_temp->get_num_nodes()) { // an internal node - make it a root
180 node_temp->detach_parent();
181 node_temp->add_node(*theRoot->go_down(3-i));
182 delete theRoot;
183 theRoot = node_temp;
184 rooted = i == 1 ? ROOTED_LEFT : ROOTED_RIGHT;
185 ReportWarning (_String("Rooted topology. Removing one branch - the ") & (i==1 ? "left" : "right") & " root child has been promoted to be the new root");
186 break;
187 }
188 }
189
190 if (rooted==UNROOTED) {
191 ReportWarning ("One branch topology supplied - hopefully this IS what you meant to do.");
192 node<long> *node_temp = theRoot->go_down(1);
193 node_temp->detach_parent();
194 node_temp->add_node(*theRoot->go_down(2));
195 delete theRoot;
196 theRoot = node_temp;
197 rooted = ROOTED_LEFT;
198 }
199 } else {
200 if (theRoot->get_num_nodes() == 0) {
201 ReportWarning ("An empty topology has been constructed");
202 variable_handler ();
203 return;
204 }
205 node<long> *node_temp = theRoot->go_down(1);
206 node_temp->detach_parent();
207 delete theRoot;
208 theRoot = node_temp;
209 ReportWarning ("The root has a single child, which is be promoted to the root");
210 recurse = false;
211 }
212
213 flatTree.Delete (node_index);
214 flatCLeaves.Delete (node_index);
215 ((_Vector*)compExp)->Delete (node_index);
216
217 node_iterator<long> tree_iterator (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
218 while (node<long>*topTraverser = tree_iterator.Next()) {
219 if (topTraverser->get_data () > node_index) {
220 topTraverser->init (topTraverser->get_data () - 1);
221 }
222 }
223
224 if (recurse) {
225 PostTreeConstructor (make_copy, meta);
226 return;
227 }
228 }
229 }
230 variable_handler ();
231
232 }
233
234 //_______________________________________________________________________________________________
235
236 struct _any_char_in_set {
237
238 bool valid [256];
_any_char_in_set_any_char_in_set239 _any_char_in_set (char * accept) {
240 InitializeArray(valid, 256, false);
241 long c = strlen (accept);
242 for (unsigned long i = 0UL; i < c; i++) {
243 valid [accept[i]] = true;
244 }
245 }
246
247
invert_any_char_in_set248 void invert () {
249 ArrayForEach(valid, 256, [] (bool v, unsigned long) -> bool {return !v;});
250 }
251
operator ==_any_char_in_set252 bool operator == (char c) {
253 return valid [(unsigned char)c];
254 }
255
256 };
257
258 //_______________________________________________________________________________________________
259
CollectParseSettings(void)260 const _TreeTopologyParseSettings _TreeTopology::CollectParseSettings (void) {
261 _String const kInternalNodePrefix("INTERNAL_NODE_PREFIX"),
262 kIgnoreUserINames ("IGNORE_INTERNAL_NODE_LABELS");
263
264 _TreeTopologyParseSettings parse_settings;
265
266 parse_settings.auto_convert_lengths = EnvVariableTrue(automatically_convert_branch_lengths);
267 parse_settings.accept_user_lengths = EnvVariableTrue(accept_branch_lengths);
268 parse_settings.ingore_user_inode_names = EnvVariableTrue(kIgnoreUserINames);
269
270 HBLObjectRef user_node_name = EnvVariableGet(kInternalNodePrefix, STRING);
271 if (user_node_name) {
272 parse_settings.inode_prefix = ((_FString*)user_node_name)->get_str();
273 }
274
275 return parse_settings;
276
277 }
278
279 //_______________________________________________________________________________________________
MainTreeConstructor(_String const & parms,_TreeTopologyParseSettings & parse_settings,bool checkNames,_AssociativeList * mapping)280 _AssociativeList* _TreeTopology::MainTreeConstructor (_String const& parms, _TreeTopologyParseSettings & parse_settings, bool checkNames, _AssociativeList* mapping) {
281
282 /** TODO SLKP 20171211 this parser needs to be checked
283 It admits invalid strings like
284 ( a (1,2) , d (4,5), 6)
285 strings like ( (,), (,))
286 are parsed incorrectly
287 */
288
289
290 long i,
291 nodeCount=0,
292 lastNode;
293
294 static _String kBootstrap ("bootstrap"),
295 kComment ("comment");
296
297 static long kBLLookahead = 128;
298
299 _SimpleList nodeStack,
300 nodeNumbers;
301
302 _String nodeParameters,
303 nodeValue,
304 nodeComment,
305 nodeBootstrap;
306
307 _StringBuffer nodeName;
308 _AssociativeList * node_comments = new _AssociativeList;
309
310 _List local_var_manager;
311 local_var_manager < node_comments;
312
313 _any_char_in_set non_space (" \t\r\n"),
314 newick_delimiter (" \t\r\n(),{}[];:");
315 non_space.invert();
316 newick_delimiter.valid [0] = true;
317
318
319 char lastChar = '\0';
320 bool isInLiteral = false;
321
322 node<long>* currentNode = theRoot = nil,
323 * newNode = nil,
324 * parentNode = nil;
325
326 isDefiningATree = kTreeIsBeingParsed;
327
328
329 try {
330 auto push_new_node = [&] (bool make_new) -> void {
331 currentNode = newNode;
332 nodeStack << (long) currentNode;
333 nodeNumbers << nodeCount;
334 nodeCount ++;
335 if (make_new) {
336 newNode = new node <long>;
337 }
338 };
339
340 auto pop_node = [&] () -> void {
341 nodeStack.Pop();
342 nodeNumbers.Pop();
343 //nodeName = kEmptyString;
344 nodeName.Reset();
345 };
346
347 for (i=0; i<parms.length(); i++) {
348
349 char look_at_me = parms.char_at(i);
350
351 if (isspace (look_at_me)) {
352 continue;
353 }
354
355 switch (look_at_me) {
356 case '(': { // creating a new internal node one level down
357 // a new node
358
359 newNode = new node<long>;
360
361 if (lastChar == '(' || lastChar == ',') {
362 if (!currentNode) {
363 delete newNode;
364 throw _String ("Unexpected '") & lastChar & "'";
365 }
366 currentNode->add_node (*newNode);
367 } else {
368 if (theRoot) {
369 parentNode = currentNode->get_parent();
370 if (!parentNode) {
371 throw _String ("Unexpected '") & lastChar & "'";
372 } else {
373 parentNode->add_node (*newNode);
374 }
375 } else {
376 theRoot = newNode;
377 }
378 push_new_node (true);
379 currentNode->add_node (*newNode);
380 }
381 push_new_node (false);
382 break;
383 }
384
385 case ',':
386 case ')': { // creating a new node on the same level and finishes updating the list of parameters
387 if (nodeStack.empty()) {
388 throw _String ("Unexpected '") & look_at_me & "'";
389 }
390 lastNode = nodeStack.countitems ()-1;
391 parentNode = (node<long>*)nodeStack.get (lastNode);
392 if (mapping) {
393 _FString * mapped_name = (_FString*)mapping->GetByKey (nodeName, STRING);
394 if (!mapped_name) {
395 mapped_name = (_FString*)mapping->GetByKey (nodeName.ChangeCase(kStringUpperCase), STRING);
396 }
397 if (mapped_name) {
398 //printf ("%s => %s\n", nodeName.get_str(), mapped_name->get_str().get_str());
399 nodeName = _String (mapped_name->get_str());
400 }
401 }
402
403 // handle bootstrap support for this node
404
405 if (parentNode->get_num_nodes()) {
406 // internal node, check to see if the node name is a bootstrap value
407 _SimpleList numerical_match (nodeName.RegExpMatch(hy_float_regex, 0));
408 if (numerical_match.nonempty() && numerical_match(0) == 0) {
409 nodeBootstrap = nodeName.Cut (0, numerical_match(1));
410 nodeName.Reset();
411 }
412 }
413
414 nodeName = FinalizeNode (parentNode, nodeNumbers.get (lastNode), nodeName, nodeParameters, nodeValue, parse_settings);
415
416 if (nodeComment.nonempty() || nodeBootstrap.nonempty()) {
417 _AssociativeList * node_info = new _AssociativeList;
418 if (nodeComment.nonempty()) {
419 if (node_info->MStore(&kComment, new _FString (nodeComment), false)) {
420 kComment.AddAReference();
421 // SLKP 20190507: this is not thread safe
422 }
423 }
424 if (nodeBootstrap.nonempty()) {
425 if (node_info->MStore(&kBootstrap, new _Constant (nodeBootstrap.to_float()), false)) {
426 kBootstrap.AddAReference();
427 // SLKP 20190507: this is not thread safe
428 }
429 }
430 _String * dict_key = new _String (nodeName);
431 if (!node_comments->MStore (dict_key, node_info, false)) {
432 delete dict_key;
433 }
434 }
435
436 nodeParameters.Clear();
437 nodeComment.Clear();
438 nodeBootstrap.Clear();
439 pop_node ();
440
441 if (look_at_me == ',') { // also create a new node on the same level
442 newNode = new node<long>;
443 if (!(parentNode = parentNode->get_parent())) {
444 throw _String ("Unexpected '") & look_at_me & "'";
445 }
446 push_new_node (false);
447 parentNode->add_node(*currentNode);
448 }
449 break;
450 }
451
452 case '{' : { // parameter list definition
453 long closing_brace = parms.FindTerminator(i+1, '}');
454
455 if (closing_brace<0) {
456 throw _String ("'{' has no matching '}'");
457 }
458 nodeParameters = parms.Cut (i+1,closing_brace-1);
459 i = closing_brace;
460 break;
461 }
462
463 case '[' : { // hackish Newick annotation
464 long closing_bracket = parms.FindTerminator(i+1, ']');
465 if (closing_bracket<0) {
466 throw _String ("'[' has no matching ']'");
467 }
468 nodeComment = parms.Cut (i+1,closing_bracket-1);
469 i = closing_bracket;
470 break;
471 }
472
473 case ':' : { // tree branch definition
474
475 // TODO SLKP 20171211: confirm that this will handle expressions with spaces like '..): 0.1'
476 /*_SimpleList numerical_match (parms.RegExpMatch(hy_float_regex, i+1));
477 if (numerical_match.empty()) {
478 throw _String("Failed to read a number for the branch length following ':'");
479 }
480 nodeValue = parms.Cut (numerical_match(0), numerical_match(1));
481 i = numerical_match(1);*/
482
483 /** SLKP 20200813
484 sscanf is going to call strlen every time, which on long strings could be QUITE expensive!
485 to limit the cost of this, we are going to assume that the float, if it's there is present within kBLLookahead characters of the current position, copy out the next kBLLookahead chars, and read from there.
486 */
487
488 int end_at;
489 hyFloat length = 0.;
490
491 if (i + kBLLookahead < parms.length()) {
492 char buffer [kBLLookahead+1];
493 memcpy (buffer, parms.get_str() + i + 1, kBLLookahead);
494 buffer [kBLLookahead] = 0;
495 if (sscanf (buffer, "%lf%n", &length, &end_at) != 1) {
496 throw _String("Failed to read a number for the branch length following ':'");
497 }
498 } else {
499 if (sscanf (parms.get_str()+i+1, "%lf%n", &length, &end_at) != 1) {
500 throw _String("Failed to read a number for the branch length following ':'");
501 }
502 }
503 nodeValue = parms.Cut (i+1, i+end_at);
504 i += end_at;
505 break;
506 }
507
508 case ';' : { // done with tree definition
509 break;
510 }
511
512 default: { // node name
513
514 if (nodeName.nonempty()) {
515 throw _String ("Unexpected node name");
516 }
517
518 long end_of_id = parms.ExtractEnclosedExpression(i, non_space, newick_delimiter, fExtractRespectQuote | fExtractRespectEscape | fExtractOneLevelOnly);
519 if (end_of_id == kNotFound) {
520 throw _String ("Unexpected end of tree string while searching for a node name");
521 } else {
522 nodeName = parms.Cut (i, end_of_id-1);
523 nodeName.StripQuotes("'\"","'\"");
524 }
525
526 i = end_of_id - 1;
527 break;
528 }
529 }
530
531 lastChar = look_at_me;
532 }
533 } catch (const _String& error) {
534 isDefiningATree = kTreeNotBeingDefined;
535 HandleApplicationError ( error & ", in the following string context " &
536 parms.Cut(i>31L?i-32L:0L,i)&
537 " <ERROR HERE> "&
538 parms.Cut(i+1L,parms.length()-i>32L?i+32L:-1L)
539 );
540
541 return nil;
542 }
543
544 if (!theRoot) {
545 isDefiningATree = kTreeNotBeingDefined;
546 HandleApplicationError ("Can't create empty trees.");
547 return nil;
548 }
549
550 if (nodeStack.countitems() > 1) {
551 HandleApplicationError ("Unbalanced () in tree string");
552 }
553 if (nodeStack.countitems() == 1) {
554 parentNode = (node<long>*)nodeStack(0);
555 if (mapping) {
556 _FString * mapped_name = (_FString*)mapping->GetByKey (nodeName, STRING);
557 if (mapped_name) {
558 nodeName = _String(mapped_name->get_str());
559 }
560 }
561 FinalizeNode (parentNode, nodeNumbers.get (lastNode), nodeName, nodeParameters, nodeValue, parse_settings);
562 }
563
564 isDefiningATree = kTreeNotBeingDefined;
565
566 //printf ("%s\n", _String ((_String*)node_comments->toStr()).get_str());
567 node_comments->AddAReference();
568 return node_comments;
569
570 }
571
572 //_______________________________________________________________________________________________
573
FinalizeNode(node<long> * nodie,long number,_String nodeName,_String const & nodeParameters,_String & nodeValue,_TreeTopologyParseSettings const & settings)574 _String _TreeTopology::FinalizeNode (node<long>* nodie, long number , _String nodeName, _String const& nodeParameters, _String& nodeValue, _TreeTopologyParseSettings const& settings) {
575
576 if (nodeName.empty() || (settings.ingore_user_inode_names && nodie->get_num_nodes()>0)) {
577 nodeName = settings.inode_prefix & number;
578 }
579
580 _String node_parameters = nodeParameters;
581
582 if (nodie==theRoot) {
583 node_parameters = kEmptyString;
584 nodeValue = kEmptyString;
585 }
586
587 nodie->in_object = flatTree.countitems();
588 flatTree && & nodeName;
589 flatCLeaves.AppendNewInstance (new _String (&node_parameters, false));
590
591 ((_Vector*)compExp)->Store (ProcessTreeBranchLength(nodeValue));
592
593 node_parameters.Clear();// = kEmptyString;
594 nodeValue.Clear(); // = kEmptyString;
595
596 return nodeName;
597 }
598
599 //_______________________________________________________________________________________________
600
FindNodeByName(_String const * match) const601 node<long>* _TreeTopology::FindNodeByName (_String const* match) const {
602 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
603 while (node<long>* iterator = ni.Next()) {
604 if (*match == GetNodeName (iterator)) {
605 return iterator;
606 }
607 }
608 return nil;
609
610 }
611
612 //_______________________________________________________________________________________________
613
_RemoveNodeList(_SimpleList const & clean_indices)614 void _TreeTopology::_RemoveNodeList (_SimpleList const& clean_indices) {
615 flatTree.DeleteList(clean_indices);
616 flatCLeaves.DeleteList(clean_indices);
617
618
619 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
620
621 while (node<long>* iterator = ni.Next()) {
622 if ((iterator->in_object = clean_indices.CorrectForExclusions(iterator->in_object, -1)) < 0L) {
623 throw _String ("Internal error: index remap failed");
624 }
625 }
626 }
627
628 //_______________________________________________________________________________________________
629
RemoveANode(HBLObjectRef nodeName)630 void _TreeTopology::RemoveANode (HBLObjectRef nodeName) {
631
632 // TODO SLKP 20171213; why did the original implementation delete the entire path
633 // up to the root?
634
635 try {
636 _SimpleList clean_indices;
637
638 auto RemoveNodeByName = [this, &clean_indices] (_FString* remove_me) -> void {
639
640 node<long>* remove_this_node = FindNodeByName (&remove_me->get_str()),
641 * parent_of_removed_node;
642
643 if (!remove_this_node || ( parent_of_removed_node = remove_this_node->get_parent()) == nil) {
644 throw _String ("Node ") & remove_me->get_str().Enquote() & " not found in the tree or is the root node";
645 }
646
647
648 while (parent_of_removed_node) {
649 clean_indices << remove_this_node->in_object;
650 // printf ("Removing %s\n", GetNodeName(remove_this_node).get_str());
651 parent_of_removed_node->detach_child(remove_this_node->get_child_num());
652
653
654 for (int orphans = 1; orphans <= remove_this_node->get_num_nodes(); orphans++) {
655 parent_of_removed_node->add_node(*remove_this_node->go_down(orphans));
656 }
657
658 delete remove_this_node;
659 remove_this_node = parent_of_removed_node;
660 parent_of_removed_node = parent_of_removed_node->get_parent();
661
662 // SLKP 20171213: only delete nodes up the chain if they have a single child
663 if (remove_this_node->get_num_nodes() == 1) {
664 if (parent_of_removed_node == nil) {
665 /* we are promoting the single remaining child of the current root to be the root */
666 theRoot = remove_this_node->go_down (1);
667 theRoot->detach_parent();
668 clean_indices << remove_this_node->in_object;
669 delete remove_this_node;
670
671
672 //parent_of_removed_node = remove_this_node->get_parent();
673 break;
674 }
675 } else {
676 break;
677 }
678 }
679 };
680
681 if (nodeName->ObjectClass () == STRING) {
682 RemoveNodeByName ((_FString*)nodeName);
683 clean_indices.Sort();
684 _RemoveNodeList (clean_indices);
685
686 } else if (nodeName->ObjectClass () == MATRIX) {
687 _Matrix * names = (_Matrix* ) nodeName;
688 if (names->IsAStringMatrix()) {
689 names->ForEach([RemoveNodeByName] (HBLObjectRef n, unsigned long, unsigned long) -> void {if (n) RemoveNodeByName ((_FString*)n);},
690 [names] (unsigned long i) -> HBLObjectRef {return names->GetFormula(i, -1)->Compute();});
691 } else {
692 throw _String ("Matrix-valued argument was expected to contain strings");
693
694 }
695 } else {
696 throw _String ("An invalid argument (not a string or a string matrix) supplied");
697 }
698 } catch (const _String& err) {
699 HandleApplicationError (err & " in " & __PRETTY_FUNCTION__);
700 }
701
702 }
703
704
705 //_______________________________________________________________________________________________
706
AddANode(HBLObjectRef newNode)707 void _TreeTopology::AddANode (HBLObjectRef newNode) {
708
709 static const _String kNewNodeGraftName ("NAME"),
710 kNewNodeGraftWhere ("WHERE"),
711 kNewNodeGraftParent ("PARENT"),
712 kNewNodeGraftLength ("LENGTH"),
713 kNewNodeGraftParentLength ("PARENT_LENGTH");
714
715 try {
716
717 if (newNode->ObjectClass () == ASSOCIATIVE_LIST) {
718
719 _AssociativeList * newNodeSpec = (_AssociativeList*)newNode;
720 _FString * newName = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftName, STRING),
721 * newLocation = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftWhere, STRING),
722 * newParent = (_FString*)newNodeSpec->GetByKey (kNewNodeGraftParent, STRING);
723
724 _Constant * branchLengthSelf = (_Constant*)newNodeSpec->GetByKey (kNewNodeGraftLength, NUMBER),
725 * branchLengthParent = (_Constant*)newNodeSpec->GetByKey (kNewNodeGraftParentLength, NUMBER);
726
727
728 if (!newLocation) {
729 throw _String("Missing/invalid mandatory argument ") & kNewNodeGraftWhere.Enquote();
730 }
731
732
733 if (! (newName || newParent)) {
734 throw _String("Either ") & kNewNodeGraftName.Enquote() & " or " & kNewNodeGraftParent.Enquote() & " (or both) must be defined";
735 }
736
737 node<long>* graftAt = FindNodeByName (&newLocation->get_str());
738 if (!graftAt || graftAt->get_parent() == nil) {
739 throw _String ("Attachment node must be an exiting non-root node ");
740 }
741
742 node<long>* newp = newParent ? new node<long> : nil,
743 * curp = graftAt->get_parent();
744
745 if (newp) {
746 newp->set_parent (*curp);
747 newp->add_node (*graftAt);
748 curp->replace_node(graftAt,newp);
749 }
750
751 if (newName && !newName->empty()) {
752 node<long>* newt = new node<long>;
753 if (newp) {
754 newp->add_node(*newt);
755 } else {
756 graftAt->add_node (*newt);
757 }
758 _String bl (branchLengthSelf ? branchLengthSelf->Value() : kEmptyString);
759 FinalizeNode (newt, 0, newName->get_str(), kEmptyString, bl, CollectParseSettings());
760 }
761
762 if (newp && ! newParent->empty()) {
763 _String bl (branchLengthParent ? branchLengthParent->Value() : kEmptyString);
764 FinalizeNode (newp, 0, newParent->get_str(), kEmptyString, bl, CollectParseSettings());
765 }
766
767 } else {
768 throw _String ("An invalid argument (not an associative array) supplied");
769 }
770 } catch (const _String& err) {
771 HandleApplicationError (err & " in " & __PRETTY_FUNCTION__);
772 }
773
774 }
775 //_______________________________________________________________________________________________
776
makeDynamic(void) const777 BaseRef _TreeTopology::makeDynamic (void) const
778 {
779 _TreeTopology* res = new _TreeTopology;
780 res->_CalcNode::Duplicate (this);
781 res->flatTree.Duplicate (&flatTree);
782 res->flatCLeaves.Duplicate (&flatCLeaves);
783 if (compExp) {
784 res->compExp = (_Matrix*)compExp->makeDynamic();
785 } else {
786 res->compExp = nil;
787 }
788
789 res->rooted = rooted;
790 res->theRoot = CopyTreeStructure (theRoot,true);
791 return res;
792 }
793
794
795 //_______________________________________________________________________________________________
796
CopyTreeStructure(node<long> * theNode,bool) const797 node<long>* _TreeTopology::CopyTreeStructure (node <long>* theNode, bool) const {
798 node<long>* locNode = new node<long>;
799 for (long i=1L; i<=theNode->get_num_nodes(); i++) {
800 locNode->add_node(*CopyTreeStructure (theNode->go_down(i), false));
801 }
802 locNode->init (theNode->in_object);
803 return locNode;
804 }
805
806 //_______________________________________________________________________________________________
807
GetNodeName(node<long> * n,bool fullName) const808 const _String _TreeTopology::GetNodeName (node<long>* n, bool fullName) const {
809 if (fullName) {
810 return *GetName()&'.'&*(_String*)(flatTree.GetItem(n->in_object));
811 }
812 return *(_String*)(flatTree.GetItem (n->in_object));
813 }
814
815
816 //_______________________________________________________________________________________________
817
GetNodeModel(node<long> * n) const818 _String const* _TreeTopology::GetNodeModel (node<long>* n) const {
819 return (_String*)flatCLeaves.GetItem(n->in_object);
820 }
821
822 //_______________________________________________________________________________________________
823
MapNodesToModels(void)824 _List* _TreeTopology::MapNodesToModels (void) {
825 _List* map = new _List;
826
827 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
828
829 while (node<long>* iterator = ni.Next()) {
830 if (iterator->is_root()) {
831 break;
832 }
833 _List * node_record = new _List;
834 (*node_record) < new _String (GetNodeName(iterator)) < new _String(*GetNodeModel(iterator));
835 (*map) < node_record;
836 }
837 return map;
838 }
839
840 //_______________________________________________________________________________________________
841
GetNodeStringForTree(node<long> * n,int flags) const842 _String const _TreeTopology::GetNodeStringForTree (node<long> * n , int flags) const {
843
844 _String node_desc;
845
846 if (flags & fGetNodeStringForTreeName) {
847 node_desc = GetNodeName (n);
848 // check to see if the node name has any chars that are a part of the Newick spec, and if so, enquote the string
849 _any_char_in_set newick_delimiter (" (),{}[];:'\"");
850 if (node_desc.Any ([&newick_delimiter](char c, unsigned long i) -> bool {
851 return newick_delimiter==c;
852 }
853 ) != kNotFound) {
854 node_desc = _StringBuffer ().SanitizeAndAppend(node_desc).Enquote('"');
855 }
856
857 }
858
859 if (flags & fGetNodeStringForTreeModel) {
860 _String const *mSpec = GetNodeModel (n);
861 if (mSpec->nonempty()) {
862 node_desc = node_desc & mSpec->Enquote('{','}');
863 }
864 }
865 return node_desc;
866 }
867
868 //_______________________________________________________________________________________________
869
toStr(unsigned long)870 BaseRef _TreeTopology::toStr (unsigned long) {
871
872 static const _String kNoInternalLabels ("NO_INTERNAL_LABELS");
873
874 _StringBuffer * res = new _StringBuffer((unsigned long)128),
875 num;
876
877 bool skip_internal_labels = EnvVariableTrue(kNoInternalLabels),
878 include_model_info = EnvVariableTrue(include_model_spec);
879
880
881 int leaf_flag = fGetNodeStringForTreeName,
882 inode_flag = skip_internal_labels ? 0 : fGetNodeStringForTreeName;
883
884 if (include_model_info) {
885 leaf_flag |= fGetNodeStringForTreeModel;
886 inode_flag |= fGetNodeStringForTreeModel;
887 }
888
889 if (IsDegenerate ()) {
890 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
891 (*res)<<'(';
892
893 while (node<long>* iterator = ni.Next()) {
894 (*res) << GetNodeStringForTree (iterator, leaf_flag )
895 << (iterator->is_root() ? ')' : ',');
896 }
897 } else {
898
899 long level = 0L,
900 lastLevel = 0L;
901
902 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
903
904 node<long>* curNode = ni.Next(),
905 * nextNode;
906
907 level = ni.Level();
908
909 nextNode = ni.Next();
910
911 while (nextNode) {
912 if (level>lastLevel) {
913 if (lastLevel) {
914 (*res)<<',';
915 }
916 res->AppendNCopies('(', level-lastLevel);
917 } else if (level<lastLevel) {
918 res->AppendNCopies(')', lastLevel-level);
919 } else {
920 (*res)<<',';
921 }
922
923 (*res) << GetNodeStringForTree (curNode, curNode->is_leaf() ? leaf_flag : inode_flag);
924
925
926 lastLevel = level;
927 curNode = nextNode;
928 level = ni.Level();
929 nextNode = ni.Next();
930 }
931
932 res->AppendNCopies(')', lastLevel-level);
933 }
934 (*res)<<';';
935 res->TrimSpace();
936 return res;
937 }
938
939 //__________________________________________________________________________________
toFileStr(FILE * f,unsigned long padding)940 void _TreeTopology::toFileStr(FILE* f, unsigned long padding) {
941 _String * s = (_String*)toStr(padding);
942 fprintf (f, "%s", s->get_str());
943 DeleteObject(s);
944 }
945
946
947 //__________________________________________________________________________________
948
ExecuteSingleOp(long opCode,_List * arguments,_hyExecutionContext * context,HBLObjectRef cache)949 HBLObjectRef _TreeTopology::ExecuteSingleOp (long opCode, _List* arguments, _hyExecutionContext* context,HBLObjectRef cache) {
950 const static _String kSplitNodeNames ("SPLIT_NODE_NAMES");
951
952 switch (opCode) { // first check operations without arguments
953 case HY_OP_CODE_ABS: // Abs
954 return FlatRepresentation(cache);
955 case HY_OP_CODE_BRANCHCOUNT: //BranchCount
956 return BranchCount(cache);
957 case HY_OP_CODE_TIPCOUNT: // TipCount
958 return TipCount(cache);
959 case HY_OP_CODE_TYPE: // Type
960 return Type(cache);
961 }
962
963 _MathObject * arg0 = _extract_argument (arguments, 0UL, false);
964
965 try {
966 switch (opCode) { // next check operations without arguments or with one argument
967 case HY_OP_CODE_ADD: // +
968 if (!arg0) {
969 return Sum(cache);
970 }
971 AddANode (arg0);
972 return _returnConstantOrUseCache(0.0, cache);
973
974 case HY_OP_CODE_SUB:
975 if (!arg0) {
976 return new _MathObject;
977 }
978 RemoveANode (arg0);
979 return _returnConstantOrUseCache(0.0, cache);
980 }
981
982 if (arg0) {
983 switch (opCode) { // operations that require exactly one argument
984 case HY_OP_CODE_MUL: // compute the strict consensus between T1 and T2
985 return SplitsIdentity (arg0, cache);
986
987 case HY_OP_CODE_LEQ: { // MatchPattern (<=)
988
989 if (arg0->ObjectClass()!=TREE && arg0->ObjectClass()!=TOPOLOGY) {
990 throw _String ("Invalid (not a tree/topology) 2nd argument is call to <= (MatchPattern) for trees/topologies.");
991 }
992 _String res (((_TreeTopology*)arg0)->MatchTreePattern (this));
993 return _returnConstantOrUseCache (!res.BeginsWith("Unequal"), cache);
994 }
995 case HY_OP_CODE_EQ: // ==
996 return Compare(arg0,cache);
997 case HY_OP_CODE_BRANCHLENGTH: //BranchLength
998 return BranchLength(arg0,cache);
999 case HY_OP_CODE_BRANCHNAME: //BranchName
1000 return TreeBranchName(arg0,false,nil,cache);
1001 case HY_OP_CODE_TIPNAME: //BranchName
1002 return TipName(arg0,cache);
1003 case HY_OP_CODE_MIN: // COT (Min)
1004 return FindCOT (arg0,cache);
1005 case HY_OP_CODE_MAX: // Maximum parsimony
1006 return MaximumParsimony (arg0,cache);
1007 case HY_OP_CODE_REROOTTREE: // RerootTree
1008 return RerootTree(arg0, cache);
1009 case HY_OP_CODE_POWER: //^
1010 return AVLRepresentation (arg0, cache);
1011 case HY_OP_CODE_RANDOM:
1012 return RandomizeTips (arg0, cache);
1013 case HY_OP_CODE_IDIV: { // Split ($) - 2nd argument
1014 if (arg0->ObjectClass()!=NUMBER) {
1015 throw _String ("Invalid (not a number) 2nd argument is call to $ (split)for trees.");
1016 }
1017
1018 _List ref_manager;
1019
1020 _Constant* cc = (_Constant*)TipCount(nil);
1021
1022 ref_manager < cc;
1023
1024 long size = cc->Value()/arg0->Value();
1025
1026 if (size<=4 || size * 2 >cc->Value()) {
1027 throw _String("Poor choice of the 2nd numeric argument in to $ (split) for tree. Either the resulting cluster size is too big(>half of the tree), or too small (<4)!");
1028 }
1029
1030 long checkSize = 1L,
1031 tol = 0L;
1032
1033 while (tol<size-2) {
1034 _List* resL = SplitTreeIntoClusters (size,tol);
1035
1036 ref_manager < resL;
1037
1038 checkSize = cc->Value();
1039
1040 if (resL->nonempty()) {
1041 _Matrix* mRes = new _Matrix (resL->lLength, 2, false, true);
1042 resL->ForEach(
1043 [&] (BaseRefConst cluster, unsigned long index) -> void {
1044 _List* cluster_list = (_List*)cluster;
1045 long node_count = ((_Constant*)cluster_list->GetItem(1))->Value();
1046 mRes->Store (index,0, node_count);
1047 mRes->Store (index,1, -2L + cluster_list->countitems());
1048 }
1049 );
1050
1051
1052
1053 if (checkSize == 0UL) {
1054 _List sorted_list;
1055 resL->ForEach ([&] (BaseRefConst list, unsigned long) -> void {
1056 sorted_list << ((const _List*)list)->GetItem(0L);
1057 });
1058 sorted_list.Sort();
1059 EnvVariableSet (kSplitNodeNames, new _Matrix (sorted_list, false), false);
1060 return mRes;
1061 }
1062
1063 DeleteObject (mRes);
1064 }
1065 tol ++;
1066 }
1067 return new _Matrix (1,1,false, true);
1068 }
1069 }
1070
1071 _MathObject * arg1 = _extract_argument (arguments, 1UL, false);
1072
1073 switch (opCode) {
1074 case HY_OP_CODE_MACCESS: // MAccess
1075 return TreeBranchName (arg0,true, arg1, cache);
1076 }
1077
1078
1079 if (arg1) {
1080 switch (opCode) {
1081 case HY_OP_CODE_FORMAT: { // Format
1082 _StringBuffer *tStr = new _StringBuffer (1024UL);
1083 SubTreeString (theRoot, *tStr, CollectParseSettings(), arg0->Compute()->Value() > 0.1 , arg1->Compute()->Value() > 0.1 ? kTopologyBranchLengthExpectedSubs : kTopologyBranchLengthNone, -1, nil);
1084 tStr->TrimSpace();
1085 return _returnStringOrUseCache(tStr,cache);
1086 }
1087
1088 }
1089 }
1090
1091 }
1092
1093 switch (opCode) {
1094 case HY_OP_CODE_MUL: // compute the strict consensus between T1 and T2
1095 case HY_OP_CODE_LEQ: // MatchPattern (<=)
1096 case HY_OP_CODE_EQ: // ==
1097 case HY_OP_CODE_BRANCHLENGTH: //BranchLength
1098 case HY_OP_CODE_BRANCHNAME: //BranchName
1099 case HY_OP_CODE_MIN: // COT (Min)
1100 case HY_OP_CODE_REROOTTREE: // RerootTree
1101 case HY_OP_CODE_POWER: //^
1102 case HY_OP_CODE_IDIV: // Split ($) - 2nd argument
1103 case HY_OP_CODE_MACCESS: // MAccess
1104 case HY_OP_CODE_FORMAT: // Format
1105 WarnWrongNumberOfArguments (this, opCode,context, arguments);
1106 break;
1107 default:
1108 WarnNotDefined (this, opCode,context);
1109 }
1110 } catch (const _String& err) {
1111 context->ReportError(err);
1112 }
1113
1114 return new _MathObject;
1115 }
1116
1117
1118 //_______________________________________________________________________________________________
1119
RetrieveNodeNames(bool doTips,bool doInternals,int traversalType) const1120 const _List _TreeTopology::RetrieveNodeNames (bool doTips, bool doInternals, int traversalType) const {
1121 _List result;
1122
1123 node_iterator<long> ni (theRoot, traversalType);
1124
1125 while (node<long> * iterator = ni.Next()) {
1126 bool is_leaf = iterator->is_leaf();
1127 if (is_leaf && doTips || doInternals && !is_leaf) {
1128 result < new _String (GetNodeName(iterator));
1129 }
1130 }
1131
1132 return result;
1133 }
1134
1135 //__________________________________________________________________________________
1136
FindCOTHelper(node<long> * aNode,long parentIndex,_Matrix & distances,_Matrix & rootDistances,_Matrix & branchLengths,_List & childLists,_AVLListX & addressToIndexMap2,hyFloat d)1137 void _TreeTopology::FindCOTHelper (node<long>* aNode, long parentIndex, _Matrix& distances, _Matrix& rootDistances, _Matrix& branchLengths, _List& childLists, _AVLListX& addressToIndexMap2, hyFloat d)
1138 {
1139
1140 /** TODO: SLKP 20171218, the whole FindCOT stack needs review **/
1141
1142 long myIndex = addressToIndexMap2.GetDataByKey((BaseRef)aNode),
1143 leafCount = distances.GetVDim();
1144
1145
1146 _SimpleList * childLeaves = (_SimpleList*)childLists(myIndex);
1147
1148 _Matrix * lookup = parentIndex>=0?&distances:&rootDistances;
1149
1150 if (parentIndex < 0L) {
1151 parentIndex = 0L;
1152 }
1153
1154 long ci2 = 0L;
1155
1156 hyFloat myLength = branchLengths.get_direct(myIndex);
1157
1158 for (long ci = 0L; ci < leafCount; ci++) {
1159 if (ci == childLeaves->list_data[ci2]) {
1160 if (ci2 < childLeaves->lLength - 1) {
1161 ci2++;
1162 }
1163 } else {
1164 distances.Store (myIndex, ci, (*lookup)(parentIndex,ci) + myLength);
1165 }
1166 }
1167
1168
1169 for (long ci3 = aNode->get_num_nodes(); ci3; ci3--) {
1170 FindCOTHelper (aNode->go_down (ci3), myIndex, distances, rootDistances, branchLengths, childLists, addressToIndexMap2, myLength);
1171 }
1172 }
1173
1174 //__________________________________________________________________________________
1175
FindCOTHelper2(node<long> * aNode,_Matrix & branchSpans,_Matrix & branchLengths,_AVLListX & addressToIndexMap2,node<long> * referrer,hyFloat d)1176 void _TreeTopology::FindCOTHelper2 (node<long>* aNode, _Matrix& branchSpans, _Matrix& branchLengths, _AVLListX& addressToIndexMap2, node<long>* referrer, hyFloat d)
1177 {
1178 /** TODO: SLKP 20171218, the whole FindCOT stack needs review **/
1179 long myIndex = aNode->parent?addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)aNode)):-1;
1180 hyFloat myLength = myIndex>=0?branchLengths.theData [myIndex]:0.0;
1181
1182 for (long ci = aNode->get_num_nodes(); ci; ci--) {
1183 node <long>* daChild = aNode->go_down (ci);
1184 if (daChild != referrer) {
1185 FindCOTHelper2 (daChild, branchSpans, branchLengths, addressToIndexMap2, nil, d+myLength);
1186 }
1187 }
1188 if (aNode->parent) { // not the root
1189 if (d>=0.0) {
1190 branchSpans.Store (myIndex,0,d);
1191 } else {
1192 branchSpans.Store (myIndex,0,0.);
1193 }
1194
1195 branchSpans.Store (myIndex,1,d+myLength);
1196 if (referrer) {
1197 FindCOTHelper2 (aNode->parent, branchSpans, branchLengths, addressToIndexMap2, aNode, d+myLength);
1198 }
1199 }
1200 }
1201
1202 //__________________________________________________________________________________
1203
MaximumParsimony(HBLObjectRef parameters,HBLObjectRef cache)1204 HBLObjectRef _TreeTopology::MaximumParsimony (HBLObjectRef parameters,HBLObjectRef cache) {
1205 static const _String
1206 kMPScore ("score"),
1207 kMPLabels ("labels"),
1208 kMPOutNodeScore ("node-scores"),
1209 kMPOutSubstitutions ("substitutions");
1210
1211 try {
1212 CheckArgumentType(parameters, ASSOCIATIVE_LIST, true);
1213 _AssociativeList * arguments = (_AssociativeList *)parameters;
1214
1215 _AssociativeList * labels = (_AssociativeList *)arguments->GetByKeyException(kMPLabels, ASSOCIATIVE_LIST);
1216 //* scores = (_AssociativeList *)arguments->GetByKey(kMPScore, ASSOCIATIVE_LIST);
1217
1218 _List id2name; // integer label -> string label
1219
1220 _Trie unique_labels, // label -> unique integer ID
1221 mapped_labels; // node_name -> integer label
1222
1223 _Matrix * score_matrix = nil;
1224
1225 auto score_pair = [&] (unsigned long from, unsigned long to) -> hyFloat {
1226 if (score_matrix) {
1227 return (*score_matrix)(from,to);
1228 }
1229 return from == to ? 0.0 : 1.0;
1230 };
1231
1232 for (AVLListXLIteratorKeyValue key_value : labels->ListIterator()) {
1233 HBLObjectRef node_label = (HBLObjectRef)key_value.get_object();
1234 _String const * node_name = key_value.get_key();
1235 if (node_label->ObjectClass() != STRING) {
1236 throw _String ("Not a string-valued label for node ") & node_name->Enquote();
1237 }
1238 _String const * label_value = &((_FString*)node_label)->get_str();
1239 bool did_insert = false;
1240 long label_index = unique_labels.GetValue (unique_labels.InsertExtended (*label_value, unique_labels.countitems(), false, &did_insert));
1241 if (did_insert) {
1242 id2name < new _String (*label_value);
1243 }
1244 mapped_labels.Insert (*node_name, label_index);
1245 }
1246
1247 unsigned long unique_label_count = unique_labels.countitems();
1248
1249 _Vector conditional_scores;
1250 _SimpleList conditional_labels;
1251
1252 /*
1253 for each node, in post-order traversal, these lists store K (== unique_label_count)
1254 in a serialized vector (for scores), and list (for integer labels)
1255 */
1256
1257 _SimpleList has_labels;
1258 /*
1259 for each node, in post-order traversal
1260 stores whether or not the subtree starting at this node can be fully labeled
1261 (-1 : not labeled)
1262 only such trees are used for labeling
1263 */
1264
1265 _SimpleList store_values;
1266 /*
1267 store node<long> values, because they will be overwritten
1268 temporarily during MP inference
1269 */
1270
1271 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
1272
1273 // perform the first pass to
1274
1275 long post_order_index = 0L;
1276
1277 while (node<long>* iterator = ni.Next()) {
1278
1279 if (iterator->is_leaf()) {
1280
1281 long index = mapped_labels.FindKey (GetNodeName (iterator));
1282 if (index >= 0) {
1283 index = mapped_labels.GetValue(index);
1284 for (unsigned long parent_state = 0UL; parent_state < unique_label_count; parent_state++) {
1285 conditional_scores << score_pair (parent_state, index);
1286 }
1287 } else {
1288 conditional_scores.AppendRange(unique_label_count, 0., 0.);
1289 }
1290 has_labels << index;
1291 conditional_labels.AppendRange(unique_label_count, index, 0);
1292 } else {
1293 conditional_labels.AppendRange(unique_label_count, 0, 0);
1294 conditional_scores.AppendRange(unique_label_count, 0., 0.);
1295 bool node_has_labels = true;
1296 for (unsigned long i = 1UL; i <= iterator->get_num_nodes(); i++) {
1297 node_has_labels = node_has_labels && (has_labels.get (iterator->go_down(i)->in_object) >= 0);
1298 }
1299 has_labels << (node_has_labels ? 1 : -1);
1300 }
1301 store_values << iterator->in_object;
1302 iterator->in_object = post_order_index++;
1303 }
1304
1305 ni.Reset(theRoot);
1306
1307 while (node<long>* iterator = ni.Next()) {
1308 long my_index = iterator->in_object;
1309 long label = has_labels.get (my_index);
1310 if (label >= 0) {
1311 if (!iterator->is_leaf()) {
1312 long offset = my_index * unique_label_count;
1313 for (unsigned long parent_state = 0UL; parent_state < unique_label_count; parent_state++) {
1314 hyFloat best_score = 1.e100;
1315 long best_state = -1L;
1316
1317 for (unsigned long my_state = 0UL; my_state < unique_label_count; my_state++) {
1318 hyFloat running_score = 0.;
1319 for (unsigned long i = 1UL; i <= iterator->get_num_nodes(); i++) {
1320 long child_postorder_offset = iterator->go_down(i)->in_object * unique_label_count;
1321 running_score += conditional_scores.get(0, child_postorder_offset + my_state);
1322 }
1323 if (StoreIfLess (best_score, running_score + score_pair (parent_state, my_state))) {
1324 best_state = my_state;
1325 }
1326 }
1327 conditional_labels [offset+parent_state] = best_state;
1328 conditional_scores [offset+parent_state] = best_score;
1329 }
1330 }
1331 }
1332 }
1333
1334 node_iterator<long> ni_pre (theRoot, _HY_TREE_TRAVERSAL_PREORDER);
1335
1336 hyFloat total_score = 0.;
1337 _AssociativeList * inferred_labels = new _AssociativeList;
1338 _AssociativeList * inferred_substitutions = new _AssociativeList;
1339 _AssociativeList * node_scores = new _AssociativeList;
1340
1341
1342
1343 while (node<long>* iterator = ni_pre.Next()) {
1344 long my_index = iterator->in_object;
1345 long label = has_labels.get (my_index);
1346
1347
1348 if (!iterator->is_leaf () && label >= 0) { // labeled node
1349 long my_label = 0L;
1350 hyFloat best_score = 1.e100;
1351 if (!iterator->parent || has_labels.get (iterator->parent->in_object) < 0) {
1352 // either a root or not a part of the labeled tree
1353
1354 long offset = my_index * unique_label_count;
1355 for (unsigned long k = 0UL; k < unique_label_count; k++) {
1356 if (StoreIfLess (best_score, conditional_scores.get (0,offset + k))) {
1357 my_label = k;
1358 }
1359 }
1360
1361 total_score +=best_score;
1362
1363 } else {
1364 // read off the label for this node based on the state of the parent
1365 my_label = conditional_labels.get (my_index * unique_label_count + has_labels.get (iterator->parent->in_object));
1366 best_score = conditional_scores.get (my_index * unique_label_count + has_labels.get (iterator->parent->in_object),0);
1367
1368 }
1369 has_labels[my_index] = my_label;
1370 iterator->in_object = store_values.get (iterator->in_object);
1371
1372 inferred_labels->MStore (GetNodeName(iterator), (*(_String*)id2name.GetItem(my_label)));
1373 iterator->in_object = my_index;
1374 node_scores->MStore (GetNodeName (iterator), new _Constant (best_score), false);
1375 }
1376
1377 if (label >= 0 && iterator->parent) {
1378 long parent_label = has_labels.get (iterator->parent->in_object);
1379 if (parent_label >= 0) {
1380 // not root, has labeled parent
1381 long my_label = has_labels.get (my_index);
1382 if (my_label != parent_label) {
1383 iterator->in_object = store_values.get (iterator->in_object);
1384 _AssociativeList * substitution_record = new _AssociativeList;
1385 *substitution_record < (_associative_list_key_value){"from", new _FString ((*(_String*)id2name.GetItem(parent_label)))}
1386 < (_associative_list_key_value){"to", new _FString ((*(_String*)id2name.GetItem(my_label)))};
1387 inferred_substitutions->MStore (GetNodeName(iterator), substitution_record, false);
1388 iterator->in_object = my_index;
1389 }
1390 }
1391 }
1392
1393
1394 //iterator->in_object = store_values.get (iterator->in_object);
1395 }
1396
1397 _AssociativeList * result = new _AssociativeList;
1398 result->MStore (kMPScore, new _Constant (total_score), false);
1399 result->MStore (kMPLabels, inferred_labels, false);
1400 result->MStore (kMPOutSubstitutions, inferred_substitutions, false);
1401 result->MStore (kMPOutNodeScore, node_scores, false);
1402
1403 //printf ("%s\n", _String ((_String*)conditional_scores.toStr(0)).get_str());
1404
1405 ni.Reset(theRoot);
1406
1407 while (node<long>* iterator = ni.Next()) {
1408 iterator->in_object = store_values.get (iterator->in_object);
1409 // restore node values
1410 }
1411
1412 return result;
1413
1414 } catch (const _String& err) {
1415 HandleApplicationError(err);
1416 }
1417 return new _MathObject;
1418
1419
1420 }
1421 //__________________________________________________________________________________
1422
FindCOT(HBLObjectRef p,HBLObjectRef cache)1423 _AssociativeList* _TreeTopology::FindCOT (HBLObjectRef p, HBLObjectRef cache) {
1424 // Find the Center of the Tree (COT) location
1425 // using an L_p metric (L_2 works well)
1426
1427 /** TODO: SLKP 20171218, the whole FindCOT stack needs review **/
1428
1429 static const _String
1430 kCOTNode ( "COT_NODE"),
1431 kCOTSplit ("COT_SPLIT"),
1432 kCOTBranchLength ( "COT_BRANCH_LENGTH"),
1433 kCOTDistance ("COT_DISTANCE"),
1434 kCOTCDF ("COT_CDF"),
1435 kCOTSamples ("COT_SAMPLES"),
1436 kCOTSampler ("COT_SAMPLER"),
1437 kCOTToNode ("COT_TO_NODE");
1438
1439
1440 hyFloat power = p->Compute()->Value(),
1441 totalTreeLength = 0.0;
1442
1443 _AssociativeList * resList = new _AssociativeList;
1444
1445 if (power<=0.) {
1446 HandleApplicationError (_String("Invalid power argument in call to COT finder (Min on trees). Must be positive, had :") & power);
1447 return resList;
1448 }
1449
1450 _SimpleList avlSL,
1451 avlSL2,
1452 listOfNodes;
1453
1454 _List avlSL3,
1455 childLists;
1456
1457 _AVLListX addressToIndexMap (&avlSL), // leaves only
1458 addressToIndexMap2 (&avlSL2), // complete index
1459 lengthToIndexMap (&avlSL3);
1460
1461 long leafCount = 0,
1462 branchCount = 0,
1463 tIndex = 0;
1464
1465 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
1466
1467 while (node<long>* iterator = ni.Next()) {
1468 if(iterator->is_root()) {
1469 break;
1470 } else if (iterator->is_leaf()) {
1471 addressToIndexMap.Insert ((BaseRef)iterator, leafCount++);
1472 } else {
1473 branchCount++;
1474 }
1475
1476 addressToIndexMap2.Insert ((BaseRef)iterator, tIndex++);
1477 }
1478
1479 // allocate the matrix of path lengths with hardwired (traversal order) indices
1480 // also allocate a list of sorted lists to store children nodes
1481 // and a map of (longed) node addresses to post order traversal indices
1482
1483 _Matrix distances (branchCount+leafCount, leafCount, false, true),
1484 rootDistances (1, leafCount, false, true),
1485 branchLengths (1, branchCount+leafCount, false, true),
1486 branchSpans (branchCount+leafCount+1,2,false,true);
1487
1488 // pass 1: fill up the nodes up to the root (i.e. below any internal node)
1489
1490 ni.Reset (theRoot);
1491 tIndex = 0;
1492
1493 while (node<long>* iterator = ni.Next()) {
1494 if (iterator->is_root()) {
1495 break;
1496 }
1497 hyFloat myLength = GetBranchLength (iterator);
1498 lengthToIndexMap.Insert (new _String(totalTreeLength), tIndex, false, true);
1499 totalTreeLength += myLength;
1500
1501 branchLengths.Store (0, tIndex++, myLength);
1502 listOfNodes << (long)iterator;
1503 _SimpleList *childIndices = new _SimpleList;
1504
1505 if (iterator->is_leaf()) {
1506 (*childIndices) << addressToIndexMap.GetXtra(addressToIndexMap.Find((BaseRef)iterator));
1507 } else {
1508 long myIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)iterator));
1509 _SimpleList mappedLeaves (leafCount,0,0);
1510
1511 for (long ci = iterator->get_num_nodes(); ci; ci--) {
1512 long childIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)iterator->go_down (ci)));
1513 _SimpleList * childLeaves = (_SimpleList*)childLists(childIndex);
1514
1515 myLength = branchLengths.theData[childIndex];
1516
1517 for (long ci2 = 0; ci2 < childLeaves->lLength; ci2++) {
1518 long ttIndex = childLeaves->list_data[ci2];
1519 mappedLeaves.list_data[ttIndex] = 1;
1520 distances.Store (myIndex, ttIndex, distances (childIndex, ttIndex) + myLength);
1521 }
1522 }
1523
1524 for (long ci2 = 0; ci2 < leafCount; ci2++) {
1525 if (mappedLeaves.list_data[ci2]) {
1526 (*childIndices) << ci2;
1527 }
1528 }
1529 }
1530
1531 childLists.AppendNewInstance(childIndices);
1532 }
1533
1534 // pass 2: fill the root vector
1535
1536 //nodeName = "COT_DM1";
1537 //setParameter (nodeName, &distances);
1538
1539 for (long ci = theRoot->get_num_nodes(); ci; ci--) {
1540 long childIndex = addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)theRoot->go_down (ci)));
1541 _SimpleList * childLeaves = (_SimpleList*)childLists(childIndex);
1542 hyFloat myLength = branchLengths.theData[childIndex];
1543 for (long ci2 = 0; ci2 < childLeaves->lLength; ci2++) {
1544 tIndex = childLeaves->list_data[ci2];
1545 rootDistances.Store (0, tIndex, distances (childIndex, tIndex) + myLength);
1546 //printf ("root->%s = %g\n", ((_String*)leafNames(tIndex))->sData, distances (childIndex, tIndex) + myLength);
1547 }
1548 }
1549
1550
1551 // pass 3: fill in the "other site" branch lengths
1552
1553 for (long ci3 = theRoot->get_num_nodes(); ci3; ci3--) {
1554 FindCOTHelper (theRoot->go_down (ci3), -1, distances, rootDistances, branchLengths, childLists, addressToIndexMap2, 0);
1555 }
1556
1557 //nodeName = "COT_DM2";
1558 //setParameter (nodeName, &distances);
1559
1560
1561
1562 // now traverse the tree and look for the maximum
1563
1564 tIndex = 0; // stores the index of current min
1565
1566 hyFloat currentMin = 1e100,
1567 currentBranchSplit = 0;
1568
1569
1570 for (long ci = distances.GetHDim()-1; ci>=0; ci--) {
1571 hyFloat T = branchLengths.theData[ci];
1572 _SimpleList * childLeaves = (_SimpleList*)childLists(ci);
1573 long ci2 = 0;
1574
1575 if (CheckEqual (power,2.0)) {
1576 hyFloat sumbT = 0.,
1577 sumbT2 = 0.,
1578 suma = 0.,
1579 suma2 = 0.;
1580
1581
1582
1583 for (long ci3 = 0; ci3 < leafCount; ci3++) {
1584 hyFloat tt = distances(ci,ci3);
1585
1586 /*
1587 printf ("%s->%s = %g\n", ttt2.sData, ((_String*)leafNames(ci3))->sData, tt);
1588 */
1589
1590 if (ci3 == childLeaves->list_data[ci2]) {
1591 if (ci2 < childLeaves->lLength-1) {
1592 ci2++;
1593 }
1594
1595 suma += tt;
1596 suma2 += tt*tt;
1597 } else {
1598 sumbT += tt;
1599 sumbT2 += tt*tt;
1600 }
1601 }
1602
1603
1604 hyFloat tt = (sumbT-suma)/leafCount;/*(sumbT-suma)/leafCount*/;
1605 if (tt < 0.0) {
1606 tt = 0.;
1607 } else if (tt > T) {
1608 tt = T;
1609 }
1610
1611 sumbT = tt*tt*leafCount + 2*tt*(suma-sumbT) + suma2 + sumbT2;
1612
1613 if (sumbT < currentMin) {
1614 tIndex = ci;
1615 currentBranchSplit = tt;
1616 currentMin = sumbT;
1617 }
1618 } else {
1619 hyFloat step = T>0.0?T*0.0001:0.1,
1620 currentT = 0.;
1621
1622 while (currentT<T) {
1623 hyFloat dTT = 0.0;
1624
1625 ci2 = 0;
1626
1627 for (long ci3 = 0; ci3 < leafCount; ci3++) {
1628 hyFloat tt = distances(ci,ci3);
1629 if (ci3 == childLeaves->get (ci2)) {
1630 if (ci2 < childLeaves->lLength-1) {
1631 ci2++;
1632 }
1633
1634 dTT += pow(tt+currentT,power);
1635 } else {
1636 dTT += pow(tt-currentT,power);
1637 }
1638 }
1639
1640 if (dTT < currentMin) {
1641 tIndex = ci;
1642 currentBranchSplit = currentT;
1643 currentMin = dTT;
1644 }
1645 currentT += step;
1646 }
1647 }
1648 }
1649
1650 node <long>* cotBranch = (node<long>*)listOfNodes.list_data[tIndex];
1651
1652
1653 resList->MStore (kCOTNode, new _FString( GetNodeName (cotBranch),false),false);
1654 resList->MStore (kCOTSplit, new _Constant (currentBranchSplit), false);
1655 resList->MStore (kCOTDistance, new _Constant (currentMin), false);
1656 resList->MStore (kCOTBranchLength, new _Constant (branchLengths.directIndex(tIndex)), false);
1657
1658 // compute the distribution of lengths away from COT
1659
1660 FindCOTHelper2 (cotBranch, branchSpans, branchLengths, addressToIndexMap2, nil, currentBranchSplit-branchLengths.theData [tIndex]);
1661 if (cotBranch->parent) {
1662 hyFloat adjuster = branchLengths.theData [tIndex]-currentBranchSplit;
1663 branchSpans.Store (branchCount+leafCount,1,adjuster);
1664 node <long>* cotParent = cotBranch->parent;
1665 if (cotParent->parent) {
1666 adjuster -= branchLengths.theData[addressToIndexMap2.GetXtra(addressToIndexMap2.Find((BaseRef)cotParent))];
1667 }
1668 FindCOTHelper2 (cotParent, branchSpans, branchLengths, addressToIndexMap2, cotBranch, adjuster);
1669 }
1670
1671 _List timeSplits;
1672 _AVLListX timeSplitsAVL (&timeSplits);
1673
1674 for (long sc=0; sc <= branchCount+leafCount; sc++) {
1675 char buffer[256];
1676 snprintf (buffer, 255, "%.15f", branchSpans(sc,0));
1677 _String nodeName = buffer;
1678 branchSpans.Store(sc,0,nodeName.to_float());
1679 timeSplitsAVL.Insert (nodeName.makeDynamic(),0,false,true);
1680 snprintf (buffer, 255, "%.15f", branchSpans(sc,1));
1681 nodeName = buffer;
1682 branchSpans.Store(sc,1,nodeName.to_float());
1683 timeSplitsAVL.Insert (nodeName.makeDynamic(),0,false,true);
1684 }
1685
1686 _Matrix cotCDFPoints (timeSplitsAVL.countitems(),3,false,true);
1687
1688 _AssociativeList * ctl = new _AssociativeList ();
1689
1690 _SimpleList tcache;
1691
1692 long iv,
1693 k = timeSplitsAVL.Traverser (tcache, iv, timeSplitsAVL.GetRoot());
1694
1695 for (long pc = 0; k>=0; k = timeSplitsAVL.Traverser (tcache, iv), pc++) {
1696 timeSplitsAVL.SetXtra (k, pc);
1697 cotCDFPoints.Store (pc,0,((_String*)(*((_List*)timeSplitsAVL.dataList))(k))->to_float());
1698 }
1699
1700
1701 for (long mxc=0; mxc <= branchCount+leafCount; mxc++) {
1702 hyFloat T0 = branchSpans(mxc,0),
1703 T1 = branchSpans(mxc,1);
1704
1705 if (mxc<branchCount+leafCount) {
1706 _String nodeName = GetNodeName ((node<long>*)listOfNodes(mxc));
1707 ctl->MStore (nodeName, new _Constant (T1), false);
1708 }
1709
1710 char buffer[256];
1711 snprintf (buffer, 256, "%.15f", T0);
1712 _String nn = buffer;
1713 tcache.Clear();
1714 long startingPos = timeSplitsAVL.Find (&nn,tcache),
1715 k = timeSplitsAVL.Next (startingPos,tcache);
1716
1717 for (long pc = timeSplitsAVL.GetXtra (k); k>=0; k = timeSplitsAVL.Next (k,tcache), pc++) {
1718 hyFloat ub = ((_String*)(*((_List*)timeSplitsAVL.dataList))(k))->to_float();
1719 if (ub < T1 || CheckEqual (T1, ub)) {
1720 cotCDFPoints.Store (pc,1,cotCDFPoints(pc,1)+1);
1721 } else {
1722 break;
1723 }
1724 }
1725 }
1726
1727 for (long mxc=1; mxc<cotCDFPoints.GetHDim() ; mxc++) {
1728 cotCDFPoints.Store (mxc,1,cotCDFPoints(mxc,1)*(cotCDFPoints(mxc,0)-cotCDFPoints(mxc-1,0))/totalTreeLength);
1729 cotCDFPoints.Store (mxc,2,cotCDFPoints(mxc,1)+cotCDFPoints(mxc-1,2));
1730 }
1731 timeSplitsAVL.Clear(true);
1732
1733
1734 resList->MStore (kCOTCDF, &cotCDFPoints, true);
1735 resList->MStore (kCOTToNode, ctl, false);
1736
1737 // sample random branch placement
1738 if (totalTreeLength > 0.0) {
1739 hyFloat sampler = EnvVariableGetNumber(kCOTSamples,0.0);
1740
1741 tIndex = sampler;
1742 if (tIndex >= 1) {
1743 _Matrix sampledDs (tIndex, 1, false, true);
1744
1745 for (long its = 0; its < tIndex; its ++) {
1746 hyFloat tSample = (genrand_real2 () * totalTreeLength);
1747 _String nn = tSample;
1748 long branchIndex = 0;
1749 if (lengthToIndexMap.FindBest (&nn,branchIndex)<=0) {
1750 branchIndex --;
1751 }
1752
1753 hyFloat T = branchLengths.theData[branchIndex];
1754 _SimpleList * childLeaves = (_SimpleList*)childLists(branchIndex);
1755
1756 hyFloat dTT = 0.0;
1757 tSample -= ((_String*)(*(_List*)lengthToIndexMap.dataList)(branchIndex))->to_float();
1758
1759 long ci2 = 0;
1760 for (long ci3 = 0; ci3 < leafCount; ci3++) {
1761 hyFloat tt = distances(branchIndex,ci3);
1762 if (ci3 == childLeaves->list_data[ci2]) {
1763 if (ci2 < childLeaves->lLength-1) {
1764 ci2++;
1765 }
1766
1767 dTT += pow(tt+tSample,power);
1768 } else {
1769 dTT += pow(tt+T-tSample,power);
1770 }
1771 }
1772
1773 sampledDs.Store (its,0,dTT);
1774 }
1775 EnvVariableSet(kCOTSampler, &sampledDs, true);
1776 }
1777
1778 }
1779 return resList;
1780 }
1781
1782 //__________________________________________________________________________________
1783
Compare(HBLObjectRefConst p,HBLObjectRef cache) const1784 _FString* _TreeTopology::Compare (HBLObjectRefConst p, HBLObjectRef cache) const {
1785 // compare tree topologies
1786 long objClass = p->ObjectClass();
1787
1788 if (objClass==TREE || objClass==TOPOLOGY) {
1789 _String cmp = CompareTrees ((_TreeTopology*)p);
1790 if (cmp.BeginsWith(kCompareEqualWithReroot)) {
1791 return (_FString*)_returnStringOrUseCache(cmp.Cut(kCompareEqualWithReroot.length() + ((_TreeTopology*)p)->GetName()->length() + 1, cmp.length()-2), cache);
1792 } else if (cmp == kCompareEqualWithoutReroot) {
1793 return (_FString*)_returnStringOrUseCache (_String (' '), cache);
1794 }
1795 }
1796 return (_FString*)_returnStringOrUseCache (kEmptyString, cache);
1797 }
1798
1799
1800 //__________________________________________________________________________________
1801
internalNodeCompare(node<long> * n1,node<long> * n2,_SimpleList & subTreeMap,_SimpleList * reindexer,bool cangoup,long totalSize,node<long> * n22,_TreeTopology const * tree2,bool isPattern) const1802 bool _TreeTopology::internalNodeCompare (node<long>* n1, node<long>* n2, _SimpleList& subTreeMap, _SimpleList* reindexer, bool cangoup, long totalSize, node<long>* n22, _TreeTopology const* tree2, bool isPattern) const {
1803 // compares whether the nodes create the same subpartition
1804
1805 // TODO SLKP 20171224: this needs review
1806
1807 // count the number of children
1808 long nc1 = n1->get_num_nodes(),
1809 nc2 = n2->get_num_nodes() + (cangoup&&n2->parent) - (n22&&(!n2->parent));
1810
1811
1812 //printf ("Comparing nodes %ld %ld\n", nc1, nc2);
1813
1814 if ( nc1 == nc2 || isPattern && nc2<nc1 ) {
1815 // now see if the descendants in all directions can be matched
1816 // first prepare the list of subtrees in the 2nd tree
1817
1818 _List nodeMap;
1819
1820 _SimpleList* complement = nil;
1821 _SimpleList stSizes;
1822
1823 if ((cangoup||n22)&&n2->parent) {
1824 complement = new _SimpleList ((unsigned long)totalSize, 1L, 0L);
1825 }
1826
1827 nc2 = n2->get_num_nodes();
1828
1829 long skipped_child = -1,
1830 complementCorrection = 0;
1831
1832
1833 for (long k=1; k <= nc2; k++) {
1834 node<long>* kth_child = n2->go_down(k);
1835 _SimpleList * childLeaves = (_SimpleList*)kth_child->in_object;
1836 if (kth_child == n22) {
1837 if (complement) {
1838 if (reindexer) {
1839 childLeaves->Each ([&] (long value, unsigned long index) -> void {
1840 long lookup = reindexer->get (value);
1841 if (lookup >= 0) {
1842 (*complement) [lookup] = 0L;
1843 } else {
1844 complementCorrection++;
1845 }
1846 });
1847 }
1848 else {
1849 childLeaves->Each ([&] (long value, unsigned long index) -> void {
1850 (*complement) [value] = 0L;
1851 });
1852 }
1853 }
1854
1855 skipped_child = k-1;
1856 } else {
1857 _SimpleList * all_descendants = new _SimpleList ((unsigned long)totalSize, 0L, 0L);
1858
1859 try {
1860 childLeaves->Each ([&] (long value, unsigned long) -> void {
1861 long lookup = reindexer ? reindexer->get (value) : value;
1862 if (lookup >= 0L) {
1863 (*all_descendants) [lookup] = 1L;
1864 if (complement) {
1865 (*complement) [lookup] = 0L;
1866 }
1867 } else {
1868 throw 0;
1869 }
1870 });
1871 } catch (int e) {
1872 if (complement) delete complement;
1873 delete all_descendants;
1874 return 0;
1875 }
1876
1877
1878 stSizes << childLeaves->countitems();
1879 nodeMap < all_descendants;
1880 }
1881 }
1882
1883 if (complement) {
1884 nodeMap < complement;
1885 stSizes << totalSize;
1886
1887 for (long k4 = 0; k4 < stSizes.lLength-1; k4++) {
1888 stSizes.list_data[stSizes.lLength-1] -= stSizes.list_data[k4];
1889 }
1890
1891 if (n22) {
1892 stSizes.list_data[stSizes.lLength-1] -= ((_SimpleList*)n22->in_object)->lLength-complementCorrection;
1893 }
1894 }
1895
1896 // now go through the subtrees of the first tree and match up (if we can)
1897 // with the 2nd 1
1898
1899 _SimpleList unmatchedPatterns;
1900
1901 for (long k5=1; k5 <= nc1; k5++) {
1902 _SimpleList * childNodes = (_SimpleList*) n1->go_down(k5)->in_object;
1903 long k6;
1904 for (k6=0; k6 < stSizes.lLength; k6++)
1905 if (stSizes.list_data[k6] == childNodes->lLength)
1906 // potential subtree match
1907 {
1908 _SimpleList* potMap = (_SimpleList*)nodeMap(k6);
1909 long k7;
1910 for (k7=0; k7<childNodes->lLength; k7++)
1911 if (potMap->list_data[childNodes->list_data[k7]] == 0) {
1912 break;
1913 }
1914
1915 if (k7 == childNodes->lLength) {
1916 stSizes.list_data[k6] = -1;
1917
1918 if (complement && (k6 == stSizes.lLength-1)) {
1919 subTreeMap << -1;
1920 } else {
1921 if (n22&&(k6>=skipped_child)) {
1922 subTreeMap << k6+1;
1923 } else {
1924 subTreeMap << k6;
1925 }
1926 }
1927 break;
1928 }
1929 }
1930
1931 if (k6 == stSizes.lLength) {
1932 if (isPattern) {
1933 unmatchedPatterns << k5;
1934 subTreeMap << -2;
1935 } else {
1936 return 0;
1937 }
1938 }
1939 }
1940
1941 if (isPattern&&unmatchedPatterns.lLength) {
1942 _SimpleList rematchedPatterns;
1943 for (long k7 = 0; k7 < unmatchedPatterns.lLength; k7++) {
1944 rematchedPatterns << -1;
1945 }
1946
1947 for (long k8 = 0; k8 < stSizes.lLength; k8++) {
1948 if (stSizes.list_data[k8]>0) {
1949 _SimpleList* potMap = (_SimpleList*)nodeMap(k8);
1950 for (long k9 = 0; k9 < unmatchedPatterns.lLength; k9++) {
1951 if (rematchedPatterns.list_data[k9] < 0) {
1952 _SimpleList * childNodes = (_SimpleList*) n1->go_down(unmatchedPatterns.list_data[k9])->in_object;
1953 long k10 = 0;
1954 for (; k10 < childNodes->lLength; k10++)
1955 if (potMap->list_data[childNodes->list_data[k10]] == 0) {
1956 break;
1957 }
1958 if (k10 == childNodes->lLength) {
1959 rematchedPatterns.list_data[k9] = k8;
1960 if (complement && (k8 == stSizes.lLength-1)) {
1961 subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -(0xfffffff);
1962 } else {
1963 if (n22&&(k8>=skipped_child)) {
1964 subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -k8-3;
1965 } else {
1966 subTreeMap.list_data [unmatchedPatterns.list_data[k9]-1] = -k8-2;
1967 }
1968 }
1969 }
1970 }
1971 }
1972 } else {
1973 stSizes.list_data[k8] = 0;
1974 }
1975 }
1976
1977 if (rematchedPatterns.Find(-1)>=0) {
1978 return 0;
1979 }
1980
1981 for (long k11 = 0; k11 < rematchedPatterns.lLength; k11++) {
1982 stSizes.list_data[rematchedPatterns.list_data[k11]] -= ((_SimpleList*) n1->go_down(unmatchedPatterns.list_data[k11])->in_object)->lLength;
1983 }
1984
1985 for (long k12 = 0; k12 < stSizes.lLength; k12++)
1986 if (stSizes.list_data[k12]) {
1987 return 0;
1988 }
1989 }
1990 return 1;
1991 }
1992
1993 return 0;
1994 }
1995
1996
1997 //long itcCount = 0;
1998 //__________________________________________________________________________________
1999
internalTreeCompare(node<long> * n1,node<long> * n2,_SimpleList * reindexer,char compMode,long totalSize,node<long> * n22,_TreeTopology const * tree2,bool isPattern) const2000 char _TreeTopology::internalTreeCompare (node<long>* n1, node<long>* n2, _SimpleList* reindexer, char compMode, long totalSize, node<long>* n22, _TreeTopology const* tree2, bool isPattern) const
2001 // compare tree topologies
2002 // return values mean
2003 {
2004 if (n1->get_num_nodes() == 0) {
2005 return 1;
2006 } else {
2007 _SimpleList mapper;
2008 if (internalNodeCompare (n1,n2, mapper, reindexer, compMode, totalSize, n22, tree2, isPattern)) {
2009 long nc1 = n1->get_num_nodes();
2010
2011 _SimpleList furtherMatchedPatterns;
2012 _List patternList;
2013
2014 for (long k1=0; k1 < nc1; k1++) {
2015 if (mapper.list_data[k1]>=0) {
2016 if (internalTreeCompare (n1->go_down(k1+1),n2->go_down(mapper.list_data[k1]+1), reindexer, 0, totalSize, nil, tree2, isPattern)<1) {
2017 return -1;
2018 }
2019 } else if (mapper.list_data[k1] == -1) {
2020 if (internalTreeCompare (n1->go_down(k1+1),n2->parent, reindexer, 0, totalSize, n2, tree2, isPattern)<1) {
2021 return -1;
2022 }
2023 } else {
2024 long idx = -mapper.list_data[k1]-2,
2025 k;
2026
2027 _SimpleList * patched;
2028 if ((k=furtherMatchedPatterns.Find(idx))<0) {
2029 k = furtherMatchedPatterns.lLength;
2030 furtherMatchedPatterns << idx;
2031 patternList < new _SimpleList;
2032 }
2033
2034 patched = (_SimpleList*)patternList(k);
2035 (*patched) << k1;
2036 }
2037 }
2038 for (long k2=0; k2 < furtherMatchedPatterns.lLength; k2++) {
2039 node <long>* dummy = new node<long>;
2040 dummy->parent = n1->parent;
2041 _SimpleList * children = (_SimpleList*)patternList (k2),
2042 * newLeaves = new _SimpleList;
2043
2044 dummy->in_object = (long)newLeaves;
2045
2046
2047 for (long k3 = 0; k3 < children->lLength; k3++) {
2048 node<long>* aChild = n1->go_down(children->list_data[k3]+1);
2049 dummy->add_node(*aChild);
2050 _SimpleList t;
2051 t.Union (*newLeaves, *(_SimpleList*)aChild->in_object);
2052 newLeaves->Clear();
2053 newLeaves->Duplicate(&t);
2054 }
2055
2056
2057 long k4 = furtherMatchedPatterns.list_data[k2];
2058 char res = 1;
2059
2060 if (newLeaves->lLength > 1) {
2061 if (k4<n2->get_num_nodes()) {
2062 res = internalTreeCompare(dummy, n2->go_down(k4+1), reindexer, 0, totalSize, nil, tree2, isPattern);
2063 } else {
2064 res = internalTreeCompare (dummy,n2->parent, reindexer, 0, totalSize, n2, tree2, isPattern);
2065 }
2066 }
2067
2068 DeleteObject (newLeaves);
2069 delete (dummy);
2070
2071 if (res<1) {
2072 return -1;
2073 }
2074
2075 }
2076 } else {
2077 return 0;
2078 }
2079 }
2080 return 1;
2081 }
2082
2083
2084 //__________________________________________________________________________________
EdgeCount(long & leaves,long & internals) const2085 void _TreeTopology::EdgeCount (long& leaves, long& internals) const {
2086 leaves = 0L;
2087 internals = 0L;
2088
2089 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2090 while (node<long>* iterator = ni.Next()) {
2091 if (iterator->is_leaf()) {
2092 leaves ++;
2093 } else {
2094 internals ++;
2095 }
2096 }
2097
2098 }
2099
2100
2101 //__________________________________________________________________________________
TipCount(HBLObjectRef cache)2102 HBLObjectRef _TreeTopology::TipCount (HBLObjectRef cache) {
2103 long leaves, ints;
2104 EdgeCount (leaves, ints);
2105 return _returnConstantOrUseCache(leaves, cache);
2106 }
2107
2108 //__________________________________________________________________________________
BranchCount(HBLObjectRef cache)2109 HBLObjectRef _TreeTopology::BranchCount (HBLObjectRef cache) {
2110 long leaves, ints;
2111 EdgeCount (leaves, ints);
2112 return _returnConstantOrUseCache(ints-1., cache);
2113 }
2114
2115 //__________________________________________________________________________________
FlatRepresentation(HBLObjectRef cache)2116 HBLObjectRef _TreeTopology::FlatRepresentation (HBLObjectRef cache) {
2117 _SimpleList flatTree;
2118
2119 unsigned long count = 0UL;
2120 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2121
2122 while (node<long>* iterator = ni.Next()) {
2123 flatTree << iterator->in_object;
2124 iterator->in_object = count++;
2125 }
2126
2127
2128 _Matrix * res = (_Matrix*)_returnMatrixOrUseCache(1,count, _NUMERICAL_TYPE, false, cache);
2129
2130 ni.Reset (theRoot);
2131 count = 0UL;
2132
2133 while (node<long>* iterator = ni.Next()) {
2134 if (iterator->is_root()) {
2135 res->theData[count] = -1;
2136 } else {
2137 res->theData[count] = iterator->parent->in_object;
2138 }
2139
2140 iterator->in_object = flatTree.list_data[count++];
2141 }
2142 return res;
2143 }
2144
2145 //__________________________________________________________________________________
AVLRepresentation(HBLObjectRef layoutOption,HBLObjectRef cache)2146 HBLObjectRef _TreeTopology::AVLRepresentation (HBLObjectRef layoutOption, HBLObjectRef cache) {
2147
2148 if (layoutOption->ObjectClass () == NUMBER) {
2149 bool preOrder = layoutOption->Compute()->Value()>0.5;
2150
2151 _AssociativeList * masterList = new _AssociativeList ();
2152 // arrayKey;
2153
2154
2155 long rootIndex = 0;
2156
2157 _SimpleList nodeList;
2158 _AVLListX nodeIndexList (&nodeList);
2159
2160 node_iterator<long> ni (theRoot, preOrder ? _HY_TREE_TRAVERSAL_PREORDER : _HY_TREE_TRAVERSAL_POSTORDER);
2161
2162 while (node<long>* iterator = ni.Next()) {
2163 nodeIndexList.Insert ((BaseObj*)iterator, nodeIndexList.countitems()+1L);
2164
2165 if (iterator->is_root()) {
2166 rootIndex = nodeIndexList.countitems();
2167 }
2168 }
2169
2170 ni.Reset (theRoot);
2171
2172 while (node<long>* iterator = ni.Next()) {
2173 _AssociativeList * nodeList = new _AssociativeList ();
2174 nodeList->MStore ("Name", new _FString (GetNodeName (iterator)), false);
2175 nodeList->MStore ("Length", new _Constant (GetBranchLength (iterator)), false);
2176 nodeList->MStore ("Depth", new _Constant (ni.Level()), false);
2177 if (! iterator->is_root()) {
2178 nodeList->MStore ("Parent", new _Constant(nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator->parent))), false);
2179 }
2180
2181 long nCount = iterator->get_num_nodes();
2182 if (nCount) {
2183 _AssociativeList * childList = new _AssociativeList ();
2184 for (long k = 1; k<=nCount; k++ ) {
2185 childList->MStore (_String((long)(k-1)),new _Constant(nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator->go_down(k)))) , false);
2186 }
2187 nodeList->MStore ("Children", childList, false);
2188 }
2189 masterList->MStore (_String((long)nodeIndexList.GetXtra (nodeIndexList.Find((BaseObj*)iterator))), nodeList, false);
2190 }
2191
2192 _AssociativeList * headerList = new _AssociativeList ();
2193
2194 headerList->MStore ("Name", new _FString (*GetName()), false);
2195 headerList->MStore ("Root", new _Constant(rootIndex), false);
2196 masterList->MStore ("0", headerList, false);
2197
2198 return masterList;
2199 }
2200 return new _MathObject;
2201 }
2202
2203 //__________________________________________________________________________________
TipName(HBLObjectRef p,HBLObjectRef cache)2204 HBLObjectRef _TreeTopology::TipName (HBLObjectRef p, HBLObjectRef cache) {
2205
2206 if (p&& p->ObjectClass()==NUMBER) {
2207 long tip_index = p->Value(),
2208 count = -1L;
2209
2210 if (tip_index < 0L) {
2211 return new _Matrix (RetrieveNodeNames(true, false, _HY_TREE_TRAVERSAL_POSTORDER));
2212 }
2213
2214 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2215 while (node<long>* iterator = ni.Next()) {
2216 if (iterator->is_leaf()) {
2217 count++;
2218 if (count == tip_index) {
2219 return _returnStringOrUseCache (GetNodeName (iterator), cache);
2220 }
2221 }
2222 }
2223 }
2224 return _returnStringOrUseCache(kEmptyString, cache);
2225 }
2226
2227 //__________________________________________________________________________________
2228
_recurse_and_reshuffle(node<long> * root,long & from,long & to,long & leaf_index,_SimpleList & leaf_order,_TreeTopology const & T,hyFloat def_rate,_AssociativeList * node_rates)2229 bool _recurse_and_reshuffle (node<long>* root, long& from, long &to, long &leaf_index, _SimpleList& leaf_order, _TreeTopology const& T, hyFloat def_rate, _AssociativeList* node_rates) {
2230 int children = root->get_num_nodes();
2231 if (children) {
2232
2233 hyFloat shuffle_rate = def_rate;
2234
2235 if (node_rates) {
2236 try {
2237 shuffle_rate = node_rates->GetNumberByKey(T.GetNodeName(root));
2238 } catch (const _String& e) {
2239 }
2240 }
2241
2242 long my_start = leaf_index;
2243 for (int k = 1; k <= root->get_num_nodes(); k ++) {
2244 long start, end;
2245 node<long>* kth_child = root->go_down (k);
2246 if (_recurse_and_reshuffle (kth_child, start, end, leaf_index, leaf_order,T, def_rate, node_rates)) {
2247 // reshuffled this child, lock the leaves
2248 for (long element = start; element <= end; element ++) {
2249 if (leaf_order.get (element) >= 0) {
2250 leaf_order[element] = -leaf_order.get (element) - 1;
2251 }
2252 }
2253 }
2254 }
2255 from = my_start;
2256 to = leaf_index-1;
2257
2258 if (to-from >= 2 && genrand_real1() <= shuffle_rate) { // not a cherry and selected
2259 //printf ("Shuffling at %s with rate %g\n", T.GetNodeName(root).get_str(), shuffle_rate);
2260 _SimpleList reshuffled_subset;
2261 for (long element = from; element <= to; element ++) {
2262 if (leaf_order.get (element) >= 0L) {
2263 reshuffled_subset << leaf_order.get (element);
2264 }
2265 }
2266 reshuffled_subset.Permute(1L);
2267 long hits = 0;
2268 for (long element = from; element <= to; element ++) {
2269 if (leaf_order.get (element) >= 0L) {
2270 leaf_order[element] = reshuffled_subset.get (hits++);
2271 }
2272 }
2273 return true;
2274 }
2275 return false;
2276 }
2277 leaf_index ++;
2278 return false;
2279 }
2280 //__________________________________________________________________________________
RandomizeTips(HBLObjectRef rate,HBLObjectRef cache)2281 HBLObjectRef _TreeTopology::RandomizeTips (HBLObjectRef rate, HBLObjectRef cache) {
2282 _TreeTopology * reshuffled = nil;
2283
2284 try {
2285 if (CheckArgumentType (rate, NUMBER | ASSOCIATIVE_LIST, true)) {
2286
2287 hyFloat default_shuffle_rate = 0.1;
2288 _AssociativeList* node_level_shuffle_rates = nil;
2289 if (rate->ObjectClass() == NUMBER) {
2290 default_shuffle_rate = rate->Compute()->Value();
2291 } else {
2292 node_level_shuffle_rates = (_AssociativeList*)rate;
2293 try {
2294 default_shuffle_rate = node_level_shuffle_rates->GetNumberByKey("default");
2295 } catch (const _String& err) { // no default shuffle rate
2296
2297 }
2298 }
2299
2300
2301
2302
2303 if (CheckRange (default_shuffle_rate, 0, 1, true)) {
2304 reshuffled = new _TreeTopology (*this);
2305
2306 long leaves, ints;
2307 EdgeCount (leaves, ints);
2308
2309 _SimpleList leaf_order;
2310
2311 /* this will store the reshuffled order of leaves
2312 leaf_order[i] = the index of the original leaf i (in post-order traversal) in the reshuffled tree
2313 a negative value indicates that the particular leaf has been permuted already, and is locked for subsequent permutations
2314 */
2315
2316 node_iterator<long> ni (reshuffled->theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2317 _Vector node_shuffle_rates;
2318
2319 while (node<long>* iterator = ni.Next()) {
2320 if (iterator->is_leaf()) {
2321 leaf_order << iterator->in_object;
2322 }
2323 }
2324
2325 //node_shuffle_rates.Trim();
2326 //printf ("%s\n", _String((_String*)node_shuffle_rates.toStr()).get_str());
2327
2328 long leaf_index = 0;
2329 long f,t;
2330 _recurse_and_reshuffle (&reshuffled->GetRoot(),f,t,leaf_index,leaf_order,*this,default_shuffle_rate,node_level_shuffle_rates);
2331 ni.Reset(reshuffled->theRoot);
2332 f = 0;
2333 //node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2334 while (node<long>* iterator = ni.Next()) {
2335 if (iterator->is_leaf()){
2336 iterator->in_object = leaf_order.get (f++);
2337 if (iterator->in_object < 0) {
2338 iterator->in_object = -iterator->in_object - 1;
2339 }
2340 }
2341 }
2342
2343
2344
2345 return reshuffled;
2346 }
2347 }
2348
2349 } catch (const _String& e) {
2350 HandleApplicationError(e);
2351 }
2352 return new _MathObject;
2353 }
2354
2355
2356 //__________________________________________________________________________________
BranchLength(HBLObjectRef p,HBLObjectRef cache)2357 HBLObjectRef _TreeTopology::BranchLength (HBLObjectRef p, HBLObjectRef cache) {
2358 hyFloat branch_length = HY_INVALID_RETURN_VALUE;
2359
2360 if (p) {
2361 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2362 if (p->ObjectClass()==NUMBER) {
2363 long res = p->Value();
2364
2365 if (res < 0L) {
2366 // get ALL branch lengths
2367 _Vector * branch_lengths = new _Vector;
2368
2369 bool two = IsDegenerate();
2370
2371 while (node<long>* iterator = ni.Next()) {
2372 if (!iterator->is_root() || two){
2373 branch_lengths->Store(GetBranchLength (iterator));
2374 }
2375 }
2376 branch_lengths->Store (0.0); // backward compatibility with storing 0 at the root
2377 branch_lengths->Trim();
2378 branch_lengths->Transpose();
2379 return branch_lengths;
2380 } else {
2381 // get a specific branch length
2382 long count = -1L;
2383 while (node<long>* iterator = ni.Next()) {
2384 if (!iterator->is_root()) {
2385 if (++count == res) {
2386 branch_length = GetBranchLength(iterator);
2387 break;
2388 }
2389 }
2390 }
2391 }
2392 } else {
2393 if (p->ObjectClass()==STRING) {
2394 _List twoIDs = ((_FString*)p->Compute())->get_str().Tokenize(";");
2395
2396 if (twoIDs.countitems() == 2UL || twoIDs.countitems() == 1UL) {
2397
2398 _String * nodes[2] = {(_String*)twoIDs.GetItem (0L),
2399 twoIDs.countitems()>1UL?(_String*)twoIDs.GetItem (1L):nil};
2400
2401
2402 node<long>* node_objects[2] = {nil,nil};
2403 long levels[2] = {0L,0L};
2404
2405
2406 while (node<long>* iterator = ni.Next()) {
2407 _String nn = GetNodeName(iterator);
2408 for (unsigned long i = 0L; i < 2UL; i++) {
2409 if (nodes[i] && nn == *nodes[i]) {
2410 node_objects[i] = iterator;
2411 levels[i] = ni.Level();
2412 if (node_objects[1-i]) {
2413 break;
2414 }
2415 }
2416 }
2417 }
2418
2419 if (node_objects[0] && node_objects[1]) {
2420 branch_length = 0.;
2421
2422 for (long i = 0L; i < 2L; i++) { // walk up to the same depth
2423 while (levels[1-i] < levels[i]) {
2424 branch_length += GetBranchLength(node_objects[i]);
2425 node_objects[i] = node_objects[i]->get_parent();
2426 levels[i]--;
2427 }
2428 }
2429
2430 while (node_objects[0] != node_objects[1]) {
2431 branch_length += GetBranchLength (node_objects[0]) + GetBranchLength (node_objects[1]);
2432 node_objects[0] = node_objects[0]->parent;
2433 node_objects[1] = node_objects[1]->parent;
2434 }
2435 } else if (node_objects[0]) {
2436 if (nodes[1]) {
2437 if (*nodes[0] == *nodes[1]) {
2438 branch_length = 0.0;
2439 } else if (*nodes[1] == kExpectedNumberOfSubstitutions) {
2440 _String bl (GetBranchLengthString (node_objects[0], true));
2441 if (bl.nonempty()) {
2442 return new _FString (bl);
2443 }
2444 }
2445 } else {
2446 branch_length = GetBranchLength(node_objects[0]);
2447 }
2448 }
2449 }
2450 }
2451 }
2452 }
2453
2454 if (isnan (branch_length)) {
2455 return new _MathObject ();
2456 }
2457
2458 return _returnConstantOrUseCache(branch_length, cache);
2459 }
2460
2461 //__________________________________________________________________________________
2462
TreeBranchName(HBLObjectRef node_ref,bool get_subtree,HBLObjectRef mapping_mode,HBLObjectRef cache)2463 HBLObjectRef _TreeTopology::TreeBranchName (HBLObjectRef node_ref, bool get_subtree, HBLObjectRef mapping_mode, HBLObjectRef cache) {
2464 _StringBuffer branch_name;
2465
2466
2467 if (node_ref) {
2468 if (node_ref->ObjectClass()==NUMBER) { // get by index
2469
2470 long argument = node_ref->Value(),
2471 count = -1L;
2472
2473 if (argument>=0L) { // get a specific internal node name/subtree
2474
2475 node<long>* ith_internal_node = ConditionalTraverser (
2476 [&] (node <long>* iterator, node_iterator<long>& ni) -> bool {
2477 if (iterator->is_leaf () == false) {
2478 return (++count == argument);
2479 }
2480 return false;
2481 },
2482 false);
2483
2484 if (!ith_internal_node) {
2485 ith_internal_node = theRoot;
2486 }
2487
2488 if (ith_internal_node) {
2489 if (get_subtree) {
2490 hyTopologyBranchLengthMode mapMode = kTopologyBranchLengthNone;
2491 if (mapping_mode) {
2492 _String * t = (_String*)mapping_mode->Compute()->toStr();
2493 DetermineBranchLengthMappingMode (t,mapMode);
2494 DeleteObject (t);
2495 }
2496 SubTreeString (ith_internal_node, branch_name,CollectParseSettings(), true,mapMode);
2497 } else {
2498 branch_name = GetNodeName (ith_internal_node);
2499 }
2500 }
2501 } else {
2502 _List branch_lengths;
2503 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2504 while (node<long>* iterator = ni.Next()) {
2505 branch_lengths.AppendNewInstance(new _String (GetNodeName (iterator)));
2506 }
2507 return new _Matrix (branch_lengths);
2508 }
2509 } else {
2510 if (node_ref->ObjectClass()==STRING) {
2511 _List twoIDs = ((_FString*)node_ref->Compute())->get_str().Tokenize(";");
2512
2513 if (twoIDs.countitems() == 2UL || twoIDs.countitems() == 1UL) {
2514
2515 _String * nodes[2] = {(_String*) twoIDs.GetItem(0),
2516 (_String*)(twoIDs.countitems() > 1L?twoIDs.GetItem(1):nil)};
2517
2518
2519
2520 if (twoIDs.countitems() == 1UL) {
2521 _AssociativeList * resList = new _AssociativeList;
2522 long masterLevel = 0L;
2523
2524 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_PREORDER);
2525 while (node<long>* iterator = ni.Next()) {
2526 _String node_name = GetNodeName (iterator);
2527 if (node_name == *nodes[0]) {
2528 masterLevel = ni.Level();
2529 //resList->MStore(node_name,new _Constant (iterator->get_num_nodes()));
2530 do {
2531 // 20151203: SLKP this will store the root name twice; commented the line above
2532 resList->MStore(GetNodeName (iterator),new _Constant (1L+(iterator->get_num_nodes()>0L)), false);
2533 iterator = ni.Next();
2534 } while (iterator && ni.Level() > masterLevel);
2535 break;
2536 }
2537 }
2538 if (resList->countitems() == 0L) {
2539 // did not find the target node
2540 DeleteObject (resList);
2541 return new _MathObject;
2542 }
2543 return resList;
2544 } else {
2545 // this returns the sequence of nodes between node 1 and node 2
2546 node<long>* node_objects[2]= {nil, nil};
2547 long levels[2] = {0L, 0L};
2548
2549 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2550
2551 while (node<long>* iterator = ni.Next()) {
2552 _String nn = GetNodeName(iterator);
2553 for (long i = 0L; i < 2L; i++) {
2554 if (nodes[i] && nn == *nodes[i]) {
2555 node_objects[i] = iterator;
2556 levels[i] = ni.Level();
2557 if (node_objects[1-i]) {
2558 break;
2559 }
2560 }
2561 }
2562 }
2563
2564 if (node_objects [0] && node_objects [1]) {
2565 _List partial_paths [2];
2566
2567 for (long i = 0L; i < 2L; i++) { // walk up to the same depth
2568 while (levels[1-i] < levels[i]) {
2569 partial_paths[i] < new _String (GetNodeName(node_objects[i]));
2570 node_objects[i] = node_objects[i]->get_parent();
2571 levels[i]--;
2572 }
2573 }
2574
2575 while (node_objects[0] != node_objects[1]) {
2576 for (long i = 0L; i < 2L; i++) {
2577 partial_paths[i] < new _String (GetNodeName(node_objects[i]));
2578 node_objects[i] = node_objects[i]->parent;
2579 }
2580 }
2581 partial_paths[1].Flip();
2582 partial_paths[0] << partial_paths[1];
2583 return new _Matrix(partial_paths[0]);
2584 } else if (node_objects[0]) {
2585 return new _Matrix();
2586 }
2587 return new _MathObject();
2588 }
2589 }
2590 }
2591 }
2592 }
2593 //return new _FString (branch_name, false);
2594 return _returnStringOrUseCache(branch_name, cache);
2595 }
2596
2597 //__________________________________________________________________________________
SubTreeString(node<long> * root,_StringBuffer & result,_TreeTopologyParseSettings const & settings,bool all_names,hyTopologyBranchLengthMode mode,long branch_length_variable,_AVLListXL * substitutions) const2598 void _TreeTopology::SubTreeString (node<long>* root, _StringBuffer &result, _TreeTopologyParseSettings const& settings, bool all_names, hyTopologyBranchLengthMode mode, long branch_length_variable, _AVLListXL* substitutions) const {
2599
2600
2601 auto handle_node = [&] (node<long>* iterator, bool is_degenerate) -> void {
2602 _String node_name = GetNodeName (iterator);
2603 if (substitutions) {
2604 if (BaseRefConst replacement = substitutions->GetDataByKey(&node_name)) {
2605 node_name = *(_String*)replacement;
2606 }
2607 }
2608
2609 if (is_degenerate || !iterator->is_root()) {
2610 if (all_names || !node_name.BeginsWith(settings.inode_prefix)) {
2611 result << node_name;
2612 }
2613 PasteBranchLength (iterator,result,mode,branch_length_variable);
2614 }
2615 };
2616
2617 if (root->parent == NULL && IsDegenerate()) {
2618 node_iterator<long> ni (root, _HY_TREE_TRAVERSAL_POSTORDER);
2619 result << '(';
2620
2621 while (node<long>* iterator = ni.Next()) {
2622 handle_node (iterator, true);
2623 result << (iterator->is_root() ? ')' : ',');
2624 }
2625 } else {
2626 long last_level = 0L;
2627
2628 node_iterator<long> ni (root, _HY_TREE_TRAVERSAL_POSTORDER);
2629
2630 while (node <long>* iterator = ni.Next()) {
2631
2632 if (ni.Level() > last_level) {
2633 if (last_level) {
2634 result<<',';
2635 }
2636 result.AppendNCopies('(', ni.Level()-last_level);
2637 } else if (ni.Level() < last_level) {
2638 result.AppendNCopies(')', last_level-ni.Level());
2639 } else if (last_level) {
2640 result<<',';
2641 }
2642
2643 last_level = ni.Level();
2644 handle_node (iterator, false);
2645 }
2646 }
2647 }
2648
2649
2650 //__________________________________________________________________________________
RerootTreeInternalTraverser(node<long> * iterator,long originator,bool passedRoot,_StringBuffer & res,_TreeTopologyParseSettings const & settings,hyTopologyBranchLengthMode branch_length_mode,long variable_ref,bool first_time) const2651 void _TreeTopology::RerootTreeInternalTraverser (node<long>* iterator, long originator, bool passedRoot, _StringBuffer &res, _TreeTopologyParseSettings const& settings, hyTopologyBranchLengthMode branch_length_mode, long variable_ref, bool first_time) const {
2652
2653 //printf ("[RerootTreeInternalTraverser]%s %ld %ld\n", GetNodeName(iterator).sData, originator, passedRoot);
2654
2655 if (passedRoot) {
2656 SubTreeString (iterator, res, settings, false, branch_length_mode, variable_ref);
2657 } else {
2658 // move to parent now
2659 node<long>* iterator_parent = iterator->get_parent();
2660
2661 /*
2662 StringToConsole(GetNodeName(iterator)); NLToConsole();
2663 if (iterator_parent) {
2664 StringToConsole(GetNodeName(iterator_parent)); NLToConsole();
2665 }
2666 */
2667
2668 if (iterator_parent) { // not root yet
2669 bool is_root_next = iterator_parent->get_parent() == NULL;
2670 if (!is_root_next) {
2671 res<<'(';
2672 }
2673 long the_index_of_this_child = iterator->get_child_num();
2674 RerootTreeInternalTraverser (iterator_parent, the_index_of_this_child ,false,res,settings,branch_length_mode,variable_ref,first_time);
2675
2676 if (iterator_parent->get_parent()) {
2677
2678 for (long i = 1L; i<=iterator_parent->get_num_nodes(); i++) {
2679 if (i!=the_index_of_this_child) {
2680 res<<',';
2681 SubTreeString (iterator_parent->go_down(i),res, settings, false, branch_length_mode,variable_ref);
2682 }
2683 }
2684 }
2685
2686 if (!is_root_next) {
2687 res<<')';
2688
2689 if (!first_time) {
2690 _String node_name = GetNodeName (iterator);
2691 if (!node_name.BeginsWith(settings.inode_prefix)) {
2692 res<<node_name;
2693 }
2694 }
2695 PasteBranchLength (iterator,res,branch_length_mode, variable_ref);
2696 }
2697 } else {
2698 /* passing old root
2699 create a new root with >=2 children nodes - this node,
2700 and one more containing all other children (>=2 of them)
2701 */
2702
2703 long count = 0L,
2704 root_children_count = theRoot->get_num_nodes();
2705
2706 if (root_children_count > 2) {
2707 res << '(';
2708 }
2709
2710 node<long>* stash_originator = nil;
2711
2712 for (long k = 1; k<=theRoot->get_num_nodes(); k++) {
2713 if (k==originator) {
2714 stash_originator = theRoot->go_down(k);
2715 continue;
2716 }
2717 if (count) {
2718 res<<',';
2719 }
2720 count++;
2721 SubTreeString (theRoot->go_down(k), res,settings, false, branch_length_mode,variable_ref);
2722 }
2723
2724 if (!stash_originator) {
2725 HandleApplicationError ("Internal error in RerootTreeInternalTraverser");
2726 return;
2727 }
2728
2729 if (root_children_count > 2) {
2730 res<<')';
2731 }
2732
2733 PasteBranchLength (stash_originator,res, branch_length_mode, variable_ref);
2734 }
2735 }
2736 }
2737
2738
2739 //__________________________________________________________________________________
PasteBranchLength(node<long> * node,_StringBuffer & result,hyTopologyBranchLengthMode const mode,long variable_reference,hyFloat factor) const2740 void _TreeTopology::PasteBranchLength (node<long>* node, _StringBuffer& result,
2741 hyTopologyBranchLengthMode const mode, long variable_reference , hyFloat factor) const {
2742
2743 auto decorate_string = [&] (_String const& value) -> void {
2744 if (value.nonempty()) {
2745 result << ':' << _String (value.to_float()*factor);
2746 }
2747 };
2748
2749 //printf ("_TreeTopology::PasteBranchLength %d %g %s\n", mode, GetNodeName(node).get_str(), GetBranchLength(node));
2750
2751 switch (mode) {
2752 case kTopologyBranchLengthExpectedSubs: {
2753 result << ':' << _String(GetBranchLength (node)*factor);
2754 break;
2755 }
2756 case kTopologyBranchLengthUserLengths: {
2757 decorate_string (GetBranchValue (node));
2758 break;
2759 }
2760 case kTopologyBranchLengthLocalParameter: {
2761 decorate_string (GetBranchVarValue (node, variable_reference));
2762 }
2763 }
2764 }
2765
2766 //__________________________________________________________________________________
GetBranchLengthString(node<long> * n,bool get_expression) const2767 const _String _TreeTopology::GetBranchLengthString (node<long> * n, bool get_expression) const {
2768 if (get_expression) {
2769 return kEmptyString;
2770 } else {
2771 return compExp->theData[n->in_object];
2772 }
2773 }
2774
2775 //__________________________________________________________________________________
GetBranchLength(node<long> * n) const2776 hyFloat _TreeTopology::GetBranchLength (node<long> * n) const {
2777 return compExp->theData[n->in_object];
2778 }
2779
2780
2781 //__________________________________________________________________________________
GetBranchValue(node<long> *) const2782 const _String _TreeTopology::GetBranchValue (node<long> *) const{
2783 return kEmptyString;
2784 }
2785 //__________________________________________________________________________________
GetBranchVarValue(node<long> *,long) const2786 const _String _TreeTopology::GetBranchVarValue (node<long> *, long) const {
2787 return kEmptyString;
2788 }
2789
2790
2791 //__________________________________________________________________________________
RerootTree(HBLObjectRef new_root,HBLObjectRef cache)2792 HBLObjectRef _TreeTopology::RerootTree (HBLObjectRef new_root, HBLObjectRef cache) {
2793 _StringBuffer * res = new _StringBuffer (256UL);
2794
2795 _TreeTopologyParseSettings settings = CollectParseSettings();
2796
2797 if (new_root && new_root->ObjectClass()==STRING) {
2798 if (rooted == UNROOTED) {
2799 ReportWarning ("Reroot was called with an unrooted tree. Rerooting was still performed.");
2800 }
2801
2802 _String * new_root_name = (_String*)new_root->toStr();
2803 node<long>* reroot_at = FindNodeByName (new_root_name);
2804 DeleteObject (new_root_name);
2805
2806 if (reroot_at) { // good node name, can reroot
2807 if (reroot_at->is_root()) {
2808 SubTreeString (theRoot, *res,settings,kTopologyBranchLengthUserLengths);
2809 } else {
2810 (*res)<<'('; // opening tree (
2811 RerootTreeInternalTraverser (reroot_at, reroot_at->get_child_num(),false,*res,settings,kTopologyBranchLengthUserLengths,-1,true);
2812 (*res)<<',';
2813 SubTreeString (reroot_at, *res,settings,false, kTopologyBranchLengthUserLengths);
2814 (*res)<<')';
2815 }
2816 }
2817 } else {
2818 HandleApplicationError ("Reroot Tree was passed an invalid branch argument.");
2819 }
2820
2821 res->TrimSpace();
2822 return _returnStringOrUseCache(res, cache);
2823 }
2824
2825
2826 //__________________________________________________________________________________
2827
DetermineBranchLengthMappingMode(_String const * param,hyTopologyBranchLengthMode & map_mode) const2828 _String const _TreeTopology::DetermineBranchLengthMappingMode (_String const * param, hyTopologyBranchLengthMode& map_mode) const {
2829 map_mode = kTopologyBranchLengthNone;
2830 if (param) {
2831 if (*param == hy_env::kExpectedNumberOfSubstitutions) {
2832 map_mode = kTopologyBranchLengthExpectedSubs;
2833 } else if (*param == hy_env::kStringSuppliedLengths) {
2834 map_mode = kTopologyBranchLengthUserLengths;
2835 } else {
2836 map_mode = kTopologyBranchLengthLocalParameter;
2837 return _String('.') & *param;
2838 }
2839 }
2840 return kEmptyString;
2841 }
2842
2843 //_______________________________________________________________________________________________
2844
prepTree4Comparison(_List & leafNames,_SimpleList & mapping,node<long> * topNode) const2845 node<long>* _TreeTopology::prepTree4Comparison (_List& leafNames, _SimpleList& mapping, node<long>* topNode) const {
2846
2847 /**
2848 Creates a tree structure which facilitates comparison to other trees
2849
2850 The in_object of each node is a numeric list of indices of all the leaves that are at or below this node
2851 The indices are in post-order traversal order
2852
2853 `leafNames` stores lexicographically sorted leaf names
2854 `mapping` stores the mapping between the original post-order index of a leaf and its location in `leafNames`
2855
2856 */
2857
2858 node<long>* res = topNode?topNode->duplicate_tree():theRoot->duplicate_tree();
2859
2860 node_iterator<long> ni (res, _HY_TREE_TRAVERSAL_POSTORDER);
2861
2862 _SimpleList indexer;
2863
2864 while (node<long> * iterator = ni.Next()) {
2865
2866 _SimpleList * descendants = new _SimpleList;
2867
2868 if (!iterator->is_leaf()) {
2869 for (unsigned long k = 1UL; k <= iterator->get_num_nodes(); k++) {
2870 (*descendants) << *(_SimpleList*)iterator->go_down(k)->in_object;
2871 }
2872 } else {
2873 (*descendants) << leafNames.countitems();
2874 indexer << leafNames.countitems();
2875 leafNames < new _String (GetNodeName (iterator));
2876 }
2877
2878 iterator->in_object = (long)descendants;
2879 }
2880
2881 // now sort leaf names and build the indexer
2882
2883 mapping.Clear();
2884 mapping.Duplicate (&indexer);
2885
2886 SortLists (&leafNames, &indexer);
2887 SortLists (&indexer, &mapping);
2888
2889 return res;
2890 }
2891
2892 //_______________________________________________________________________________________________
2893
destroyCompTree(node<long> * compRoot) const2894 void _TreeTopology::destroyCompTree (node<long>* compRoot) const {
2895 for (unsigned long i=1UL; i<=compRoot->get_num_nodes(); i++) {
2896 destroyCompTree (compRoot->go_down(i));
2897 }
2898 DeleteObject ((BaseRef)compRoot->in_object);
2899 delete (compRoot);
2900 }
2901
2902 //_______________________________________________________________________________________________
2903
CompareTrees(_TreeTopology * compareTo) const2904 _String _TreeTopology::CompareTrees (_TreeTopology* compareTo) const {
2905
2906 _List myLeaves,
2907 otherLeaves;
2908
2909 _SimpleList indexer,
2910 otherIndexer;
2911
2912 node<long> *myCT,
2913 *otherCT;
2914
2915 _String rerootAt;
2916
2917 myCT = prepTree4Comparison(myLeaves, indexer);
2918 otherCT = compareTo->prepTree4Comparison(otherLeaves, otherIndexer);
2919
2920 // first compare the set of leaf labels
2921
2922 if (myLeaves.Equal (otherLeaves)) {
2923 _SimpleList * reindexer = nil;
2924
2925 if (indexer.Equal (otherIndexer)) {
2926 otherIndexer.Populate (_SimpleList ((unsigned long)myLeaves.countitems()).Populate (indexer, _SimpleList::action_invert),
2927 _SimpleList::action_compose);
2928 reindexer = &otherIndexer;
2929 }
2930
2931 char compRes;
2932
2933 if (internalTreeCompare (myCT, otherCT, reindexer, 1, myLeaves.lLength, nil, compareTo)>0) {
2934 rerootAt = kCompareEqualWithoutReroot;
2935 } else {
2936 long tCount = 0;
2937
2938 node_iterator<long> ni (otherCT, _HY_TREE_TRAVERSAL_POSTORDER);
2939 node<long> * meNode;
2940
2941 while (meNode = ni.Next()) {
2942 if (meNode == otherCT) {
2943 break;
2944 }
2945 if (meNode->get_num_nodes()) {
2946 compRes = internalTreeCompare (myCT, meNode, reindexer, 1, myLeaves.lLength, nil, compareTo);
2947 if (compRes>0) {
2948 break;
2949 } else if (compRes) {
2950 meNode = otherCT;
2951 break;
2952 }
2953 }
2954
2955 tCount ++;
2956 }
2957
2958 if (meNode!=otherCT) {
2959 node_iterator<long> ni (compareTo->theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
2960 while (node<long> * meNode = ni.Next()) {
2961 if (meNode == theRoot) {
2962 break;
2963 }
2964 if (tCount==1) {
2965 rerootAt = kCompareEqualWithReroot & *LocateVar (meNode->in_object)->GetName() & '.';
2966 break;
2967 } else {
2968 tCount --;
2969 }
2970 }
2971 }
2972 }
2973 if (rerootAt.empty()) {
2974 rerootAt = kCompareUnequalToplogies;
2975 }
2976 } else {
2977 rerootAt = kCompareUnequalLabelSets;
2978 }
2979
2980 destroyCompTree (myCT);
2981 destroyCompTree (otherCT);
2982
2983 return rerootAt;
2984 }
2985
2986
2987
2988
2989 //_______________________________________________________________________________________________
2990
SplitTreeIntoClustersInt(node<long> *,_List *,_AVLListX &,long,long) const2991 _List* _TreeTopology::SplitTreeIntoClustersInt (node<long>* , _List* , _AVLListX& , long , long ) const
2992 // TODO SLKP 20180202: clearly this needs work (or needs to be deprecated)
2993 // returns the list of nodes included in the splits BELOW trav1
2994 {
2995 /*_List * dependancies = nil;
2996
2997 for (long k = trav1->get_num_nodes(); k; k--)
2998 {
2999 _List * me = SplitTreeIntoClustersInt(trav1->go_down(k), res, counts, size, tol);
3000 if (me)
3001 {
3002 if (!dependancies)
3003 checkPointer (dependancies = new _List);
3004
3005 (*dependancies) << (*me);
3006 DeleteObject (me);
3007 }
3008 }
3009
3010 long distinctElements = counts.GetXtra (counts.Find((BaseRef)trav1->in_object)),
3011 dLength = dependancies?dependancies->lLength:0;
3012
3013 if (((trav1->parent==nil)||(distinctElements+dLength>=size-tol))&&(distinctElements+dLength<=size+tol))
3014 {
3015 _List entry;
3016 _String nName;
3017 GetNodeName (trav1, nName);
3018 entry && & nName;
3019 entry && & _Constant (distinctElements);
3020 if (dependancies)
3021 entry << (*dependancies);
3022 (*res) && & entry;
3023
3024 trav1 = trav1->parent;
3025 while (trav1)
3026 {
3027 long aidx = counts.Find((BaseRef)trav1->in_object);
3028 counts.SetXtra(aidx,counts.GetXtra (aidx) - distinctElements);
3029 trav1 = trav1->parent;
3030 }
3031
3032 if (dependancies)
3033 dependancies->Clear();
3034 else
3035 checkPointer (dependancies = new _List);
3036
3037 (*dependancies) && & nName;
3038 }
3039
3040 return dependancies;*/
3041
3042 // decide if child or i-node
3043
3044 /*if (trav1->get_num_nodes())
3045 {
3046
3047 }
3048 else
3049 {
3050 node <long>* pn = trav1->parent;
3051
3052 while (pn)
3053 {
3054 long pidx = counts.Find((BaseRef)pn->in_object);
3055
3056 }
3057 }*/
3058 // TBI
3059 return new _List;
3060 }
3061
3062 //_______________________________________________________________________________________________
3063
SplitTreeIntoClusters(unsigned long size,unsigned long tol) const3064 _List* _TreeTopology::SplitTreeIntoClusters (unsigned long size, unsigned long tol) const {
3065 // TODO SLKP 20180202: clearly this needs work (or needs to be deprecated)
3066 // assume fixed rooting for now
3067 // returns the list of list speccing pointers to calcnodes rooting the splits
3068 // the member list contains 2 or more entries for each node:
3069 // itself, the number of (independent)leaves encompassed by that node,
3070 // and an optional references to the node whose
3071 // results it depends on.
3072
3073 // assumes that size-tol>=2
3074
3075 _SimpleList counts;
3076 _AVLListX cavl (&counts);
3077
3078 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3079 while (node<long> * iterator = ni.Next()) {
3080 long nC = iterator->get_num_nodes();
3081 if (nC) {
3082 long c = 0;
3083 for (long k=1; k<=nC; k++) {
3084 c += counts.list_data[iterator->go_down(k)->in_object];
3085 }
3086 cavl.Insert((BaseRef)iterator->in_object,c);
3087 } else {
3088 cavl.Insert((BaseRef)iterator->in_object,1);
3089 }
3090 }
3091
3092 _List* result = new _List;
3093 DeleteObject(SplitTreeIntoClustersInt (theRoot, result, cavl, size, tol));
3094 return result;
3095 }
3096
3097 //_______________________________________________________________________________________________
3098
MatchTreePattern(_TreeTopology const * compareTo) const3099 const _String _TreeTopology::MatchTreePattern (_TreeTopology const* compareTo) const {
3100 // TODO: 20180202, SLKP, this needs to be confirmed as working
3101 // the pattern is this
3102 // compare to is the potential match
3103 _List myLeaves,
3104 otherLeaves,
3105 overlappedLeaves;
3106
3107 _SimpleList indexer,
3108 otherIndexer;
3109
3110 node<long> *myCT,
3111 *otherCT;
3112
3113 _String rerootAt;
3114
3115 myCT = prepTree4Comparison(myLeaves, indexer);
3116 otherCT = compareTo->prepTree4Comparison(otherLeaves, otherIndexer);
3117
3118
3119 _SimpleList matchedLeaves;
3120 overlappedLeaves.Intersect (myLeaves, otherLeaves,&matchedLeaves);
3121
3122 unsigned long my_leaf_count = myLeaves.countitems();
3123
3124 if ((my_leaf_count >=otherLeaves.countitems())&&(overlappedLeaves.countitems() == otherLeaves.countitems())) {
3125 if (my_leaf_count >otherLeaves.countitems()) {
3126 // map leaves back to ordering space
3127
3128 // trim the pattern tree
3129 _SimpleList allowedLeaves (my_leaf_count, 0, 0),
3130 recordTransfer (my_leaf_count),
3131 inverted (my_leaf_count);
3132
3133 inverted.Populate (indexer, _SimpleList::action_invert);
3134
3135 for (long padresSuck = 0; padresSuck < matchedLeaves.lLength; padresSuck ++) {
3136 allowedLeaves.list_data[inverted.get(matchedLeaves.get(padresSuck))] = 1;
3137 }
3138
3139 long slider = 0L;
3140
3141 for (long dodgersSuck = 0L; dodgersSuck < my_leaf_count; dodgersSuck ++)
3142 if (allowedLeaves.get (dodgersSuck)) {
3143 recordTransfer.list_data[dodgersSuck] = slider++;
3144 }
3145
3146 // pass 1 - delete all the superfluous leaves
3147
3148 node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3149 while (node<long>* iterator = ni.Next()) {
3150 if (iterator != myCT && !iterator->is_leaf()) {
3151 _SimpleList* descendants = ((_SimpleList*)iterator->in_object);
3152
3153 if ((descendants&&(allowedLeaves.list_data[descendants->list_data[0]] == 0))||(!descendants)) {
3154 // mark this node for deletion
3155 if (descendants) {
3156 DeleteObject(descendants);
3157 }
3158
3159 node<long>* sacLamb = iterator;
3160 ni.Next();
3161
3162 if (sacLamb->parent->get_num_nodes()==1) {
3163 DeleteObject((BaseRef)sacLamb->parent->in_object);
3164 sacLamb->parent->in_object = 0L;
3165 }
3166
3167 sacLamb->parent->detach_child (sacLamb->get_child_num());
3168 delete (sacLamb);
3169 continue;
3170 }
3171 }
3172 }
3173
3174 // pass 2 - prune internal nodes with exactly one child
3175
3176 // >O
3177 ni.Reset (myCT);
3178
3179 while (node<long>* iterator = ni.Next()) {
3180 long nn = iterator->get_num_nodes();
3181 if (nn == 1) {
3182 node<long>* loneChild = iterator->go_down(1);
3183 DeleteObject((_SimpleList*)iterator->in_object);
3184 iterator->in_object = loneChild->in_object;
3185 iterator->detach_child (1);
3186 delete (loneChild);
3187 } else {
3188 if (nn > 1) {
3189 _SimpleList * myDescs = (_SimpleList*)iterator->in_object;
3190 myDescs->Clear();
3191
3192 for (long cc = 1; cc <= nn; cc++) {
3193 _SimpleList temp;
3194
3195 temp.Union (*myDescs, *(_SimpleList*)iterator->go_down(cc)->in_object);
3196
3197 myDescs->Clear();
3198 myDescs->Duplicate (&temp);
3199 }
3200 } else {
3201 ((_SimpleList*)iterator->in_object)->list_data[0] = recordTransfer.list_data[((_SimpleList*)iterator->in_object)->list_data[0]];
3202 }
3203
3204 }
3205 }
3206
3207 _List newLeaves;
3208 recordTransfer.Clear();
3209 inverted.Clear();
3210
3211 for (long lc = 0; lc < allowedLeaves.lLength; lc++)
3212 if (allowedLeaves.get(lc)) {
3213 recordTransfer << newLeaves.lLength;
3214 inverted << newLeaves.lLength;
3215 newLeaves << myLeaves (indexer.get(lc));
3216 }
3217
3218 SortLists (&newLeaves, &recordTransfer);
3219 SortLists (&recordTransfer,&inverted);
3220 indexer.Clear();
3221 indexer.Duplicate (&inverted);
3222 myLeaves.Clear();
3223 myLeaves.Duplicate (&newLeaves);
3224
3225 // finally check whether the root still has 3 or more children
3226
3227 if (myCT->get_num_nodes()==2) {
3228 node <long>* promoteMe = nil;
3229
3230 if (myCT->go_down(1)->get_num_nodes ()) {
3231 promoteMe = myCT->go_down(1);
3232 } else {
3233 promoteMe = myCT->go_down(2);
3234 }
3235
3236 long nn = promoteMe->get_num_nodes();
3237 if (nn) {
3238 for (long cc = 1; cc <= nn; cc++) {
3239 myCT->add_node (*promoteMe->go_down(cc));
3240 }
3241
3242 myCT->detach_child (promoteMe->get_child_num());
3243 DeleteObject ((BaseRef)promoteMe->in_object);
3244 delete promoteMe;
3245 } else {
3246 HandleApplicationError ("Internal tree pattern error in MatchTreePattern");
3247 return "Unequal: Error";
3248 }
3249 }
3250 }
3251
3252 _SimpleList * reindexer = nil;
3253
3254 if (!indexer.Equal (otherIndexer)) {
3255 otherIndexer.Populate (_SimpleList ((unsigned long)myLeaves.countitems()).Populate (indexer, _SimpleList::action_invert),
3256 _SimpleList::action_compose);
3257 reindexer = &otherIndexer;
3258 }
3259
3260 char compRes;
3261
3262 if (internalTreeCompare (myCT, otherCT, reindexer, 1, myLeaves.lLength, nil, compareTo, true)>0) {
3263 rerootAt = kCompareEqualWithoutReroot;
3264 } else {
3265 long tCount = 0;
3266
3267
3268 node_iterator <long> ni (otherCT, _HY_TREE_TRAVERSAL_POSTORDER);
3269 node<long> * iterator = ni.Next();
3270
3271 while (iterator!=otherCT) {
3272 if (!iterator->is_leaf()) {
3273 compRes = internalTreeCompare (myCT, iterator, reindexer, 1, myLeaves.lLength, nil, compareTo, true);
3274 if (compRes>0) {
3275 break;
3276 } else if (compRes) {
3277 iterator = otherCT;
3278 break;
3279 }
3280 }
3281 iterator = ni.Next();
3282
3283 tCount ++;
3284 }
3285
3286 if (iterator!=otherCT) {
3287 ni.Reset (compareTo->theRoot);
3288 iterator = ni.Next();
3289 while (iterator!=theRoot) {
3290 if (tCount==1) {
3291 rerootAt = kCompareEqualWithReroot & *map_node_to_calcnode (iterator)->GetName() & '.';
3292 break;
3293 } else {
3294 tCount --;
3295 }
3296
3297 iterator = ni.Next();
3298 }
3299 }
3300 }
3301 if (rerootAt.empty()) {
3302 rerootAt = kCompareUnequalToplogies;
3303 }
3304 } else {
3305 rerootAt = kCompareUnequalLabelSets;
3306 }
3307
3308 destroyCompTree (myCT);
3309 destroyCompTree (otherCT);
3310 return rerootAt;
3311 }
3312
3313 //__________________________________________________________________________________
3314
ComputeClusterTable(_SimpleList & result,_SimpleList & pswRepresentation) const3315 void _TreeTopology::ComputeClusterTable (_SimpleList& result, _SimpleList& pswRepresentation) const {
3316
3317 long leaf_count = pswRepresentation.Element(-2L),
3318 upper_bound = pswRepresentation.countitems()-2L,
3319 leafCode = 0L,
3320 L = 0L,
3321 R = 0L;
3322
3323 result.Clear ();
3324 result.Populate (3*leaf_count,-1,0);
3325
3326
3327 for (long k = 0; k < upper_bound; k+=2L) {
3328 if (pswRepresentation.get(k) < leaf_count) { // is a leaf
3329 R = leafCode++;
3330 } else {
3331 long row;
3332 L = pswRepresentation.get (k-2*pswRepresentation.get(k+1));
3333 if (k + 2L == upper_bound /* root */
3334 || pswRepresentation.get(k+3L) == 0L) {
3335 row = R;
3336 } else {
3337 row = L;
3338 }
3339
3340 result[row*3] = L;
3341 result[row*3+1L] = R;
3342 }
3343 }
3344 }
3345 //__________________________________________________________________________________
ConvertFromPSW(_AVLListX & nodeMap,_SimpleList & pswRepresentation) const3346 _String* _TreeTopology::ConvertFromPSW (_AVLListX& nodeMap,_SimpleList& pswRepresentation) const {
3347 _StringBuffer * result = new _StringBuffer (128L);
3348
3349 if (pswRepresentation.countitems() > 4L) {
3350 long leafCount = pswRepresentation.Element (-2);
3351 // traverse backwards
3352 bool lastLeaf = false;
3353 _SimpleList bounds;
3354
3355 for (long k = pswRepresentation.countitems()-4L; k>=0L; k-=2L) {
3356 if (lastLeaf) {
3357 (*result) << ',';
3358 }
3359 if (pswRepresentation.list_data[k] >= leafCount) { //
3360 (*result) << ')';
3361 lastLeaf = false;
3362 bounds << k-2*pswRepresentation.get(k+1);
3363 } else {
3364 _String nodeName (*(_String*)nodeMap.Retrieve(pswRepresentation.get(k)));
3365 nodeName.Flip();
3366 (*result) << nodeName;
3367 while (bounds.Element(-1) == k && bounds.nonempty()) {
3368 (*result) << '(';
3369 bounds.Pop();
3370 }
3371 lastLeaf = true;
3372 }
3373 }
3374 }
3375 result->TrimSpace();
3376 result->Flip();
3377 return result;
3378 }
3379 /*
3380 if (reference == false) {
3381 nodeMap.Clear();
3382 }
3383
3384 pswRepresentation.Clear();
3385
3386 long leafIndex = 0,
3387 iNodeCount = -1;
3388
3389 _SimpleList levelBuffer;
3390
3391 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3392
3393 while (node<long> * currentNode = ni.Next (&levelBuffer)) {
3394 _String nodeName = GetNodeName (currentNode);
3395
3396 while (levelBuffer.countitems() <= ni.Level()) {
3397 levelBuffer << 0;
3398 }
3399
3400 if (currentNode->is_leaf()) {
3401 pswRepresentation << leafIndex;
3402 pswRepresentation << 0;
3403 if (reference) {
3404 long remapped = nodeMap.Find(&nodeName);
3405 if (remapped < 0) {
3406 return false;
3407 } else {
3408 remapped = nodeMap.GetXtra (remapped);
3409 if (remapped >= 0) {
3410 pswRepresentation << remapped;
3411 } else {
3412 return false;
3413 }
3414 }
3415
3416 leafIndex++;
3417 } else {
3418 nodeMap.Insert(nodeName.makeDynamic(), leafIndex++, false);
3419 }
3420 } else {
3421 pswRepresentation << iNodeCount;
3422 pswRepresentation << levelBuffer.list_data[ni.Level()];
3423 if (reference) {
3424 pswRepresentation << 0;
3425 } else {
3426 (*inames) && &nodeName;
3427 }
3428
3429 iNodeCount--;
3430 }
3431 if (ni.Level()) {
3432 levelBuffer.list_data[ni.Level()-1] += levelBuffer.list_data[ni.Level()]+1;
3433 }
3434 levelBuffer.list_data[ni.Level()] = 0;
3435 }
3436
3437 for (long k = 0; k < pswRepresentation.lLength; k+=(reference?3:2))
3438 if (pswRepresentation.list_data[k] < 0) {
3439 pswRepresentation.list_data[k] = leafIndex-pswRepresentation.list_data[k]-1;
3440 }
3441
3442 pswRepresentation << leafIndex;
3443 pswRepresentation << (-iNodeCount-1);
3444
3445 return true;
3446
3447 */
3448 //__________________________________________________________________________________
ConvertToPSW(_AVLListX & nodeMap,_List * inames,_SimpleList & pswRepresentation,bool reference) const3449 bool _TreeTopology::ConvertToPSW (_AVLListX& nodeMap, _List* inames, _SimpleList& pswRepresentation, bool reference) const {
3450
3451 if (reference == false) {
3452 nodeMap.Clear();
3453 }
3454
3455 pswRepresentation.Clear();
3456
3457 long leafIndex = 0L,
3458 iNodeCount = -1L;
3459
3460 _SimpleList levelBuffer;
3461
3462 node_iterator<long> ni (theRoot, _HY_TREE_TRAVERSAL_POSTORDER);
3463
3464 while (node<long> * currentNode = ni.Next ()) {
3465 _String nodeName = GetNodeName (currentNode);
3466
3467
3468 if (levelBuffer.countitems() <= ni.Level()) {
3469 levelBuffer.AppendRange(ni.Level() - levelBuffer.countitems() + 1 , 0, 0);
3470 }
3471
3472 if (currentNode->is_leaf()) {
3473 pswRepresentation << leafIndex << 0;
3474 if (reference) {
3475 long remapped = nodeMap.Find(&nodeName);
3476 if (remapped == kNotFound) {
3477 return false;
3478 } else {
3479 remapped = nodeMap.GetXtra (remapped);
3480 if (remapped >= 0) {
3481 pswRepresentation << remapped;
3482 } else {
3483 return false;
3484 }
3485 }
3486
3487 leafIndex++;
3488 } else {
3489 nodeMap.Insert(nodeName.makeDynamic(), leafIndex++, false);
3490 }
3491 } else {
3492 pswRepresentation << iNodeCount << levelBuffer.get(ni.Level());
3493 if (reference) {
3494 pswRepresentation << 0L;
3495 } else {
3496 (*inames) && &nodeName;
3497 }
3498 iNodeCount--;
3499 }
3500 if (ni.Level()) {
3501 levelBuffer[ni.Level()-1] += levelBuffer.get(ni.Level())+1;
3502 }
3503 levelBuffer[ni.Level()] = 0;
3504 }
3505
3506 for (long k = 0; k < pswRepresentation.lLength; k+=(reference?3:2)) {
3507 if (pswRepresentation.get(k) < 0L) {
3508 pswRepresentation[k] = leafIndex-pswRepresentation.get(k)-1;
3509 }
3510 }
3511
3512 pswRepresentation << leafIndex << (-iNodeCount-1);
3513
3514 return true;
3515 }
3516
3517
3518 //__________________________________________________________________________________
3519
SplitsIdentity(HBLObjectRef p,HBLObjectRef cache) const3520 _AssociativeList * _TreeTopology::SplitsIdentity (HBLObjectRef p, HBLObjectRef cache) const {
3521 // compare tree topologies
3522 _Matrix * result = new _Matrix (2,1,false,true),
3523 * result2 = nil;
3524
3525 _String * tree_repr = nil;
3526
3527 long leaves, internals;
3528 EdgeCount (leaves, internals);
3529
3530 result->theData[0] = internals - 1.;
3531 result->theData[1] = -1.;
3532
3533 if (p && (p->ObjectClass() == TOPOLOGY || p->ObjectClass() == TREE)) {
3534 _List avlSupport,
3535 iNames;
3536
3537 _AVLListX nameMap (&avlSupport);
3538
3539 _SimpleList psw, psw2, clusters, inodeList;
3540
3541
3542 ConvertToPSW (nameMap, &iNames, psw);
3543 ComputeClusterTable (clusters, psw);
3544
3545 if (((_TreeTopology*)p)->ConvertToPSW (nameMap, nil, psw2, true)) {
3546 _SimpleList workSpace;
3547 long leafCount = psw.Element (-2),
3548 upper_bound = psw2.countitems() - 3L;
3549
3550 for (long k = 0L; k < upper_bound; k+=3L) {
3551
3552 if (psw2.list_data[k] < leafCount) {
3553 workSpace << 1L
3554 << 1L
3555 << psw2.get(k+2L)
3556 << psw2.get(k+2L);
3557 } else {
3558 _SimpleList quad;
3559
3560 quad << leafCount+1 << 0 << 0 << 1;
3561
3562 long w = psw2.get (k+1);
3563 while (w > 0) {
3564 _SimpleList quad2;
3565 quad2 << workSpace.Pop() << workSpace.Pop() << workSpace.Pop() << workSpace.Pop();
3566 w -= quad2.get (3);
3567 quad.list_data[0] = Minimum (quad2.get(0),quad.get(0));
3568 quad.list_data[1] = Maximum (quad2.get(1),quad.get(1));
3569 quad.list_data[2] += quad2.get(2);
3570 quad.list_data[3] += quad2.get(3);
3571 }
3572
3573 if (quad.list_data[2] == quad.list_data[1] - quad.list_data[0] + 1) {
3574 if (clusters.list_data[3*quad.list_data[0]] == quad.list_data[0] && clusters.list_data[3*quad.list_data[0]+1] == quad.list_data[1]) {
3575 clusters.list_data[3*quad.list_data[0]+2] = 1;
3576 } else if (clusters.list_data[3*quad.list_data[1]] == quad.list_data[0] && clusters.list_data[3*quad.list_data[1]+1] == quad.list_data[1]) {
3577 clusters.list_data[3*quad.list_data[1]+2] = 1;
3578 }
3579 }
3580 quad.Flip();
3581 workSpace << quad;
3582 }
3583 }
3584
3585 psw2.Clear();
3586 long matchCount = 0,
3587 iNodeCount = 0;
3588
3589 long L, R = -1L;
3590
3591 _SimpleList leafSpans (leafCount,0,0),
3592 iNodesTouched;
3593
3594 for (unsigned long k = 0; k < psw.lLength-2; k+=2) {
3595 if (psw.list_data[k] < leafCount) {
3596 R = psw.list_data[k];
3597 psw2 << R;
3598 psw2 << 0;
3599 leafSpans.list_data[R] = (psw2.lLength>>1);
3600 } else {
3601 long ll = k-2*psw.list_data[k+1];
3602 L = psw.list_data[ll];
3603 if ((clusters.list_data[3*L] == L && clusters.list_data[3*L+1] == R && clusters.list_data[3*L+2] > 0)
3604 || (clusters.list_data[3*R] == L && clusters.list_data[3*R+1] == R && clusters.list_data[3*R+2] > 0)) {
3605 L = (psw2.lLength>>1) - leafSpans.list_data[L] + 1;
3606 psw2 << leafCount+iNodeCount++;
3607 psw2 << L;
3608
3609 iNodesTouched << psw.list_data[k];
3610 }
3611 }
3612 }
3613
3614 for (unsigned long k = 0; k < psw2.lLength; k+=2)
3615 if (psw2.list_data[k] < leafCount) {
3616 psw2.list_data[k+1] = 0;
3617 } else {
3618 matchCount++;
3619 }
3620
3621 psw2 << leafCount;
3622 psw2 << iNodeCount;
3623
3624 result->theData[0] = psw.Element (-1);
3625 result->theData[1] = matchCount;
3626
3627
3628 tree_repr = ConvertFromPSW (nameMap, psw2);
3629
3630 _List sharedNames;
3631 for (long k = 0; k < iNodesTouched.lLength; k++) {
3632 sharedNames << iNames (iNodesTouched(k)-leafCount);
3633 }
3634
3635 result2 = new _Matrix (sharedNames);
3636 }
3637
3638 }
3639
3640 //DeleteObject (bc);
3641
3642 _AssociativeList * resultList = new _AssociativeList;
3643 resultList->MStore ("CLUSTERS", result, false);
3644 if (result2) {
3645 resultList->MStore ("NODES", result2, false);
3646 }
3647 resultList->MStore ("CONSENSUS", tree_repr ? new _FString (tree_repr) : new _FString(), false);
3648 return resultList;
3649 }
3650
3651 //__________________________________________________________________________________
3652
FindMaxCommonSubTree(_TreeTopology const * compare_to,long & size_tracker,_List * forest) const3653 const _String _TreeTopology::FindMaxCommonSubTree (_TreeTopology const* compare_to, long& size_tracker, _List* forest) const{
3654 _List myLeaves,
3655 otherLeaves,
3656 sharedLeaves;
3657
3658 _SimpleList indexer,
3659 otherIndexer,
3660 sharedLeavesIDin1,
3661 sharedLeavesIDin2;
3662
3663
3664 _String rerootAt;
3665
3666 node<long> *myCT = prepTree4Comparison(myLeaves, indexer),
3667 *otherCT = compare_to->prepTree4Comparison(otherLeaves, otherIndexer);
3668
3669 sharedLeaves.Intersect (otherLeaves,myLeaves,&sharedLeavesIDin1,&sharedLeavesIDin2);
3670
3671 if (sharedLeaves.countitems() > 1UL) { // more than one common leaf
3672 // now we need to map shared leaves to a common indexing space
3673
3674 _SimpleList my_inverted_index, // was lidx1
3675 other_inverted_index; // was lidx2
3676
3677 my_inverted_index.Populate(indexer, _SimpleList::action_invert);
3678 other_inverted_index.Populate(otherIndexer, _SimpleList::action_invert);
3679
3680 _SimpleList reindexer (otherLeaves.countitems(),-1L,0L),
3681 ldx1,
3682 ldx2;
3683
3684
3685 for (long k2=0L; k2<sharedLeaves.countitems(); k2++) {
3686 reindexer [other_inverted_index.get (sharedLeavesIDin2.get (k2))] = my_inverted_index.get (sharedLeavesIDin1.get (k2));
3687 // the original index of shared leaf k2 -> the original index of shared lead k1 (in post-order traveral)
3688 }
3689
3690 // now we map actual leaf structures to their respective leaf indices
3691
3692 node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3693
3694 while (node<long>* iterator = ni.Next()) {
3695 if (iterator->is_leaf()) {
3696 ldx1 << (long)iterator;
3697 }
3698 }
3699
3700 ni.Reset (otherCT);
3701 while (node<long>* iterator = ni.Next()) {
3702 if (iterator->is_leaf()) {
3703 ldx2 << (long)iterator;
3704 }
3705 }
3706
3707 // now we loop through the list of leaves and try to match them all up
3708
3709 _SimpleList matchedTops,
3710 matchedSize;
3711
3712 for (long k3=0UL; k3<sharedLeaves.countitems()-1L; k3++) {
3713 long try_leaf_index = other_inverted_index.get (sharedLeavesIDin2.get (k3));
3714
3715 if (reindexer.get (try_leaf_index) >= 0L) {
3716 // leaf still available
3717 node<long>* ln1 = (node<long>*)ldx1.get (my_inverted_index.get (sharedLeavesIDin1.get (k3))),
3718 * ln2 = (node<long>*)ldx2.get (try_leaf_index),
3719 * p1 = ln1->parent,
3720 * p2 = ln2->parent;
3721
3722 bool comparison_result = false;
3723
3724 while ((internalTreeCompare (p1,p2,&reindexer,0,myLeaves.lLength,p2->parent?nil:ln2,compare_to) == 1)&&p1&&p2) {
3725 ln1 = p1;
3726 ln2 = p2;
3727 p1=p1->parent;
3728 p2=p2->parent;
3729
3730 comparison_result = true;
3731 }
3732
3733 if (comparison_result) {
3734 _SimpleList* matchedLeaves = (_SimpleList*)ln2->in_object;
3735
3736 matchedTops << (long)ln1;
3737 matchedSize << matchedLeaves->countitems();
3738
3739 matchedLeaves->Each ([&] (long value, unsigned long) -> void {
3740 reindexer[value] = -1;
3741 });
3742 }
3743 }
3744 }
3745
3746 if (matchedSize.nonempty()) {
3747
3748 auto handle_subtree = [&] (node<long> const * top_node) -> _String * {
3749 ni.Reset(myCT);
3750
3751 long internal_node_count = 0L;
3752
3753 while (node<long>* iterator = ni.Next()) {
3754 if (!iterator->is_leaf()) {
3755 if (iterator == top_node) {
3756 break;
3757 }
3758 internal_node_count ++;
3759 }
3760 }
3761 ni.Reset(theRoot);
3762 while (node<long>* iterator = ni.Next()) {
3763 if (!iterator->is_leaf()) {
3764 if (internal_node_count == 0) {
3765 return LocateVar(iterator->in_object)->GetName();
3766 }
3767 internal_node_count--;
3768 }
3769 }
3770 return nil;
3771 };
3772
3773 if (forest) {
3774 size_tracker = 0L;
3775 for (long k6=0L; k6<matchedSize.lLength; k6++) {
3776 (*forest) << handle_subtree ((node<long>*)matchedTops.get (k6));
3777 size_tracker += matchedSize.get (k6);
3778 }
3779 } else {
3780 long maxSz = -1L,
3781 maxIdx = 0L;
3782
3783 matchedSize.Each ([&] (long value, unsigned long index) -> void {
3784 if (StoreIfGreater(maxSz, value)) {
3785 maxIdx = index;
3786 }
3787 });
3788
3789 size_tracker = maxSz;
3790 _String const * res = handle_subtree ((node<long>*)matchedTops.list_data[maxIdx]);
3791 destroyCompTree(myCT);
3792 destroyCompTree(otherCT);
3793 return *res;
3794 }
3795 }
3796 }
3797 destroyCompTree(myCT);
3798 destroyCompTree(otherCT);
3799 return kEmptyString;
3800 }
3801
3802
3803 //__________________________________________________________________________________
3804
CompareSubTrees(_TreeTopology const * compareTo,node<long> * topNode) const3805 const _String _TreeTopology::CompareSubTrees (_TreeTopology const* compareTo, node<long>* topNode) const {
3806 // compare tree topologies
3807
3808 _List myLeaves,
3809 otherLeaves,
3810 sharedLeaves;
3811
3812 _SimpleList indexer,
3813 otherIndexer,
3814 sharedLeavesID;
3815
3816 node<long> *myCT,
3817 *otherCT;
3818
3819 _String rerootAt;
3820
3821 myCT = prepTree4Comparison(myLeaves, indexer);
3822 otherCT = compareTo->prepTree4Comparison(otherLeaves, otherIndexer, topNode);
3823
3824 sharedLeaves.Intersect (myLeaves, otherLeaves,&sharedLeavesID);
3825
3826 // first compare the inclusion for the set of leaf labels
3827
3828 if (sharedLeavesID.countitems() == otherLeaves.countitems()) {
3829 _SimpleList ilist;
3830
3831 ilist.Populate(myLeaves.countitems(), -1L, 0L);
3832
3833 for (long k2 = 0L; k2 < otherIndexer.countitems(); k2++) {
3834 ilist [sharedLeavesID.get(otherIndexer.get(k2))] = k2;
3835 }
3836
3837 for (long k2 = 0L; k2<indexer.countitems(); k2++) {
3838 long lidx = ilist.list_data[indexer.get(k2)];
3839 indexer[k2] = lidx >=0L ? lidx : -1L;
3840 }
3841
3842 _SimpleList *reindexer = &indexer;
3843
3844 // now compare explore possible subtree matchings
3845 // for all internal nodes of this tree except the root
3846
3847 char compRes = 0;
3848
3849 long tCount = 1L,
3850 nc2 = topNode->get_num_nodes();
3851
3852 node_iterator<long> ni (myCT, _HY_TREE_TRAVERSAL_POSTORDER);
3853 ni.Next();
3854 node<long>* iterator = ni.Next();
3855
3856 while (iterator!=myCT) {
3857 long nc = iterator->get_num_nodes();
3858 if (nc == nc2) {
3859 long kk;
3860 for (kk = 1; kk <= nc; kk++) {
3861 compRes = internalTreeCompare (otherCT,iterator, reindexer, 0, otherLeaves.lLength, iterator->go_down(kk),compareTo);
3862 if (compRes) {
3863 if (compRes == -1) {
3864 iterator = myCT;
3865 }
3866 break;
3867 }
3868 }
3869 if (kk>nc) {
3870 compRes = internalTreeCompare (otherCT,iterator, reindexer, 0, otherLeaves.lLength, nil, compareTo);
3871 if (compRes) {
3872 if (compRes == -1) {
3873 iterator = myCT;
3874 }
3875 break;
3876 }
3877 } else {
3878 break;
3879 }
3880 }
3881 tCount ++;
3882 iterator = ni.Next();
3883 }
3884
3885 if (iterator != myCT) {
3886 ni.Reset (theRoot);
3887 iterator = ni.Next ();
3888 while (iterator != theRoot) {
3889 if (tCount==0) {
3890 rerootAt = _String("Matched at the ") & *map_node_to_calcnode (iterator)->GetName() & '.';
3891 break;
3892 } else {
3893 tCount --;
3894 }
3895
3896 iterator = ni.Next();
3897 }
3898 } else {
3899 long nc = myCT->get_num_nodes();
3900
3901 if (nc == nc2+1)
3902 for (long kk = 1; kk <= nc; kk++) {
3903 compRes = internalTreeCompare (otherCT,myCT, reindexer, 0, otherLeaves.lLength, myCT->go_down(kk),compareTo);
3904 if (compRes==1) {
3905 break;
3906 }
3907 }
3908
3909 if (compRes == 1) {
3910 rerootAt = "Matched at the root.";
3911 }
3912 }
3913
3914 if (rerootAt.empty()) {
3915 rerootAt = "No match: Different topologies (matching label sets).";
3916 }
3917 } else {
3918 rerootAt = "No match: Unequal label sets.";
3919 }
3920
3921 destroyCompTree (myCT);
3922 destroyCompTree (otherCT);
3923
3924 return rerootAt;
3925 }
3926
3927
3928
3929
3930
3931
3932