1 /* $Id: test_biotree.cpp 607145 2020-04-30 13:02:34Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Anatoliy Kuznetsov
27 *
28 * File Description
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <stdio.h>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbireg.hpp>
37 #include <corelib/ncbifile.hpp>
38 #include <corelib/ncbi_tree.hpp>
39 #include <util/bitset/bitset_debug.hpp>
40 #include <algo/tree/tree_algo.hpp>
41 #include <util/bitset/ncbi_bitset.hpp>
42 #include <algorithm>
43 #include <algo/phy_tree/bio_tree.hpp>
44 #include <algo/phy_tree/bio_tree_forest.hpp>
45 #include <algo/phy_tree/bio_tree_conv.hpp>
46
47 #include <objects/biotree/BioTreeContainer.hpp>
48 #include <objects/biotree/FeatureDescr.hpp>
49 #include <objects/biotree/FeatureDictSet.hpp>
50 #include <objects/biotree/NodeFeatureSet.hpp>
51 #include <objects/biotree/NodeFeature.hpp>
52 #include <objects/biotree/NodeSet.hpp>
53 #include <objects/biotree/Node.hpp>
54
55 #include <objects/taxon1/taxon1.hpp>
56
57 #include <common/test_assert.h>
58
59
60 USING_NCBI_SCOPE;
61 USING_SCOPE(objects);
62
63 static int label_id;
64 static int name_id = 1;
65
s_Node2String(const CBioTreeDynamic::TBioTreeNode & node)66 static string s_Node2String(const CBioTreeDynamic::TBioTreeNode& node)
67 {
68 const CBioTreeDynamic::TBioTreeNode::TValueType& v = node.GetValue();
69 // cout << v.GetTree() << endl;
70 // _ASSERT(v.GetTree());
71 return v.features.GetFeatureValue(label_id);
72 }
73
s_Node2String_name(const CBioTreeDynamic::TBioTreeNode & node)74 static string s_Node2String_name(const CBioTreeDynamic::TBioTreeNode& node)
75 {
76 const CBioTreeDynamic::TBioTreeNode::TValueType& v = node.GetValue();
77 return v.features.GetFeatureValue(name_id);
78
79 CNodeSet s;
80 }
81
82
83
TestBioTreeContainer()84 static void TestBioTreeContainer()
85 {
86 typedef CBioTreeContainer::TFdict TContainerDict;
87 typedef TContainerDict::Tdata::value_type::element_type TCFeatureDescr;
88 typedef CBioTreeContainer::TNodes TContainerNodes;
89
90 // create the bio-tree
91 CBioTreeContainer btrc;
92 btrc.SetTreetype("prot_cluster");
93
94 // Build/populate feature dictionary
95 //
96 {{
97 TContainerDict& fd = btrc.SetFdict();
98 TContainerDict::Tdata& feat_list = fd.Set();
99 // create id-value pairs (id MUST be unique)
100 {{
101 CRef<TCFeatureDescr> d(new TCFeatureDescr);
102 d->SetId(1); d->SetName("label");
103 feat_list.push_back(d);
104 }}
105 {{
106 CRef<TCFeatureDescr> d(new TCFeatureDescr);
107 d->SetId(2); d->SetName("dist");
108 feat_list.push_back(d);
109 }}
110 {{
111 CRef<TCFeatureDescr> d(new TCFeatureDescr);
112 d->SetId(3); d->SetName("seq-id");
113 feat_list.push_back(d);
114 }}
115 {{
116 CRef<TCFeatureDescr> d(new TCFeatureDescr);
117 d->SetId(4); d->SetName("cluster-id");
118 feat_list.push_back(d);
119 }}
120 }}
121
122 // Add tree nodes with topology and features
123 //
124 {{
125 unsigned node_id = 1;
126 TContainerNodes& nodes = btrc.SetNodes();
127 TContainerNodes::Tdata& node_lst = nodes.Set();
128
129 // make root node
130 {{
131 CRef<CNode> n(new CNode);
132 n->SetId(node_id); // unique node id
133
134 CNode::TFeatures& nfeatures = n->SetFeatures();
135 CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
136
137 // Add node features -- ids MUST correspond the feature dictionary
138 //
139 {{
140 CRef<CNodeFeature> nf(new CNodeFeature);
141 nf->SetFeatureid(1); // "label"
142 nf->SetValue("RootNode_LBL");
143 feature_list.push_back(nf);
144 }}
145
146 {{
147 CRef<CNodeFeature> nf(new CNodeFeature);
148 nf->SetFeatureid(3); // "seq-id"
149 nf->SetValue("gi|123");
150 feature_list.push_back(nf);
151 }}
152
153 node_lst.push_back(n);
154 }}
155 unsigned parent_node_id = node_id;
156 ++node_id;
157
158 // make child node 1
159 {{
160 CRef<CNode> n(new CNode);
161 n->SetId(node_id); // unique node id
162 n->SetParent(parent_node_id);
163
164 CNode::TFeatures& nfeatures = n->SetFeatures();
165 CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
166
167 // Add node features -- ids MUST correspond the feature dictionary
168 //
169 {{
170 CRef<CNodeFeature> nf(new CNodeFeature);
171 nf->SetFeatureid(1); // "label"
172 nf->SetValue("Child1");
173 feature_list.push_back(nf);
174 }}
175
176 {{
177 CRef<CNodeFeature> nf(new CNodeFeature);
178 nf->SetFeatureid(3); // "seq-id"
179 nf->SetValue("gi|124");
180 feature_list.push_back(nf);
181 }}
182
183 node_lst.push_back(n);
184 }}
185 ++node_id;
186
187 // make child node 2
188 {{
189 CRef<CNode> n(new CNode);
190 n->SetId(node_id); // unique node id
191 n->SetParent(parent_node_id);
192
193 CNode::TFeatures& nfeatures = n->SetFeatures();
194 CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
195
196 // Add node features -- ids MUST correspond the feature dictionary
197 //
198 {{
199 CRef<CNodeFeature> nf(new CNodeFeature);
200 nf->SetFeatureid(1); // "label"
201 nf->SetValue("Child2");
202 feature_list.push_back(nf);
203 }}
204
205 {{
206 CRef<CNodeFeature> nf(new CNodeFeature);
207 nf->SetFeatureid(3); // "seq-id"
208 nf->SetValue("gi|125");
209 feature_list.push_back(nf);
210 }}
211
212 node_lst.push_back(n);
213 }}
214
215
216 }}
217
218 // Dump tree to file (cout)
219 cout << MSerial_AsnText << btrc;
220 }
221
222
223 /////////////////////////////////
224 // Test application
225 //
226
227 class CTestApplication : public CNcbiApplication
228 {
229 public:
230 void Init(void);
231 int Run(void);
232 };
233
234
Init(void)235 void CTestApplication::Init(void)
236 {
237 // Set err.-posting and tracing to maximum
238 SetDiagTrace(eDT_Enable);
239 SetDiagPostFlag(eDPF_All);
240 SetDiagPostLevel(eDiag_Info);
241 }
242
243
244
Run(void)245 int CTestApplication::Run(void)
246 {
247
248
249 // CExceptionReporterStream reporter(cerr);
250 // CExceptionReporter::SetDefault(&reporter);
251 // CExceptionReporter::EnableDefault(false);
252 // CExceptionReporter::EnableDefault(true);
253 // CExceptionReporter::SetDefault(0);
254
255
256 CTaxon1 tax;
257 bool res = tax.Init();
258
259 if (!res) {
260 return 1;
261 }
262
263
264 // Create one container and tree:
265 try{
266 TTaxId tax_id = tax.GetTaxIdByName("Mouse");
267 cout << tax_id << endl;
268
269 CBioTreeContainer btrc;
270 CTaxon1ConvertToBioTreeContainer<CBioTreeContainer,
271 CTaxon1,
272 ITaxon1Node,
273 ITreeIterator>
274 conv_func;
275 conv_func(btrc, tax, tax_id);
276
277 cout << MSerial_AsnText << btrc;
278
279 CBioTreeDynamic dtr2;
280 BioTreeConvertContainer2Dynamic(dtr2, btrc);
281
282 TreePrint(cout, *(dtr2.GetTreeNode()), s_Node2String_name);
283 }
284 catch (exception& ex)
285 {
286 cout << "Error! " << ex.what() << endl;
287 }
288
289 // Create one contanier with two trees in it, and a forest:
290 try{
291 TTaxId tax_id1 = tax.GetTaxIdByName("Mouse");
292 cout << tax_id1 << endl;
293
294 TTaxId tax_id2 = tax.GetTaxIdByName("Rattus rattus");
295 cout << tax_id2 << endl;
296
297 //
298 // Create two containers and a forest. Convert containers to forest and back, and write
299 // the contents both before and after conversions.
300 CBioTreeContainer c1, c2;
301 CBioTreeForestDynamic f1;
302
303 /// Add 2 separate trees to container c1
304 CTaxon1ConvertToBioTreeContainer<CBioTreeContainer,
305 CTaxon1,
306 ITaxon1Node,
307 ITreeIterator>
308 conv_func;
309 bool is_forest;
310
311 conv_func(c1, tax, tax_id1);
312 is_forest = BioTreeContainerIsForest(c1);
313 if (is_forest)
314 cout << "Container with 1 tree misclassified as forest." << endl;
315 else
316 cout << "Container with 1 tree classified as tree." << endl;
317
318 conv_func(c1, tax, tax_id2);
319 is_forest = BioTreeContainerIsForest(c1);
320 if (!is_forest)
321 cout << "Container with 2 trees misclassified as tree." << endl;
322 else
323 cout << "Container with 2 trees classified as forest." << endl;
324
325 // Create temporary files for output. Replace any previous contents.
326 std::string tdir = CDir::GetTmpDir();
327 cout << "Temp Output Dir: " << tdir << endl;
328
329 std::string container1 = CDirEntry::ConcatPath(tdir,"treetestc1.txt");
330 std::string forest1 = CDirEntry::ConcatPath(tdir,"treetestf1.txt");
331 std::string container2 = CDirEntry::ConcatPath(tdir,"treetestc2.txt");
332
333 CNcbiOfstream tstoutc1(container1.c_str());
334 CNcbiOfstream tstoutf1(forest1.c_str());
335 CNcbiOfstream tstoutc2(container2.c_str());
336
337 // Write iniial value of container 1 to firest container file
338 tstoutc1 << MSerial_AsnText << c1;
339
340 // Create forest from container and write contents to first forest file
341 BioTreeConvertContainer2DynamicForest(f1, c1);
342 for (unsigned int i=0; i<f1.GetTrees().size(); ++i)
343 TreePrint(tstoutf1, *(f1.GetTrees()[i]->GetTreeNode()),
344 s_Node2String_name);
345
346 // Convert forest back into a container (2), and write out container 2
347 BioTreeForestConvert2Container<CBioTreeContainer,
348 CBioTreeForestDynamic>(c2, f1);
349 tstoutc2 << MSerial_AsnText << c2;
350
351 // close output files
352 tstoutc1.close();
353 tstoutf1.close();
354 tstoutc2.close();
355
356 // Compare contents of first container with contents
357 // of container created from conversion to and from forest
358 CFile fc1(container1);
359
360 if (!fc1.CompareTextContents(container2, CFile::eIgnoreWs))
361 cout << "Converted container file differs from original" << endl;
362 else
363 cout << "Converted container file is identical to original" << endl;
364 }
365 catch (exception& ex)
366 {
367 cout << "Error! " << ex.what() << endl;
368 }
369
370 tax.Fini();
371
372 return 0;
373 }
374
375
376
377 /// Imitation of simple phylogenetic tree
378 struct PhyNode
379 {
PhyNodePhyNode380 PhyNode() {}
PhyNodePhyNode381 PhyNode(int d, const string& l) : distance(d), label(l){}
382
383 int distance;
384 string label;
385 };
386
387 typedef
388 CBioTree<BioTreeBaseNode<PhyNode, CBioTreeFeatureList> >
389 CPhyTree;
390
391
392 class NodeConvert
393 {
394 public:
NodeConvert(CBioTreeDynamic & dynamic_tree)395 NodeConvert(CBioTreeDynamic& dynamic_tree)
396 : m_DynamicTree(&dynamic_tree)
397 {}
398
operator ()(CBioTreeDynamic::TBioTreeNode & dnode,const CPhyTree::TBioTreeNode & src_node)399 void operator()(CBioTreeDynamic::TBioTreeNode& dnode,
400 const CPhyTree::TBioTreeNode& src_node)
401 {
402 const CPhyTree::TBioTreeNode::TValueType& nv = src_node.GetValue();
403 NStr::IntToString(m_TmpStr, nv.data.distance);
404 m_DynamicTree->AddFeature(&dnode, "distance", m_TmpStr);
405 m_DynamicTree->AddFeature(&dnode, "label", nv.data.label);
406 }
407
408 private:
409 CBioTreeDynamic* m_DynamicTree;
410 string m_TmpStr;
411 };
412
413
414
415
416
417
418
419 struct PhyNodeId
420 {
PhyNodeIdPhyNodeId421 PhyNodeId()
422 {}
423
PhyNodeIdPhyNodeId424 PhyNodeId(unsigned x_id, int d, const string& l)
425 : id(x_id), distance(d), label(l)
426 {}
427
GetIdPhyNodeId428 unsigned GetId() const { return id; }
SetIdPhyNodeId429 void SetId(unsigned x_id) { id = x_id; }
430
431 unsigned id;
432 int distance;
433 string label;
434 };
435
436
437 typedef CTreeNode<PhyNodeId> CPhyTreeNode;
438
439
440 /// Sample converter from tree node to dynamic tree node
441 class IdNodeConvert
442 {
443 public:
IdNodeConvert(CBioTreeDynamic & dynamic_tree)444 IdNodeConvert(CBioTreeDynamic& dynamic_tree)
445 : m_DynamicTree(&dynamic_tree)
446 {}
447
operator ()(CBioTreeDynamic::TBioTreeNode & dnode,const CPhyTreeNode & src_node)448 void operator()(CBioTreeDynamic::TBioTreeNode& dnode,
449 const CPhyTreeNode& src_node)
450 {
451 const PhyNodeId& nv = src_node.GetValue();
452 NStr::IntToString(m_TmpStr, nv.distance);
453 m_DynamicTree->AddFeature(&dnode, "distance", m_TmpStr);
454 m_DynamicTree->AddFeature(&dnode, "label", nv.label);
455 }
456
457 private:
458 CBioTreeDynamic* m_DynamicTree;
459 string m_TmpStr;
460 };
461
462 struct IdNodeConvertToTree
463 {
operator ()IdNodeConvertToTree464 void operator()(CPhyTreeNode& dst_node,
465 const CBioTreeDynamic::TBioTreeNode& src)
466 {
467 const string& dist = src.GetFeature("distance");
468 dst_node.GetValue().distance = atoi(dist.c_str());
469 dst_node.GetValue().label = src.GetFeature("label");
470 }
471 };
472
473
TestTreeConvert()474 void TestTreeConvert()
475 {
476 // ---------------------------------
477 // Filling the tree
478
479 CPhyTreeNode* n0 = new CPhyTreeNode;
480 {
481 PhyNodeId& nv = n0->GetValue();
482 nv.id = 0;
483 nv.distance = 0;
484 nv.label = "root";
485 }
486
487 n0->AddNode(PhyNodeId(10, 1, "node1"));
488 CPhyTreeNode* n2 = n0->AddNode(PhyNodeId(11, 1, "node2"));
489
490 n2->AddNode(PhyNodeId(20, 3, "node20"));
491
492
493 // -----------------------------------
494 // Convert to dynamic
495
496 CBioTreeDynamic dtr;
497
498 CBioTreeFeatureDictionary& dict = dtr.GetFeatureDict();
499 dict.Register("distance");
500 label_id = dict.Register("label");
501
502 IdNodeConvert func(dtr);
503 TreeConvert2Dynamic(dtr, n0, func);
504
505 // ----------------------------------------------------------
506 // Print the tree to check if everything is correct...
507
508 TreePrint(cout, *(dtr.GetTreeNode()), s_Node2String);
509 cout << endl << endl;
510
511 delete n0;
512
513 IdNodeConvertToTree func2;
514
515 DynamicConvert2Tree(dtr, func2, n0);
516
517 delete n0;
518 }
519
520
521
main(int argc,char ** argv)522 int main(int argc, char** argv)
523 {
524 TestBioTreeContainer();
525
526 TestTreeConvert();
527
528 CPhyTree tr;
529
530 // ----------------------------------------------------------
531
532 CPhyTree::TBioTreeNode* n0 = new CPhyTree::TBioTreeNode;
533 CPhyTree::TBioTreeNode::TValueType& nv = n0->GetValue();
534 nv.uid = 1;
535 nv.data.distance = 0;
536 nv.data.label = "n0";
537
538 tr.SetTreeNode(n0);
539
540
541
542 // ----------------------------------------------------------
543
544 {{
545
546 CPhyTree::TBioTreeNode* n10 = new CPhyTree::TBioTreeNode;
547 CPhyTree::TBioTreeNode::TValueType& nv = n10->GetValue();
548
549 nv.uid = 10;
550 nv.data.distance = 1;
551 nv.data.label = "n10";
552
553 n0->AddNode(n10);
554
555 }}
556
557
558 {{
559
560 CPhyTree::TBioTreeNode* n11 = new CPhyTree::TBioTreeNode;
561 CPhyTree::TBioTreeNode::TValueType& nv = n11->GetValue();
562
563 nv.uid = 11;
564 nv.data.distance = 2;
565 nv.data.label = "n11";
566
567 n0->AddNode(n11);
568
569 }}
570
571 // ----------------------------------------------------------
572
573 CBioTreeDynamic dtr;
574
575 CBioTreeFeatureDictionary& dict = dtr.GetFeatureDict();
576 dict.Register("distance");
577 label_id = dict.Register("label");
578
579 BioTreeConvert2Dynamic(dtr, tr, NodeConvert(dtr));
580
581 {{
582 const CBioTreeDynamic::TBioTreeNode* dnode = dtr.GetTreeNode();
583 const string& st = dnode->GetFeature("label");
584 cout << st << endl;
585 }}
586 {{
587 CBioTreeDynamic::TBioTreeNode* dnode = dtr.GetTreeNodeNonConst();
588 dnode->SetFeature("label", "n0-new");
589 const string& st = (*dnode)["label"];
590 cout << st << endl;
591 }}
592
593 // ----------------------------------------------------------
594
595 // Print the tree to check if everything is correct...
596
597 TreePrint(cout, *(dtr.GetTreeNode()), s_Node2String);
598
599 cout << endl << endl;
600
601 CBioTreeContainer btrc;
602 btrc.SetTreetype("phy_tree");
603
604 BioTreeConvert2Container(btrc, dtr);
605 // cout << MSerial_AsnText << btrc;
606
607 // ----------------------------------------------------------
608 /*
609 CBioTreeDynamic dtr2;
610 BioTreeConvertContainer2Dynamic(dtr2, btrc);
611
612 TreePrint(cout, *(dtr2.GetTreeNode()), s_Node2String);
613 */
614
615
616 // ----------------------------------------------------------
617
618 CTestApplication theTestApplication;
619 return theTestApplication.AppMain(argc, argv, 0 /*envp*/, eDS_ToMemory);
620
621
622
623 return 0;
624 }
625