1 /*  $Id: test_biotree.cpp 607145 2020-04-30 13:02:34Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Anatoliy Kuznetsov
27  *
28  * File Description
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <stdio.h>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbireg.hpp>
37 #include <corelib/ncbifile.hpp>
38 #include <corelib/ncbi_tree.hpp>
39 #include <util/bitset/bitset_debug.hpp>
40 #include <algo/tree/tree_algo.hpp>
41 #include <util/bitset/ncbi_bitset.hpp>
42 #include <algorithm>
43 #include <algo/phy_tree/bio_tree.hpp>
44 #include <algo/phy_tree/bio_tree_forest.hpp>
45 #include <algo/phy_tree/bio_tree_conv.hpp>
46 
47 #include <objects/biotree/BioTreeContainer.hpp>
48 #include <objects/biotree/FeatureDescr.hpp>
49 #include <objects/biotree/FeatureDictSet.hpp>
50 #include <objects/biotree/NodeFeatureSet.hpp>
51 #include <objects/biotree/NodeFeature.hpp>
52 #include <objects/biotree/NodeSet.hpp>
53 #include <objects/biotree/Node.hpp>
54 
55 #include <objects/taxon1/taxon1.hpp>
56 
57 #include <common/test_assert.h>
58 
59 
60 USING_NCBI_SCOPE;
61 USING_SCOPE(objects);
62 
63 static int label_id;
64 static int name_id = 1;
65 
s_Node2String(const CBioTreeDynamic::TBioTreeNode & node)66 static string s_Node2String(const CBioTreeDynamic::TBioTreeNode& node)
67 {
68     const CBioTreeDynamic::TBioTreeNode::TValueType& v = node.GetValue();
69 //    cout << v.GetTree() << endl;
70 //    _ASSERT(v.GetTree());
71     return v.features.GetFeatureValue(label_id);
72 }
73 
s_Node2String_name(const CBioTreeDynamic::TBioTreeNode & node)74 static string s_Node2String_name(const CBioTreeDynamic::TBioTreeNode& node)
75 {
76     const CBioTreeDynamic::TBioTreeNode::TValueType& v = node.GetValue();
77     return v.features.GetFeatureValue(name_id);
78 
79     CNodeSet s;
80 }
81 
82 
83 
TestBioTreeContainer()84 static void TestBioTreeContainer()
85 {
86     typedef CBioTreeContainer::TFdict  TContainerDict;
87     typedef TContainerDict::Tdata::value_type::element_type TCFeatureDescr;
88     typedef CBioTreeContainer::TNodes  TContainerNodes;
89 
90     // create the bio-tree
91     CBioTreeContainer btrc;
92     btrc.SetTreetype("prot_cluster");
93 
94     // Build/populate feature dictionary
95     //
96     {{
97         TContainerDict& fd = btrc.SetFdict();
98         TContainerDict::Tdata& feat_list = fd.Set();
99         // create id-value pairs (id MUST be unique)
100         {{
101         CRef<TCFeatureDescr> d(new TCFeatureDescr);
102         d->SetId(1); d->SetName("label");
103         feat_list.push_back(d);
104         }}
105         {{
106         CRef<TCFeatureDescr> d(new TCFeatureDescr);
107         d->SetId(2); d->SetName("dist");
108         feat_list.push_back(d);
109         }}
110         {{
111         CRef<TCFeatureDescr> d(new TCFeatureDescr);
112         d->SetId(3); d->SetName("seq-id");
113         feat_list.push_back(d);
114         }}
115         {{
116         CRef<TCFeatureDescr> d(new TCFeatureDescr);
117         d->SetId(4); d->SetName("cluster-id");
118         feat_list.push_back(d);
119         }}
120     }}
121 
122     // Add tree nodes with topology and features
123     //
124     {{
125         unsigned node_id = 1;
126         TContainerNodes& nodes = btrc.SetNodes();
127         TContainerNodes::Tdata& node_lst = nodes.Set();
128 
129         // make root node
130         {{
131         CRef<CNode> n(new CNode);
132         n->SetId(node_id);  // unique node id
133 
134         CNode::TFeatures& nfeatures = n->SetFeatures();
135         CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
136 
137             // Add node features -- ids MUST correspond the feature dictionary
138             //
139             {{
140             CRef<CNodeFeature> nf(new CNodeFeature);
141             nf->SetFeatureid(1);  //  "label"
142             nf->SetValue("RootNode_LBL");
143             feature_list.push_back(nf);
144             }}
145 
146             {{
147             CRef<CNodeFeature> nf(new CNodeFeature);
148             nf->SetFeatureid(3);  //  "seq-id"
149             nf->SetValue("gi|123");
150             feature_list.push_back(nf);
151             }}
152 
153         node_lst.push_back(n);
154         }}
155         unsigned parent_node_id = node_id;
156         ++node_id;
157 
158         // make child node 1
159         {{
160         CRef<CNode> n(new CNode);
161         n->SetId(node_id);  // unique node id
162         n->SetParent(parent_node_id);
163 
164         CNode::TFeatures& nfeatures = n->SetFeatures();
165         CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
166 
167             // Add node features -- ids MUST correspond the feature dictionary
168             //
169             {{
170             CRef<CNodeFeature> nf(new CNodeFeature);
171             nf->SetFeatureid(1);  //  "label"
172             nf->SetValue("Child1");
173             feature_list.push_back(nf);
174             }}
175 
176             {{
177             CRef<CNodeFeature> nf(new CNodeFeature);
178             nf->SetFeatureid(3);  //  "seq-id"
179             nf->SetValue("gi|124");
180             feature_list.push_back(nf);
181             }}
182 
183         node_lst.push_back(n);
184         }}
185         ++node_id;
186 
187         // make child node 2
188         {{
189         CRef<CNode> n(new CNode);
190         n->SetId(node_id);  // unique node id
191         n->SetParent(parent_node_id);
192 
193         CNode::TFeatures& nfeatures = n->SetFeatures();
194         CNode::TFeatures::Tdata& feature_list = nfeatures.Set();
195 
196             // Add node features -- ids MUST correspond the feature dictionary
197             //
198             {{
199             CRef<CNodeFeature> nf(new CNodeFeature);
200             nf->SetFeatureid(1);  //  "label"
201             nf->SetValue("Child2");
202             feature_list.push_back(nf);
203             }}
204 
205             {{
206             CRef<CNodeFeature> nf(new CNodeFeature);
207             nf->SetFeatureid(3);  //  "seq-id"
208             nf->SetValue("gi|125");
209             feature_list.push_back(nf);
210             }}
211 
212         node_lst.push_back(n);
213         }}
214 
215 
216     }}
217 
218     // Dump tree to file (cout)
219     cout << MSerial_AsnText << btrc;
220 }
221 
222 
223 /////////////////////////////////
224 // Test application
225 //
226 
227 class CTestApplication : public CNcbiApplication
228 {
229 public:
230     void Init(void);
231     int Run(void);
232 };
233 
234 
Init(void)235 void CTestApplication::Init(void)
236 {
237     // Set err.-posting and tracing to maximum
238     SetDiagTrace(eDT_Enable);
239     SetDiagPostFlag(eDPF_All);
240     SetDiagPostLevel(eDiag_Info);
241 }
242 
243 
244 
Run(void)245 int CTestApplication::Run(void)
246 {
247 
248 
249     //        CExceptionReporterStream reporter(cerr);
250     //        CExceptionReporter::SetDefault(&reporter);
251     //        CExceptionReporter::EnableDefault(false);
252     //        CExceptionReporter::EnableDefault(true);
253     //        CExceptionReporter::SetDefault(0);
254 
255 
256 	CTaxon1 tax;
257 	bool res = tax.Init();
258 
259 	if (!res) {
260 		return 1;
261 	}
262 
263 
264     // Create one container and tree:
265 	try{
266 		TTaxId tax_id = tax.GetTaxIdByName("Mouse");
267 		cout << tax_id << endl;
268 
269 		CBioTreeContainer btrc;
270 		CTaxon1ConvertToBioTreeContainer<CBioTreeContainer,
271 				                         CTaxon1,
272 				                         ITaxon1Node,
273 				                         ITreeIterator>
274 		conv_func;
275 		conv_func(btrc, tax, tax_id);
276 
277 		cout << MSerial_AsnText << btrc;
278 
279 	    CBioTreeDynamic dtr2;
280 		BioTreeConvertContainer2Dynamic(dtr2, btrc);
281 
282 		TreePrint(cout, *(dtr2.GetTreeNode()), s_Node2String_name);
283 	}
284 	catch (exception& ex)
285 	{
286 		cout << "Error! " << ex.what() << endl;
287 	}
288 
289     // Create one contanier with two trees in it, and a forest:
290 	try{
291 		TTaxId tax_id1 = tax.GetTaxIdByName("Mouse");
292 		cout << tax_id1 << endl;
293 
294 	    TTaxId tax_id2 = tax.GetTaxIdByName("Rattus rattus");
295 		cout << tax_id2 << endl;
296 
297         //
298         // Create two containers and a forest. Convert containers to forest and back, and write
299         // the contents both before and after conversions.
300 		CBioTreeContainer c1, c2;
301         CBioTreeForestDynamic f1;
302 
303         /// Add 2 separate trees to container c1
304 		CTaxon1ConvertToBioTreeContainer<CBioTreeContainer,
305 				                         CTaxon1,
306 				                         ITaxon1Node,
307 				                         ITreeIterator>
308 		conv_func;
309         bool is_forest;
310 
311 		conv_func(c1, tax, tax_id1);
312         is_forest = BioTreeContainerIsForest(c1);
313         if (is_forest)
314             cout << "Container with 1 tree misclassified as forest." << endl;
315         else
316             cout << "Container with 1 tree classified as tree." << endl;
317 
318         conv_func(c1, tax, tax_id2);
319         is_forest = BioTreeContainerIsForest(c1);
320         if (!is_forest)
321             cout << "Container with 2 trees misclassified as tree." << endl;
322         else
323             cout << "Container with 2 trees classified as forest." << endl;
324 
325         // Create temporary files for output.  Replace any previous contents.
326         std::string tdir = CDir::GetTmpDir();
327         cout << "Temp Output Dir: " << tdir << endl;
328 
329         std::string container1 =  CDirEntry::ConcatPath(tdir,"treetestc1.txt");
330         std::string forest1 =  CDirEntry::ConcatPath(tdir,"treetestf1.txt");
331         std::string container2 =  CDirEntry::ConcatPath(tdir,"treetestc2.txt");
332 
333         CNcbiOfstream tstoutc1(container1.c_str());
334         CNcbiOfstream tstoutf1(forest1.c_str());
335         CNcbiOfstream tstoutc2(container2.c_str());
336 
337         // Write iniial value of container 1 to firest container file
338 		tstoutc1 << MSerial_AsnText << c1;
339 
340         // Create forest from container and write contents to first forest file
341 		BioTreeConvertContainer2DynamicForest(f1, c1);
342         for (unsigned int i=0; i<f1.GetTrees().size(); ++i)
343             TreePrint(tstoutf1, *(f1.GetTrees()[i]->GetTreeNode()),
344                       s_Node2String_name);
345 
346         // Convert forest back into a container (2), and write out container 2
347         BioTreeForestConvert2Container<CBioTreeContainer,
348                                        CBioTreeForestDynamic>(c2, f1);
349         tstoutc2 << MSerial_AsnText << c2;
350 
351         // close output files
352         tstoutc1.close();
353         tstoutf1.close();
354         tstoutc2.close();
355 
356         // Compare contents of first container with contents
357         // of container created from conversion to and from forest
358         CFile  fc1(container1);
359 
360         if (!fc1.CompareTextContents(container2, CFile::eIgnoreWs))
361             cout << "Converted container file differs from original" << endl;
362         else
363             cout << "Converted container file is identical to original" << endl;
364 	}
365 	catch (exception& ex)
366 	{
367 		cout << "Error! " << ex.what() << endl;
368 	}
369 
370     tax.Fini();
371 
372     return 0;
373 }
374 
375 
376 
377 /// Imitation of simple phylogenetic tree
378 struct PhyNode
379 {
PhyNodePhyNode380     PhyNode() {}
PhyNodePhyNode381     PhyNode(int d, const string& l) : distance(d), label(l){}
382 
383     int       distance;
384     string    label;
385 };
386 
387 typedef
388   CBioTree<BioTreeBaseNode<PhyNode, CBioTreeFeatureList> >
389   CPhyTree;
390 
391 
392 class NodeConvert
393 {
394 public:
NodeConvert(CBioTreeDynamic & dynamic_tree)395     NodeConvert(CBioTreeDynamic& dynamic_tree)
396     : m_DynamicTree(&dynamic_tree)
397     {}
398 
operator ()(CBioTreeDynamic::TBioTreeNode & dnode,const CPhyTree::TBioTreeNode & src_node)399     void operator()(CBioTreeDynamic::TBioTreeNode& dnode,
400                     const CPhyTree::TBioTreeNode&  src_node)
401     {
402         const CPhyTree::TBioTreeNode::TValueType& nv = src_node.GetValue();
403         NStr::IntToString(m_TmpStr, nv.data.distance);
404         m_DynamicTree->AddFeature(&dnode, "distance", m_TmpStr);
405         m_DynamicTree->AddFeature(&dnode, "label", nv.data.label);
406     }
407 
408 private:
409     CBioTreeDynamic*   m_DynamicTree;
410     string             m_TmpStr;
411 };
412 
413 
414 
415 
416 
417 
418 
419 struct PhyNodeId
420 {
PhyNodeIdPhyNodeId421     PhyNodeId()
422     {}
423 
PhyNodeIdPhyNodeId424     PhyNodeId(unsigned x_id, int d, const string& l)
425         : id(x_id), distance(d), label(l)
426     {}
427 
GetIdPhyNodeId428     unsigned GetId() const { return id; }
SetIdPhyNodeId429     void SetId(unsigned x_id) { id = x_id; }
430 
431     unsigned  id;
432     int       distance;
433     string    label;
434 };
435 
436 
437 typedef CTreeNode<PhyNodeId> CPhyTreeNode;
438 
439 
440 /// Sample converter from tree node to dynamic tree node
441 class IdNodeConvert
442 {
443 public:
IdNodeConvert(CBioTreeDynamic & dynamic_tree)444     IdNodeConvert(CBioTreeDynamic& dynamic_tree)
445     : m_DynamicTree(&dynamic_tree)
446     {}
447 
operator ()(CBioTreeDynamic::TBioTreeNode & dnode,const CPhyTreeNode & src_node)448     void operator()(CBioTreeDynamic::TBioTreeNode& dnode,
449                     const CPhyTreeNode&  src_node)
450     {
451         const PhyNodeId& nv = src_node.GetValue();
452         NStr::IntToString(m_TmpStr, nv.distance);
453         m_DynamicTree->AddFeature(&dnode, "distance", m_TmpStr);
454         m_DynamicTree->AddFeature(&dnode, "label", nv.label);
455     }
456 
457 private:
458     CBioTreeDynamic*   m_DynamicTree;
459     string             m_TmpStr;
460 };
461 
462 struct IdNodeConvertToTree
463 {
operator ()IdNodeConvertToTree464     void operator()(CPhyTreeNode&  dst_node,
465                     const CBioTreeDynamic::TBioTreeNode& src)
466     {
467         const string& dist = src.GetFeature("distance");
468         dst_node.GetValue().distance = atoi(dist.c_str());
469         dst_node.GetValue().label = src.GetFeature("label");
470     }
471 };
472 
473 
TestTreeConvert()474 void TestTreeConvert()
475 {
476     // ---------------------------------
477     // Filling the tree
478 
479     CPhyTreeNode* n0 = new CPhyTreeNode;
480     {
481     PhyNodeId& nv = n0->GetValue();
482     nv.id = 0;
483     nv.distance = 0;
484     nv.label = "root";
485     }
486 
487     n0->AddNode(PhyNodeId(10, 1, "node1"));
488     CPhyTreeNode* n2 = n0->AddNode(PhyNodeId(11, 1, "node2"));
489 
490     n2->AddNode(PhyNodeId(20, 3, "node20"));
491 
492 
493     // -----------------------------------
494     // Convert to dynamic
495 
496     CBioTreeDynamic dtr;
497 
498     CBioTreeFeatureDictionary& dict = dtr.GetFeatureDict();
499     dict.Register("distance");
500     label_id = dict.Register("label");
501 
502     IdNodeConvert func(dtr);
503     TreeConvert2Dynamic(dtr, n0, func);
504 
505     // ----------------------------------------------------------
506     // Print the tree to check if everything is correct...
507 
508     TreePrint(cout, *(dtr.GetTreeNode()), s_Node2String);
509 	cout << endl << endl;
510 
511     delete n0;
512 
513     IdNodeConvertToTree func2;
514 
515     DynamicConvert2Tree(dtr, func2, n0);
516 
517     delete n0;
518 }
519 
520 
521 
main(int argc,char ** argv)522 int main(int argc, char** argv)
523 {
524     TestBioTreeContainer();
525 
526     TestTreeConvert();
527 
528     CPhyTree tr;
529 
530     // ----------------------------------------------------------
531 
532     CPhyTree::TBioTreeNode* n0 =  new CPhyTree::TBioTreeNode;
533     CPhyTree::TBioTreeNode::TValueType& nv = n0->GetValue();
534     nv.uid = 1;
535     nv.data.distance = 0;
536     nv.data.label = "n0";
537 
538     tr.SetTreeNode(n0);
539 
540 
541 
542     // ----------------------------------------------------------
543 
544     {{
545 
546     CPhyTree::TBioTreeNode* n10 =  new CPhyTree::TBioTreeNode;
547     CPhyTree::TBioTreeNode::TValueType& nv = n10->GetValue();
548 
549     nv.uid = 10;
550     nv.data.distance = 1;
551     nv.data.label = "n10";
552 
553     n0->AddNode(n10);
554 
555     }}
556 
557 
558     {{
559 
560     CPhyTree::TBioTreeNode* n11 =  new CPhyTree::TBioTreeNode;
561     CPhyTree::TBioTreeNode::TValueType& nv = n11->GetValue();
562 
563     nv.uid = 11;
564     nv.data.distance = 2;
565     nv.data.label = "n11";
566 
567     n0->AddNode(n11);
568 
569     }}
570 
571     // ----------------------------------------------------------
572 
573     CBioTreeDynamic dtr;
574 
575     CBioTreeFeatureDictionary& dict = dtr.GetFeatureDict();
576     dict.Register("distance");
577     label_id = dict.Register("label");
578 
579     BioTreeConvert2Dynamic(dtr, tr, NodeConvert(dtr));
580 
581     {{
582     const CBioTreeDynamic::TBioTreeNode* dnode = dtr.GetTreeNode();
583     const string& st = dnode->GetFeature("label");
584     cout << st << endl;
585     }}
586     {{
587     CBioTreeDynamic::TBioTreeNode* dnode = dtr.GetTreeNodeNonConst();
588     dnode->SetFeature("label", "n0-new");
589     const string& st = (*dnode)["label"];
590     cout << st << endl;
591     }}
592 
593     // ----------------------------------------------------------
594 
595     // Print the tree to check if everything is correct...
596 
597     TreePrint(cout, *(dtr.GetTreeNode()), s_Node2String);
598 
599 	cout << endl << endl;
600 
601     CBioTreeContainer btrc;
602     btrc.SetTreetype("phy_tree");
603 
604     BioTreeConvert2Container(btrc, dtr);
605 //    cout << MSerial_AsnText << btrc;
606 
607     // ----------------------------------------------------------
608 /*
609     CBioTreeDynamic dtr2;
610     BioTreeConvertContainer2Dynamic(dtr2, btrc);
611 
612     TreePrint(cout, *(dtr2.GetTreeNode()), s_Node2String);
613 */
614 
615 
616     // ----------------------------------------------------------
617 
618     CTestApplication theTestApplication;
619     return theTestApplication.AppMain(argc, argv, 0 /*envp*/, eDS_ToMemory);
620 
621 
622 
623     return 0;
624 }
625