1 /* $Id: cuTaxTree.hpp 607148 2020-04-30 13:05:44Z grichenk $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: Charlie Liu 27 * 28 * File Description: Retrieve and create a Taxonomy tree for several CDs. 29 * part of CDTree app 30 */ 31 32 #ifndef CU_TAXTREE_HPP 33 #define CU_TAXTREE_HPP 34 #include <algo/structure/cd_utils/cuCppNCBI.hpp> 35 #include <algo/structure/cd_utils/tree_msvc7.hpp> 36 #include <algo/structure/cd_utils/cuTaxClient.hpp> 37 #include <algo/structure/cd_utils/cuAlignmentCollection.hpp> 38 #include <list> 39 #include <stack> 40 #include <algorithm> 41 42 BEGIN_NCBI_SCOPE 43 USING_SCOPE(objects); 44 BEGIN_SCOPE(cd_utils) 45 46 class NCBI_CDUTILS_EXPORT TaxNode 47 { 48 public: 49 //fields for internal tax nodes 50 TTaxId taxId; 51 std::string orgName; 52 short rankId; 53 //fields for external seq nodes 54 int rowId; 55 CCdCore* cd; 56 std::string seqName; 57 //total and selected leaf counts 58 int numLeaves; 59 int selectedLeaves; 60 61 //methods 62 TaxNode(); 63 TaxNode(const TaxNode& rhs); operator ==(const TaxNode & rhs)64 bool operator==(const TaxNode& rhs) {return taxId == rhs.taxId;}; 65 static bool isSeqLeaf(const TaxNode& node); 66 static bool isSubSeqLeaf(const TaxNode& node); 67 static TaxNode* makeTaxNode(TTaxId taxID, std::string taxName, short rankId=-1); 68 static TaxNode* makeSeqLeaf(int rowID, std::string sequenceName); 69 static TaxNode* makeSubSeqLeaf(int rowID, CCdCore* cd, int rowInCd); 70 71 private: 72 void init(); 73 }; 74 75 typedef tree<TaxNode> TaxonomyTree; 76 typedef TaxonomyTree::iterator TaxTreeIterator; 77 //typedef list<CCd*> CDList; 78 79 /* taxonomy ranking -- total 8 level 80 Superkingdom: Eukaryota 81 Kingdom: Metazoa 82 Phylum: Chordata 83 Class: Mammalia 84 Order: Primata 85 Family: Hominidae 86 Genus: Homo 87 Species: sapiens 88 */ 89 90 // define a family hiearchy of CDs 91 class NCBI_CDUTILS_EXPORT TaxTreeData : public TaxonomyTree 92 { 93 public: 94 TaxTreeData(const AlignmentCollection& ac); 95 getFailedRows()96 const vector<int>& getFailedRows() { return m_failedRows;} 97 void selectTaxNode(TaxTreeIterator& taxNode, bool select); 98 void setSelections(const vector<int>& rowIDs, CCdCore* cd=0); 99 int getSelections(vector<int>& rows); 100 void clearSelection(); 101 void deselectAllTaxNodes(); 102 void fillLeafCount(const TaxTreeIterator& cursor); 103 //bool isPreferredTaxNode(const TaxTreeIterator& taxNode); 104 int getAllLeafNodes(const TaxTreeIterator& taxNode, vector<TaxTreeIterator>& nodes) const; 105 short getRankId(string rankName); 106 TaxTreeIterator getParentAtRank(int row, string rankName); 107 bool isEmpty()const; 108 //bool missLocalTaxFiles()const {return m_missLocalTaxFiles;} 109 ~TaxTreeData(); 110 111 void addTaxToBioseq(CBioseq& bioseq, TTaxId taxid, string& taxName); 112 bool writeToFile(string fname)const; 113 bool writeToFileAsTable(string fname)const; 114 bool write(std::ostream&os, const iterator& cursor)const; 115 bool writeAsTable(std::ostream&os, const iterator& cursor, const iterator& branchingNode)const; 116 private: 117 const AlignmentCollection& m_ac; 118 typedef map<int, TaxonomyTree::iterator> RowToTaxNode; 119 RowToTaxNode m_rowToTaxNode; 120 typedef map<string, short> RankNameToId; 121 RankNameToId m_rankNameToId; 122 // wrapper of taxonomy server class 123 TaxClient* m_taxDataSource; 124 vector<int> m_failedRows; 125 126 bool makeTaxonomyTree(); 127 void addRows(const AlignmentCollection& ac); 128 // get integer taxid for a sequence 129 TTaxId GetTaxIDForSequence(const AlignmentCollection& aligns, int rowID); 130 // get info for taxid 131 void selectTaxTreeLeaf(const TaxTreeIterator& cursor, bool select, CCdCore* cd=0); 132 void addSeqTax(int rowID, string seqName, TTaxId taxid); 133 void growAndInsertLineage(stack<TaxNode*>& lineage); 134 void insertLineage(TaxTreeIterator& pos, stack<TaxNode*>& lineage); 135 void cacheRank(short rank, string rankName); 136 void writeOutRanks(); 137 }; 138 139 END_SCOPE(cd_utils) 140 END_NCBI_SCOPE 141 #endif 142 143