1 /* $Id: cuTaxClient.cpp 609836 2020-06-08 15:56:03Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Adapted from CDTree-1 files by Chris Lanczycki
27  *
28  * File Description:
29  *
30  *       Various utilities and classes for obtaining taxonomy information
31  *       from ASN objects and NCBI taxonomy services.
32  *       Also maintain lists of preferred and model tax nodes.
33  *
34  * ===========================================================================
35  */
36 
37 
38 #include <ncbi_pch.hpp>
39 #include <objects/taxon1/taxon1.hpp>
40 #include <objects/taxon1/Taxon2_data.hpp>
41 #include <objects/seq/Bioseq.hpp>
42 #include <objects/seq/Seq_descr.hpp>
43 #include <objects/seq/Seqdesc.hpp>
44 #include <objects/seqfeat/Org_ref.hpp>
45 #include <objects/seqloc/Seq_id.hpp>
46 #include <objects/seqloc/Seq_loc.hpp>
47 #include <objects/general/Dbtag.hpp>
48 #include <objects/general/Object_id.hpp>
49 #include <objects/seqfeat/BioSource.hpp>
50 #include <math.h>
51 
52 #include <algo/structure/cd_utils/cuTaxClient.hpp>
53 #include <objects/id1/id1_client.hpp>
54 
55 BEGIN_NCBI_SCOPE
56 BEGIN_SCOPE(cd_utils)
57 
58 const bool   TaxClient::REFRESH_DEFAULT               = false;
59 
TaxClient(bool refresh)60 TaxClient::TaxClient(bool refresh) : m_taxonomyClient(0), m_id1(0)
61 {
62 }
63 
64 
~TaxClient()65 TaxClient::~TaxClient() {
66 	if(m_taxonomyClient){
67 		m_taxonomyClient->Fini();
68 		delete m_taxonomyClient;
69 		m_taxonomyClient = 0;
70 	}
71 	if (m_id1)
72 		delete m_id1;
73 }
74 
init()75 bool TaxClient::init()
76 {
77 	return ConnectToTaxServer();
78 }
79 
ConnectToTaxServer()80 bool TaxClient::ConnectToTaxServer()
81 {
82 	if(!m_taxonomyClient)
83 		m_taxonomyClient= new CTaxon1();
84 	m_taxonomyClient->Init();
85 
86 	if (m_taxonomyClient->IsAlive())
87 		return true;
88     else
89 		return false;
90 }
91 
92 
IsAlive()93 bool TaxClient::IsAlive() {
94     if(!m_taxonomyClient)return false;
95     return m_taxonomyClient->IsAlive();
96 }
97 
98 // try to get "official" tax info from seq_id's gi
GetTaxIDForSeqId(CConstRef<CSeq_id> sid)99 TTaxId TaxClient::GetTaxIDForSeqId(CConstRef< CSeq_id > sid)
100 {
101 	TGi gi = ZERO_GI;
102     if (sid->IsGi())
103 	{
104         gi = sid->GetGi();
105     }
106 	else
107 	{
108 		if (!m_id1)
109 			m_id1= new CID1Client;
110 		gi = m_id1->AskGetgi(*sid);
111 	}
112 	return GetTaxIDForGI(gi);
113 }
114 
115 
GetTaxIDForGI(TGi gi)116 TTaxId TaxClient::GetTaxIDForGI(TGi gi) {
117     TTaxId taxid = ZERO_TAX_ID;
118     if (IsAlive()) {
119         return (m_taxonomyClient->GetTaxId4GI(gi, taxid)) ? taxid : ZERO_TAX_ID;
120     }
121     return taxid;
122 }
123 
GetOrgRef(TTaxId taxId,CRef<COrg_ref> & orgRef)124 bool TaxClient::GetOrgRef(TTaxId taxId, CRef< COrg_ref >& orgRef) {
125     bool result = false;
126 
127     if (IsAlive() && orgRef.NotEmpty() && taxId > ZERO_TAX_ID) {
128         bool is_species, is_uncultured;
129         string blast_name;
130         CConstRef< COrg_ref > constOrgRef = m_taxonomyClient->GetOrgRef(taxId, is_species, is_uncultured, blast_name);
131         orgRef->Assign(*constOrgRef);
132         result = true;
133     } else {
134         orgRef.Reset();
135     }
136     return result;
137 }
138 
139 
140 //  Look through the bioseq for a COrg object, and use it to get taxid.
141 //  Use tax server by default, unless server fails and lookInBioseq is true.
GetTaxIDFromBioseq(const CBioseq & bioseq,bool lookInBioseq)142 TTaxId TaxClient::GetTaxIDFromBioseq(const CBioseq& bioseq, bool lookInBioseq) {
143 
144     TTaxId	taxid =	ZERO_TAX_ID;
145 	list< CRef<	CSeqdesc > >::const_iterator  j, jend;
146 
147 	if (bioseq.IsSetDescr())
148 	{
149 		jend = bioseq.GetDescr().Get().end();
150 
151 		// look	through	the	sequence descriptions
152 		for	(j=bioseq.GetDescr().Get().begin();	j!=jend; j++)
153 		{
154 			const COrg_ref *org	= NULL;
155 			if ((*j)->IsOrg())
156 				org	= &((*j)->GetOrg());
157 			else if	((*j)->IsSource())
158 				org	= &((*j)->GetSource().GetOrg());
159 			if (org)
160 			{
161 				//	Use	tax	server
162 				if (IsAlive())
163 				{
164 						if ((taxid = m_taxonomyClient->GetTaxIdByOrgRef(*org)) != ZERO_TAX_ID)
165 						{
166 							if (taxid <	ZERO_TAX_ID)
167 							{  //  multiple	tax	nodes; -taxid is one of	them
168 								taxid = -taxid;
169 							}
170 							break;
171 						}
172 				}
173 				//	Use	bioseq,	which may be obsolete, only	when requested and tax server failed.
174 				if (taxid == ZERO_TAX_ID && lookInBioseq)
175 				{	//	is there an	ID in the bioseq itself	if fails
176 					vector < CRef< CDbtag >	>::const_iterator k, kend =	org->GetDb().end();
177 					for	(k=org->GetDb().begin(); k != kend;	++k) {
178 						if ((*k)->GetDb() == "taxon") {
179 							if ((*k)->GetTag().IsId()) {
180 								taxid = TAX_ID_FROM(CObject_id::TId, (*k)->GetTag().GetId());
181 								break;
182 							}
183 						}
184 					}
185 				}
186 			} //end	if (org)
187 		}//end for
188 	}
189 	return taxid;
190 }
191 
GetRankID(TTaxId taxId,string & rankName)192 short TaxClient::GetRankID(TTaxId taxId, string& rankName)
193 {
194 	short rankId = -1;
195 	if(IsAlive())
196 	{
197 		CRef< ITreeIterator > nodeIt = m_taxonomyClient->GetTreeIterator( taxId );
198 		rankId = nodeIt->GetNode()->GetRank();
199 		m_taxonomyClient->GetRankName(rankId, rankName);
200 	}
201 
202 	return rankId;
203 }
204     // get info for taxid
GetTaxNameForTaxID(TTaxId taxid)205 std::string TaxClient::GetTaxNameForTaxID(TTaxId taxid)
206 {
207 	std::string taxName = kEmptyStr;
208 	if (taxid <= ZERO_TAX_ID)
209 		return taxName;
210 	if (taxid == TAX_ID_CONST(1))
211 	{
212 		taxName = "Root";
213 		return taxName;
214 	}
215 	if (IsAlive())
216 	{
217 		CRef < CTaxon2_data > data = m_taxonomyClient->GetById(taxid);
218 		if (data->IsSetOrg() && data->GetOrg().IsSetTaxname()) {
219 			taxName = data->GetOrg().GetTaxname();
220 		}
221 	}
222 
223 	return taxName;
224 }
225 
226     // get parent for taxid
GetParentTaxID(TTaxId taxid)227 TTaxId TaxClient::GetParentTaxID(TTaxId taxid)
228 {
229     TTaxId parent = ZERO_TAX_ID;
230 	if (IsAlive())
231 		parent = m_taxonomyClient->GetParent(taxid);
232     return parent;
233 }
234 
GetSuperKingdom(TTaxId taxid)235 string TaxClient::GetSuperKingdom(TTaxId taxid) {
236 
237     TTaxId skId = m_taxonomyClient->GetSuperkingdom(taxid);
238     return (skId == INVALID_TAX_ID) ? kEmptyStr : GetTaxNameForTaxID(skId);
239 }
240 
GetDisplayCommonName(TTaxId taxid,string & displayCommonName)241 bool TaxClient::GetDisplayCommonName(TTaxId taxid, string& displayCommonName)
242 {
243     if (IsAlive()) {
244         return m_taxonomyClient->GetDisplayCommonName(taxid, displayCommonName);
245     } else {
246         return false;
247     }
248 }
249 
250 //is taxid2 the descendant of taxid1?
IsTaxDescendant(TTaxId taxid1,TTaxId taxid2)251 bool TaxClient::IsTaxDescendant(TTaxId taxid1, TTaxId taxid2)
252 {
253     if (IsAlive()) {
254         TTaxId ancestor = m_taxonomyClient->Join(taxid1, taxid2);
255         return (ancestor == taxid1);
256     } else
257         return false;
258 }
259 
GetOrgRef(TTaxId tax_id,bool & is_species,bool & is_uncultured,string & blast_name)260 CConstRef< COrg_ref > TaxClient::GetOrgRef(TTaxId tax_id, bool& is_species, bool& is_uncultured, string& blast_name)
261 {
262     if (IsAlive()) {
263         return m_taxonomyClient->GetOrgRef(tax_id, is_species, is_uncultured, blast_name);
264     } else {
265         //  following the behavior of CTaxon1::GetOrgRef on failure.
266         return ncbi::null;
267     }
268 }
269 
Join(TTaxId taxid1,TTaxId taxid2)270 TTaxId TaxClient::Join(TTaxId taxid1, TTaxId taxid2)
271 {
272     if (IsAlive()) {
273         return m_taxonomyClient->Join(taxid1, taxid2);
274     } else {
275         return ZERO_TAX_ID;
276     }
277 }
278 
GetFullLineage(TTaxId taxid,vector<TTaxId> & lineageFromRoot)279 bool TaxClient::GetFullLineage(TTaxId taxid, vector<TTaxId>& lineageFromRoot)
280 {
281     TTaxId rootTaxid = TAX_ID_CONST(1);
282     vector<TTaxId> v;
283     v.push_back(rootTaxid);
284     v.push_back(taxid);
285     lineageFromRoot.clear();
286 
287     if (IsAlive() && m_taxonomyClient->GetPopsetJoin(v, lineageFromRoot) && lineageFromRoot.size() > 0) {
288         lineageFromRoot.push_back(taxid);  //  GetPopsetJoin does not put taxid at the end of the lineage
289         return true;
290     }
291     return false;
292 }
293 
GetFullLineage(TTaxId taxid,vector<pair<TTaxId,string>> & lineageFromRoot,bool useCommonName)294 bool TaxClient::GetFullLineage(TTaxId taxid, vector< pair<TTaxId, string> >& lineageFromRoot, bool useCommonName)
295 {
296     string commonName;
297     string root("root");
298     vector<TTaxId> lineageAsTaxids;
299     pair<TTaxId, string> p;
300 
301     lineageFromRoot.clear();
302     if (GetFullLineage(taxid, lineageAsTaxids)) {
303         ITERATE(vector<TTaxId>, vit, lineageAsTaxids) {
304             p.first = *vit;
305             if (!useCommonName) {  // formal name via the COrg_ref
306                 commonName = GetTaxNameForTaxID(*vit);
307                 if (commonName.length() > 0) {
308                     p.second = (p.first > TAX_ID_CONST(1)) ? commonName : root;
309                 } else {
310                     p.second = kEmptyStr;
311                 }
312             } else if (useCommonName && GetDisplayCommonName(*vit, commonName) && commonName.length() > 0) {
313                 p.second = (p.first > TAX_ID_CONST(1)) ? commonName : root;
314             } else {
315                 p.second = kEmptyStr;
316             }
317             lineageFromRoot.push_back(p);
318         }
319         return true;
320     }
321     return false;
322 }
323 
324 
325 END_SCOPE(cd_utils)
326 END_NCBI_SCOPE
327