1 /* $Id: cuSeqTreeAPI.cpp 609836 2020-06-08 15:56:03Z grichenk $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Charlie Liu
27  *
28  * File Description:
29  *
30  *       API to handle SeqTree
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuSeqTreeAPI.hpp>
37 #include <algo/structure/cd_utils/cuSeqTreeFactory.hpp>
38 #include <algo/structure/cd_utils/cuSeqTreeRootedLayout.hpp>
39 #include <algo/structure/cd_utils/cuSeqTreeAsnizer.hpp>
40 
41 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)42 BEGIN_SCOPE(cd_utils)
43 
44 SeqTreeAPI::SeqTreeAPI(vector<CCdCore*>& cds, bool loadExistingTreeOnly)
45 	: m_ma(), m_cd(0), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
46 	m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
47 	m_loadOnly(loadExistingTreeOnly)
48 {
49 	vector<CDFamily*> families;
50 	CDFamily::createFamilies(cds, families);
51 	if(families.size() != 1 )
52 	{
53 		for (int i = 0 ; i <families.size(); i++)
54 			delete families[i];
55 		return;
56 	}
57 	m_ma.setAlignment(*families[0]);
58 	m_family = families[0];
59 }
60 
SeqTreeAPI(CCdCore * cd)61 SeqTreeAPI::SeqTreeAPI(CCdCore* cd)
62 	: m_ma(), m_cd(cd), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
63 	m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
64 	m_loadOnly(true)
65 {
66 }
67 
SeqTreeAPI(CDFamily & cdfam,TreeOptions & option)68 SeqTreeAPI::SeqTreeAPI(CDFamily& cdfam, TreeOptions& option)
69     : m_ma(), m_cd(0), m_family(&cdfam), m_seqTree(0), m_taxTree(0), m_taxClient(0),
70 	m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(option), m_triedTreeMaking(false),
71 	m_loadOnly(false)
72 {
73 	m_family = new CDFamily(cdfam);
74 }
75 
~SeqTreeAPI()76 SeqTreeAPI::~SeqTreeAPI()
77 {
78 	if (m_seqTree) delete m_seqTree;
79 	if (m_taxTree) delete m_taxTree;
80 	if (m_taxClient) delete m_taxClient;
81 	if (m_family) delete m_family;
82 }
83 
annotateTreeByMembership()84 void SeqTreeAPI::annotateTreeByMembership()
85 {
86 	m_useMembership = true;
87 }
88 
annotateTreeByTaxonomy(TaxonomyLevel tax)89 void SeqTreeAPI::annotateTreeByTaxonomy(TaxonomyLevel tax)
90 {
91 	m_taxLevel = tax;
92 	m_useMembership = false;
93 }
94 
getNumOfLeaves()95 int SeqTreeAPI::getNumOfLeaves()
96 {
97 	if (m_seqTree == 0)
98 		makeOrLoadTree();
99 	if (m_seqTree == 0)
100 		return 0;
101 	else
102 		return m_seqTree->getNumLeaf();
103 }
104 	//return a string of tree method names
105 	//lay out the tree to the specified area, .i.e. fit to screen
layoutSeqTree(int maxX,int maxY,vector<SeqTreeEdge> & edges)106 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, vector<SeqTreeEdge>& edges)
107 {
108 	if (m_seqTree == 0)
109 		makeOrLoadTree();
110 	return layoutSeqTree(maxX, maxY, -1, edges);
111 }
112 
113 	//lay out the tree with a fixed spacing between sequences
layoutSeqTree(int maxX,vector<SeqTreeEdge> & edges,int yInt)114 string SeqTreeAPI::layoutSeqTree(int maxX, vector<SeqTreeEdge>& edges, int yInt)
115 {
116 	if (m_seqTree == 0)
117 		makeOrLoadTree();
118 	int maxY = 0; //not used
119 	if (yInt < 2)
120 		yInt = 2;
121 	return layoutSeqTree(maxX, maxY, yInt, edges);
122 }
makeTree()123 bool SeqTreeAPI::makeTree()
124 {
125 	if (m_triedTreeMaking) //if already tried, don't try again
126 		return m_seqTree != 0;
127 	if (m_seqTree != 0)
128 	{
129 		delete m_seqTree;
130 		m_seqTree = 0;
131 		m_seqTree = new SeqTree;
132 	}
133 	if (!m_loadOnly)
134 	{
135 		if (m_ma.getFirstCD() == 0)
136 			m_ma.setAlignment(*m_family);
137 		if(!m_ma.isBlockAligned())
138 		{
139 			ERR_POST("Sequence tree is not made for " <<m_ma.getFirstCD()->GetAccession()
140 				<<" because it does not have a consistent block alognment.");
141 		}
142 		else
143 		{
144 			if ((m_treeOptions.distMethod == eScoreBlastFoot) || (m_treeOptions.distMethod == eScoreBlastFull))
145 				m_treeOptions.distMethod = eScoreAligned;
146 			m_seqTree = TreeFactory::makeTree(&m_ma, m_treeOptions);
147 		}
148 		if (m_seqTree)
149 			m_seqTree->fixRowName(m_ma, SeqTree::eGI);
150 	}
151 	m_triedTreeMaking = true;
152 	return m_seqTree != 0;
153 }
154 
makeOrLoadTree()155 bool SeqTreeAPI::makeOrLoadTree()
156 {
157 	if (m_triedTreeMaking) //if already tried, don't try again
158 		return m_seqTree != 0;
159 	m_seqTree = new SeqTree();
160 	bool loaded = false;
161 	if (m_cd)
162 		loaded = loadExistingTree(m_cd, &m_treeOptions, m_seqTree);
163 	else
164 		loaded = loadAndValidateExistingTree();
165 	if (!loaded)
166 	{
167 		return makeTree();
168 	}
169 	return loaded;
170 }
171 
layoutSeqTree(int maxX,int maxY,int yInt,vector<SeqTreeEdge> & edges)172 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, int yInt, vector<SeqTreeEdge>& edges)
173 {
174 	if (!m_seqTree)
175 		return "";
176 	SeqTreeRootedLayout treeLayout(yInt);
177 	treeLayout.calculateNodePositions(*m_seqTree, maxX, maxY);
178 	getAllEdges(edges);
179 	string param = GetTreeAlgorithmName(m_treeOptions.clusteringMethod);
180 	param.append(" / " + DistanceMatrix::GetDistMethodName(m_treeOptions.distMethod));
181 	if (DistanceMatrix::DistMethodUsesScoringMatrix(m_treeOptions.distMethod) ) {
182 		param.append(" / " + GetScoringMatrixName(m_treeOptions.matrix));
183 	}
184 	return param;
185 }
186 
getAllEdges(vector<SeqTreeEdge> & edges)187 int SeqTreeAPI::getAllEdges(vector<SeqTreeEdge>& edges)
188 {
189 	getEdgesFromSubTree(m_seqTree->begin(), edges);
190 	return edges.size();
191 }
192 
getEdgesFromSubTree(const SeqTree::iterator & cursor,vector<SeqTreeEdge> & edges)193 void SeqTreeAPI::getEdgesFromSubTree(const SeqTree::iterator& cursor, vector<SeqTreeEdge>& edges)
194 {
195     //if cursor is a leaf node, draw its name and return
196     if (cursor.number_of_children() == 0)
197     {
198         return;
199     }
200     //always draw from parent to children
201 	SeqTreeNode nodeParent;
202 	nodeParent.isLeaf = false;
203 	nodeParent.x = cursor->x;
204 	nodeParent.y = cursor->y;
205     SeqTree::sibling_iterator sib = cursor.begin();
206     while (sib != cursor.end())
207     {
208 		SeqTreeNode nodeChild, nodeTurn;
209 		nodeChild.x = sib->x;
210 		nodeChild.y = sib->y;
211 		nodeTurn.isLeaf = false;
212 		nodeTurn.x = nodeParent.x;
213 		nodeTurn.y= nodeChild.y;
214 		if (sib.number_of_children() == 0)
215 		{
216 			nodeChild.isLeaf =true;
217 			nodeChild.name = sib->name;
218             nodeChild.displayAnnotation = kEmptyStr;
219 			annotateLeafNode(*sib,nodeChild);
220 		}
221 		else
222 			nodeChild.isLeaf = false;
223 		SeqTreeEdge e1, e2;
224 		e1.first = nodeParent;
225 		e1.second = nodeTurn;
226 		e2.first = nodeTurn;
227 		e2.second = nodeChild;
228 		edges.push_back(e1);
229 		edges.push_back(e2);
230 		getEdgesFromSubTree(sib, edges);
231         ++sib;
232     }
233 }
234 
annotateLeafNode(const SeqItem & nodeData,SeqTreeNode & node)235 void SeqTreeAPI::annotateLeafNode(const SeqItem& nodeData, SeqTreeNode& node)
236 {
237 	if (m_useMembership)
238 	{
239 		/*if (!nodeData.membership.empty() && m_loadOnly)
240 		{
241 			node.annotation = nodeData.membership;
242 		}*/
243 		if ((m_ma.GetNumRows() > 0))
244 		{
245 			CCdCore* cd = m_ma.GetScopedLeafCD(nodeData.rowID);
246 			if (cd)
247 			{
248 				node.annotation = cd->GetAccession();
249 			}
250 		}
251 		else if (m_cd)
252 		{
253 			if (!nodeData.membership.empty())
254 				node.annotation = nodeData.membership;
255 			else
256 				node.annotation = m_cd->GetAccession();
257 		}
258 	}
259 	else
260 	{
261 		if (m_cd)
262 		{
263 			if (m_taxClient == 0)
264 			{
265 				m_taxClient = new TaxClient();
266 				m_taxClient->init();
267 			}
268 			TTaxId taxid = m_taxClient->GetTaxIDForSeqId(nodeData.seqId);
269 			if (taxid >= ZERO_TAX_ID)
270 			{
271 				if (m_taxLevel == BySuperkingdom)
272 					node.annotation = m_taxClient->GetSuperKingdom(taxid);
273 			}
274 		}
275 		else
276 		{
277 			if (!m_taxTree)
278 				m_taxTree = new TaxTreeData(m_ma);
279 			string rankName = "superkingdom";
280 			/*if(m_taxLevel == ByKingdom)
281 				rankName = "kingdom";*/
282 			TaxTreeIterator taxNode = m_taxTree->getParentAtRank(nodeData.rowID, rankName);
283 			node.annotation = taxNode->orgName;
284 		}
285 	}
286 }
287 
loadAndValidateExistingTree()288 bool SeqTreeAPI::loadAndValidateExistingTree()
289 {
290 	if ( m_seqTree == 0)
291 		m_seqTree = new SeqTree;
292 	CCdCore* cd = 0;
293 	if (m_family)
294 	{
295 		cd = m_family->getRootCD();
296 	}
297 	else
298 		cd = m_ma.getFirstCD();
299 	if (!cd->IsSetSeqtree())
300 		return false;
301 	SeqLocToSeqItemMap liMap;
302 	if (!SeqTreeAsnizer::convertToSeqTree(cd->SetSeqtree(), *m_seqTree, liMap))
303 		return false;
304 	CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
305 	SeqTreeAsnizer::convertToTreeOption(algType, m_treeOptions);
306 	if (m_treeOptions.scope == CAlgorithm_type::eTree_scope_immediateChildrenOnly)
307 	{
308 		CDFamily *subfam = new CDFamily;
309 		m_family->subfamily(m_family->begin(),subfam, true); //childrenOnly = true
310 		m_ma.setAlignment(*subfam);
311 		delete subfam;
312 	}
313 	else
314 	{
315 		m_ma.setAlignment(*m_family);
316 	}
317 	bool validated = false;
318 	if(m_seqTree->isSequenceCompositionSame(m_ma))
319 		validated = true;
320 	else //if not same, resolve RowID with SeqLoc
321 	{
322 		if (SeqTreeAsnizer::resolveRowId(m_ma, liMap))
323 			validated = m_seqTree->isSequenceCompositionSame(m_ma);
324 	}
325 	if (validated)
326 		SeqTreeAsnizer::refillAsnMembership(m_ma, liMap);
327 	return validated;
328 }
329 
loadExistingTree(CCdCore * cd,TreeOptions * treeOptions,SeqTree * seqTree)330 bool SeqTreeAPI::loadExistingTree(CCdCore* cd, TreeOptions* treeOptions, SeqTree* seqTree)
331 {
332 	if (!cd->IsSetSeqtree() || !treeOptions)
333 		return false;
334 
335 	//bool loaded = false;
336 	SeqTree* tmpTree = 0;
337 	SeqTree tmpTreeObj;
338 
339 	if (seqTree)
340 		tmpTree = seqTree;
341 	else
342 		tmpTree = &tmpTreeObj;
343 
344 	SeqLocToSeqItemMap liMap;
345 	if (!SeqTreeAsnizer::convertToSeqTree(cd->SetSeqtree(), *tmpTree, liMap))
346 		return false;
347 	CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
348 	SeqTreeAsnizer::convertToTreeOption(algType, *treeOptions);
349 	return true;
350 }
351 
352 END_SCOPE(cd_utils)
353 END_NCBI_SCOPE
354