1 /* $Id: cuSeqTreeAPI.cpp 609836 2020-06-08 15:56:03Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Charlie Liu
27 *
28 * File Description:
29 *
30 * API to handle SeqTree
31 *
32 * ===========================================================================
33 */
34
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuSeqTreeAPI.hpp>
37 #include <algo/structure/cd_utils/cuSeqTreeFactory.hpp>
38 #include <algo/structure/cd_utils/cuSeqTreeRootedLayout.hpp>
39 #include <algo/structure/cd_utils/cuSeqTreeAsnizer.hpp>
40
41 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)42 BEGIN_SCOPE(cd_utils)
43
44 SeqTreeAPI::SeqTreeAPI(vector<CCdCore*>& cds, bool loadExistingTreeOnly)
45 : m_ma(), m_cd(0), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
46 m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
47 m_loadOnly(loadExistingTreeOnly)
48 {
49 vector<CDFamily*> families;
50 CDFamily::createFamilies(cds, families);
51 if(families.size() != 1 )
52 {
53 for (int i = 0 ; i <families.size(); i++)
54 delete families[i];
55 return;
56 }
57 m_ma.setAlignment(*families[0]);
58 m_family = families[0];
59 }
60
SeqTreeAPI(CCdCore * cd)61 SeqTreeAPI::SeqTreeAPI(CCdCore* cd)
62 : m_ma(), m_cd(cd), m_family(0), m_seqTree(0), m_taxTree(0), m_taxClient(0),
63 m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(), m_triedTreeMaking(false),
64 m_loadOnly(true)
65 {
66 }
67
SeqTreeAPI(CDFamily & cdfam,TreeOptions & option)68 SeqTreeAPI::SeqTreeAPI(CDFamily& cdfam, TreeOptions& option)
69 : m_ma(), m_cd(0), m_family(&cdfam), m_seqTree(0), m_taxTree(0), m_taxClient(0),
70 m_useMembership(true), m_taxLevel(BySuperkingdom), m_treeOptions(option), m_triedTreeMaking(false),
71 m_loadOnly(false)
72 {
73 m_family = new CDFamily(cdfam);
74 }
75
~SeqTreeAPI()76 SeqTreeAPI::~SeqTreeAPI()
77 {
78 if (m_seqTree) delete m_seqTree;
79 if (m_taxTree) delete m_taxTree;
80 if (m_taxClient) delete m_taxClient;
81 if (m_family) delete m_family;
82 }
83
annotateTreeByMembership()84 void SeqTreeAPI::annotateTreeByMembership()
85 {
86 m_useMembership = true;
87 }
88
annotateTreeByTaxonomy(TaxonomyLevel tax)89 void SeqTreeAPI::annotateTreeByTaxonomy(TaxonomyLevel tax)
90 {
91 m_taxLevel = tax;
92 m_useMembership = false;
93 }
94
getNumOfLeaves()95 int SeqTreeAPI::getNumOfLeaves()
96 {
97 if (m_seqTree == 0)
98 makeOrLoadTree();
99 if (m_seqTree == 0)
100 return 0;
101 else
102 return m_seqTree->getNumLeaf();
103 }
104 //return a string of tree method names
105 //lay out the tree to the specified area, .i.e. fit to screen
layoutSeqTree(int maxX,int maxY,vector<SeqTreeEdge> & edges)106 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, vector<SeqTreeEdge>& edges)
107 {
108 if (m_seqTree == 0)
109 makeOrLoadTree();
110 return layoutSeqTree(maxX, maxY, -1, edges);
111 }
112
113 //lay out the tree with a fixed spacing between sequences
layoutSeqTree(int maxX,vector<SeqTreeEdge> & edges,int yInt)114 string SeqTreeAPI::layoutSeqTree(int maxX, vector<SeqTreeEdge>& edges, int yInt)
115 {
116 if (m_seqTree == 0)
117 makeOrLoadTree();
118 int maxY = 0; //not used
119 if (yInt < 2)
120 yInt = 2;
121 return layoutSeqTree(maxX, maxY, yInt, edges);
122 }
makeTree()123 bool SeqTreeAPI::makeTree()
124 {
125 if (m_triedTreeMaking) //if already tried, don't try again
126 return m_seqTree != 0;
127 if (m_seqTree != 0)
128 {
129 delete m_seqTree;
130 m_seqTree = 0;
131 m_seqTree = new SeqTree;
132 }
133 if (!m_loadOnly)
134 {
135 if (m_ma.getFirstCD() == 0)
136 m_ma.setAlignment(*m_family);
137 if(!m_ma.isBlockAligned())
138 {
139 ERR_POST("Sequence tree is not made for " <<m_ma.getFirstCD()->GetAccession()
140 <<" because it does not have a consistent block alognment.");
141 }
142 else
143 {
144 if ((m_treeOptions.distMethod == eScoreBlastFoot) || (m_treeOptions.distMethod == eScoreBlastFull))
145 m_treeOptions.distMethod = eScoreAligned;
146 m_seqTree = TreeFactory::makeTree(&m_ma, m_treeOptions);
147 }
148 if (m_seqTree)
149 m_seqTree->fixRowName(m_ma, SeqTree::eGI);
150 }
151 m_triedTreeMaking = true;
152 return m_seqTree != 0;
153 }
154
makeOrLoadTree()155 bool SeqTreeAPI::makeOrLoadTree()
156 {
157 if (m_triedTreeMaking) //if already tried, don't try again
158 return m_seqTree != 0;
159 m_seqTree = new SeqTree();
160 bool loaded = false;
161 if (m_cd)
162 loaded = loadExistingTree(m_cd, &m_treeOptions, m_seqTree);
163 else
164 loaded = loadAndValidateExistingTree();
165 if (!loaded)
166 {
167 return makeTree();
168 }
169 return loaded;
170 }
171
layoutSeqTree(int maxX,int maxY,int yInt,vector<SeqTreeEdge> & edges)172 string SeqTreeAPI::layoutSeqTree(int maxX, int maxY, int yInt, vector<SeqTreeEdge>& edges)
173 {
174 if (!m_seqTree)
175 return "";
176 SeqTreeRootedLayout treeLayout(yInt);
177 treeLayout.calculateNodePositions(*m_seqTree, maxX, maxY);
178 getAllEdges(edges);
179 string param = GetTreeAlgorithmName(m_treeOptions.clusteringMethod);
180 param.append(" / " + DistanceMatrix::GetDistMethodName(m_treeOptions.distMethod));
181 if (DistanceMatrix::DistMethodUsesScoringMatrix(m_treeOptions.distMethod) ) {
182 param.append(" / " + GetScoringMatrixName(m_treeOptions.matrix));
183 }
184 return param;
185 }
186
getAllEdges(vector<SeqTreeEdge> & edges)187 int SeqTreeAPI::getAllEdges(vector<SeqTreeEdge>& edges)
188 {
189 getEdgesFromSubTree(m_seqTree->begin(), edges);
190 return edges.size();
191 }
192
getEdgesFromSubTree(const SeqTree::iterator & cursor,vector<SeqTreeEdge> & edges)193 void SeqTreeAPI::getEdgesFromSubTree(const SeqTree::iterator& cursor, vector<SeqTreeEdge>& edges)
194 {
195 //if cursor is a leaf node, draw its name and return
196 if (cursor.number_of_children() == 0)
197 {
198 return;
199 }
200 //always draw from parent to children
201 SeqTreeNode nodeParent;
202 nodeParent.isLeaf = false;
203 nodeParent.x = cursor->x;
204 nodeParent.y = cursor->y;
205 SeqTree::sibling_iterator sib = cursor.begin();
206 while (sib != cursor.end())
207 {
208 SeqTreeNode nodeChild, nodeTurn;
209 nodeChild.x = sib->x;
210 nodeChild.y = sib->y;
211 nodeTurn.isLeaf = false;
212 nodeTurn.x = nodeParent.x;
213 nodeTurn.y= nodeChild.y;
214 if (sib.number_of_children() == 0)
215 {
216 nodeChild.isLeaf =true;
217 nodeChild.name = sib->name;
218 nodeChild.displayAnnotation = kEmptyStr;
219 annotateLeafNode(*sib,nodeChild);
220 }
221 else
222 nodeChild.isLeaf = false;
223 SeqTreeEdge e1, e2;
224 e1.first = nodeParent;
225 e1.second = nodeTurn;
226 e2.first = nodeTurn;
227 e2.second = nodeChild;
228 edges.push_back(e1);
229 edges.push_back(e2);
230 getEdgesFromSubTree(sib, edges);
231 ++sib;
232 }
233 }
234
annotateLeafNode(const SeqItem & nodeData,SeqTreeNode & node)235 void SeqTreeAPI::annotateLeafNode(const SeqItem& nodeData, SeqTreeNode& node)
236 {
237 if (m_useMembership)
238 {
239 /*if (!nodeData.membership.empty() && m_loadOnly)
240 {
241 node.annotation = nodeData.membership;
242 }*/
243 if ((m_ma.GetNumRows() > 0))
244 {
245 CCdCore* cd = m_ma.GetScopedLeafCD(nodeData.rowID);
246 if (cd)
247 {
248 node.annotation = cd->GetAccession();
249 }
250 }
251 else if (m_cd)
252 {
253 if (!nodeData.membership.empty())
254 node.annotation = nodeData.membership;
255 else
256 node.annotation = m_cd->GetAccession();
257 }
258 }
259 else
260 {
261 if (m_cd)
262 {
263 if (m_taxClient == 0)
264 {
265 m_taxClient = new TaxClient();
266 m_taxClient->init();
267 }
268 TTaxId taxid = m_taxClient->GetTaxIDForSeqId(nodeData.seqId);
269 if (taxid >= ZERO_TAX_ID)
270 {
271 if (m_taxLevel == BySuperkingdom)
272 node.annotation = m_taxClient->GetSuperKingdom(taxid);
273 }
274 }
275 else
276 {
277 if (!m_taxTree)
278 m_taxTree = new TaxTreeData(m_ma);
279 string rankName = "superkingdom";
280 /*if(m_taxLevel == ByKingdom)
281 rankName = "kingdom";*/
282 TaxTreeIterator taxNode = m_taxTree->getParentAtRank(nodeData.rowID, rankName);
283 node.annotation = taxNode->orgName;
284 }
285 }
286 }
287
loadAndValidateExistingTree()288 bool SeqTreeAPI::loadAndValidateExistingTree()
289 {
290 if ( m_seqTree == 0)
291 m_seqTree = new SeqTree;
292 CCdCore* cd = 0;
293 if (m_family)
294 {
295 cd = m_family->getRootCD();
296 }
297 else
298 cd = m_ma.getFirstCD();
299 if (!cd->IsSetSeqtree())
300 return false;
301 SeqLocToSeqItemMap liMap;
302 if (!SeqTreeAsnizer::convertToSeqTree(cd->SetSeqtree(), *m_seqTree, liMap))
303 return false;
304 CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
305 SeqTreeAsnizer::convertToTreeOption(algType, m_treeOptions);
306 if (m_treeOptions.scope == CAlgorithm_type::eTree_scope_immediateChildrenOnly)
307 {
308 CDFamily *subfam = new CDFamily;
309 m_family->subfamily(m_family->begin(),subfam, true); //childrenOnly = true
310 m_ma.setAlignment(*subfam);
311 delete subfam;
312 }
313 else
314 {
315 m_ma.setAlignment(*m_family);
316 }
317 bool validated = false;
318 if(m_seqTree->isSequenceCompositionSame(m_ma))
319 validated = true;
320 else //if not same, resolve RowID with SeqLoc
321 {
322 if (SeqTreeAsnizer::resolveRowId(m_ma, liMap))
323 validated = m_seqTree->isSequenceCompositionSame(m_ma);
324 }
325 if (validated)
326 SeqTreeAsnizer::refillAsnMembership(m_ma, liMap);
327 return validated;
328 }
329
loadExistingTree(CCdCore * cd,TreeOptions * treeOptions,SeqTree * seqTree)330 bool SeqTreeAPI::loadExistingTree(CCdCore* cd, TreeOptions* treeOptions, SeqTree* seqTree)
331 {
332 if (!cd->IsSetSeqtree() || !treeOptions)
333 return false;
334
335 //bool loaded = false;
336 SeqTree* tmpTree = 0;
337 SeqTree tmpTreeObj;
338
339 if (seqTree)
340 tmpTree = seqTree;
341 else
342 tmpTree = &tmpTreeObj;
343
344 SeqLocToSeqItemMap liMap;
345 if (!SeqTreeAsnizer::convertToSeqTree(cd->SetSeqtree(), *tmpTree, liMap))
346 return false;
347 CRef< CAlgorithm_type > algType(const_cast<CAlgorithm_type*> (&(cd->GetSeqtree().GetAlgorithm())));
348 SeqTreeAsnizer::convertToTreeOption(algType, *treeOptions);
349 return true;
350 }
351
352 END_SCOPE(cd_utils)
353 END_NCBI_SCOPE
354