1 /* $Id: gene_cache.cpp 569135 2018-08-16 16:19:54Z bollin $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Colleen Bollin
27 *
28 * File Description:
29 * Gene cache for validating features
30 * .......
31 *
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <objmgr/object_manager.hpp>
38
39 #include <objtools/validator/gene_cache.hpp>
40 #include <objtools/validator/utilities.hpp>
41
42 #include <objects/seqloc/Seq_loc.hpp>
43 #include <objects/seqloc/Seq_interval.hpp>
44 #include <objects/seqloc/Seq_point.hpp>
45
46 #include <objmgr/bioseq_ci.hpp>
47 #include <objmgr/seq_annot_ci.hpp>
48 #include <objmgr/util/feature.hpp>
49 #include <objmgr/util/sequence.hpp>
50
51 #include <objmgr/feat_ci.hpp>
52 #include <objmgr/scope.hpp>
53
54
55 #define NCBI_USE_ERRCODE_X Objtools_Validator
56
57 BEGIN_NCBI_SCOPE
58 BEGIN_SCOPE(objects)
59 BEGIN_SCOPE(validator)
60 using namespace sequence;
61
62
GetFeatTreeFromCache(CBioseq_Handle bsh)63 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(CBioseq_Handle bsh)
64 {
65 TSeqTreeMap::iterator smit = m_SeqTreeMap.find(bsh);
66 if (smit == m_SeqTreeMap.end()) {
67 // test: only keep the last one
68 m_SeqTreeMap.clear();
69 CFeat_CI f(bsh);
70 CRef<feature::CFeatTree> tr(new feature::CFeatTree(f));
71 m_SeqTreeMap[bsh] = tr;
72 return tr;
73 } else {
74 return smit->second;
75 }
76 }
77
78
GetFeatTreeFromCache(const CSeq_loc & loc,CScope & scope)79 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(const CSeq_loc& loc, CScope& scope)
80 {
81 CBioseq_Handle bsh;
82 try {
83 bsh = scope.GetBioseqHandle(loc);
84 } catch (CException&) {
85 CSeq_loc_CI li(loc);
86 while (li && !bsh) {
87 bsh = scope.GetBioseqHandle(li.GetSeq_id());
88 ++li;
89 }
90 }
91
92 if (bsh) {
93 return GetFeatTreeFromCache(bsh);
94 } else {
95 return (CRef<feature::CFeatTree>(NULL));
96 }
97 }
98
99
GetFeatTreeFromCache(const CSeq_feat & feat,CScope & scope)100 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(const CSeq_feat& feat, CScope& scope)
101 {
102 return GetFeatTreeFromCache(feat.GetLocation(), scope);
103 }
104
105
GetGeneFromCache(const CSeq_feat * feat,CScope & scope)106 CConstRef<CSeq_feat> CGeneCache::GetGeneFromCache(const CSeq_feat* feat, CScope& scope)
107 {
108 if (!feat) {
109 return CConstRef<CSeq_feat>(NULL);
110 }
111 CConstRef<CSeq_feat> gene;
112 TFeatGeneMap::iterator it = m_FeatGeneMap.find(feat);
113 if (it == m_FeatGeneMap.end()) {
114 try {
115 CSeq_feat_Handle fh = scope.GetSeq_featHandle(*feat);
116 CRef<feature::CFeatTree> tr = GetFeatTreeFromCache(*feat, scope);
117 if (!tr) {
118 return CConstRef<CSeq_feat>(NULL);
119 }
120 CMappedFeat mf = tr->GetBestGene(fh);
121 if (mf) {
122 gene = mf.GetSeq_feat();
123 }
124 } catch (CException&) {
125 gene = sequence::GetGeneForFeature(*feat, scope);
126 }
127 m_FeatGeneMap[feat] = gene;
128 return gene;
129 } else {
130 return it->second;
131 }
132 }
133
134
x_HasNamedQual(const CSeq_feat & feat,const string & qual)135 bool CGeneCache::x_HasNamedQual(const CSeq_feat& feat, const string& qual)
136 {
137 bool rval = false;
138 if (feat.IsSetQual()) {
139 for (auto it : feat.GetQual()) {
140 if (it->IsSetQual() && NStr::EqualNocase(it->GetQual(), qual)) {
141 rval = true;
142 break;
143 }
144 }
145 }
146 return rval;
147 }
148
149
x_IsPseudo(const CGene_ref & gref)150 bool CGeneCache::x_IsPseudo(const CGene_ref& gref)
151 {
152 return (gref.IsSetPseudo() && gref.GetPseudo());
153 }
154
155
IsPseudo(const CSeq_feat & feat,CScope & scope)156 bool CGeneCache::IsPseudo(const CSeq_feat& feat, CScope& scope)
157 {
158 return (feat.IsSetPseudo() && feat.GetPseudo()) ||
159 (x_HasNamedQual(feat, "pseudogene")) ||
160 (feat.GetData().IsGene() && x_IsPseudo(feat.GetData().GetGene()));
161 }
162
163
164 END_SCOPE(validator)
165 END_SCOPE(objects)
166 END_NCBI_SCOPE
167