1 /*  $Id: gene_cache.cpp 569135 2018-08-16 16:19:54Z bollin $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Colleen Bollin
27  *
28  * File Description:
29  *   Gene cache for validating features
30  *   .......
31  *
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/ncbiapp.hpp>
37 #include <objmgr/object_manager.hpp>
38 
39 #include <objtools/validator/gene_cache.hpp>
40 #include <objtools/validator/utilities.hpp>
41 
42 #include <objects/seqloc/Seq_loc.hpp>
43 #include <objects/seqloc/Seq_interval.hpp>
44 #include <objects/seqloc/Seq_point.hpp>
45 
46 #include <objmgr/bioseq_ci.hpp>
47 #include <objmgr/seq_annot_ci.hpp>
48 #include <objmgr/util/feature.hpp>
49 #include <objmgr/util/sequence.hpp>
50 
51 #include <objmgr/feat_ci.hpp>
52 #include <objmgr/scope.hpp>
53 
54 
55 #define NCBI_USE_ERRCODE_X   Objtools_Validator
56 
57 BEGIN_NCBI_SCOPE
58 BEGIN_SCOPE(objects)
59 BEGIN_SCOPE(validator)
60 using namespace sequence;
61 
62 
GetFeatTreeFromCache(CBioseq_Handle bsh)63 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(CBioseq_Handle bsh)
64 {
65     TSeqTreeMap::iterator smit = m_SeqTreeMap.find(bsh);
66     if (smit == m_SeqTreeMap.end()) {
67         // test: only keep the last one
68         m_SeqTreeMap.clear();
69         CFeat_CI f(bsh);
70         CRef<feature::CFeatTree> tr(new feature::CFeatTree(f));
71         m_SeqTreeMap[bsh] = tr;
72         return tr;
73     } else  {
74         return smit->second;
75     }
76 }
77 
78 
GetFeatTreeFromCache(const CSeq_loc & loc,CScope & scope)79 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(const CSeq_loc& loc, CScope& scope)
80 {
81     CBioseq_Handle bsh;
82     try {
83         bsh = scope.GetBioseqHandle(loc);
84     } catch (CException&) {
85         CSeq_loc_CI li(loc);
86         while (li && !bsh) {
87             bsh = scope.GetBioseqHandle(li.GetSeq_id());
88             ++li;
89         }
90     }
91 
92     if (bsh) {
93         return GetFeatTreeFromCache(bsh);
94     } else {
95         return (CRef<feature::CFeatTree>(NULL));
96     }
97 }
98 
99 
GetFeatTreeFromCache(const CSeq_feat & feat,CScope & scope)100 CRef<feature::CFeatTree> CGeneCache::GetFeatTreeFromCache(const CSeq_feat& feat, CScope& scope)
101 {
102     return GetFeatTreeFromCache(feat.GetLocation(), scope);
103 }
104 
105 
GetGeneFromCache(const CSeq_feat * feat,CScope & scope)106 CConstRef<CSeq_feat> CGeneCache::GetGeneFromCache(const CSeq_feat* feat, CScope& scope)
107 {
108     if (!feat) {
109         return CConstRef<CSeq_feat>(NULL);
110     }
111     CConstRef<CSeq_feat> gene;
112     TFeatGeneMap::iterator it = m_FeatGeneMap.find(feat);
113     if (it == m_FeatGeneMap.end()) {
114         try {
115             CSeq_feat_Handle fh = scope.GetSeq_featHandle(*feat);
116             CRef<feature::CFeatTree> tr = GetFeatTreeFromCache(*feat, scope);
117             if (!tr) {
118                 return CConstRef<CSeq_feat>(NULL);
119             }
120             CMappedFeat mf = tr->GetBestGene(fh);
121             if (mf) {
122                 gene = mf.GetSeq_feat();
123             }
124         } catch (CException&) {
125             gene = sequence::GetGeneForFeature(*feat, scope);
126         }
127         m_FeatGeneMap[feat] = gene;
128         return gene;
129     } else {
130         return it->second;
131     }
132 }
133 
134 
x_HasNamedQual(const CSeq_feat & feat,const string & qual)135 bool CGeneCache::x_HasNamedQual(const CSeq_feat& feat, const string& qual)
136 {
137     bool rval = false;
138     if (feat.IsSetQual()) {
139         for (auto it : feat.GetQual()) {
140             if (it->IsSetQual() && NStr::EqualNocase(it->GetQual(), qual)) {
141                 rval = true;
142                 break;
143             }
144         }
145     }
146     return rval;
147 }
148 
149 
x_IsPseudo(const CGene_ref & gref)150 bool CGeneCache::x_IsPseudo(const CGene_ref& gref)
151 {
152     return (gref.IsSetPseudo() && gref.GetPseudo());
153 }
154 
155 
IsPseudo(const CSeq_feat & feat,CScope & scope)156 bool CGeneCache::IsPseudo(const CSeq_feat& feat, CScope& scope)
157 {
158     return (feat.IsSetPseudo() && feat.GetPseudo()) ||
159         (x_HasNamedQual(feat, "pseudogene")) ||
160         (feat.GetData().IsGene() && x_IsPseudo(feat.GetData().GetGene()));
161 }
162 
163 
164 END_SCOPE(validator)
165 END_SCOPE(objects)
166 END_NCBI_SCOPE
167