1 /*  $Id: process_gene_overlap.hpp 617977 2020-10-08 18:29:47Z grichenk $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:
27 *
28 * File Description:
29 *
30 * ===========================================================================
31 */
32 
33 #ifndef __process_gene_overlap__hpp__
34 #define __process_gene_overlap__hpp__
35 
36 //  ============================================================================
37 class CGeneOverlapProcess
38 //  ============================================================================
39     : public CScopedProcess
40 {
41 public:
42     //  ------------------------------------------------------------------------
CGeneOverlapProcess()43     CGeneOverlapProcess()
44     //  ------------------------------------------------------------------------
45         : CScopedProcess()
46         , m_out( 0 )
47     {};
48 
49     //  ------------------------------------------------------------------------
~CGeneOverlapProcess()50     ~CGeneOverlapProcess()
51     //  ------------------------------------------------------------------------
52     {
53     };
54 
55     //  ------------------------------------------------------------------------
ProcessInitialize(const CArgs & args)56     void ProcessInitialize(
57         const CArgs& args )
58     //  ------------------------------------------------------------------------
59     {
60         CScopedProcess::ProcessInitialize( args );
61 
62         m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout;
63     };
64 
65     //  ------------------------------------------------------------------------
ProcessFinalize()66     void ProcessFinalize()
67     //  ------------------------------------------------------------------------
68     {
69     }
70 
71     //  ------------------------------------------------------------------------
SeqEntryInitialize(CRef<CSeq_entry> & se)72     virtual void SeqEntryInitialize(
73         CRef<CSeq_entry>& se )
74     //  ------------------------------------------------------------------------
75     {
76         CScopedProcess::SeqEntryInitialize( se );
77     };
78 
79     //  ------------------------------------------------------------------------
SeqEntryProcess()80     void SeqEntryProcess()
81     //  ------------------------------------------------------------------------
82     {
83         try {
84             VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY (bit, *m_entry) {
85                 const CBioseq& bioseq = *bit;
86                 if (bioseq.IsSetInst()) {
87                     const CSeq_inst& inst = bioseq.GetInst();
88                     if (inst.IsSetMol()) {
89                         TSEQ_MOL mol = inst.GetMol();
90                         if (mol == CSeq_inst::eMol_aa) {
91                             continue;
92                         }
93                     }
94                 }
95 
96                 CBioseq_Handle bsh = m_scope->GetBioseqHandle(bioseq);
97                 SAnnotSelector sel;
98                 sel.SetLimitTSE(bsh.GetTSE_Handle());
99                 sel.SetResolveDepth(0);
100 
101                 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) {
102                     const CMappedFeat mf = *feat_it;
103                     const CSeq_feat& feat = mf.GetOriginalFeature();
104                     TestFeatureGeneOverlap( feat );
105                     ++m_objectcount;
106                 }
107 
108                 /*
109                 FOR_EACH_SEQANNOT_ON_BIOSEQ ( ait, bioseq ) {
110                     const CSeq_annot& annot = **ait;
111                     FOR_EACH_SEQFEAT_ON_SEQANNOT ( fit, annot ) {
112                         const CSeq_feat& feat = **fit;
113                         TestFeatureGeneOverlap( feat );
114                         ++m_objectcount;
115                     }
116                 }
117                 VISIT_ALL_FEATURES_WITHIN_SEQENTRY (fit, *m_entry) {
118                     const CSeq_feat& feat = *fit;
119                     TestFeatureGeneOverlap( feat );
120                     ++m_objectcount;
121                 }
122                 */
123             }
124         }
125         catch (CException& e) {
126             ERR_POST(Error << "error processing seqentry: " << e.what());
127         }
128     };
129 
130 protected:
131     //  ------------------------------------------------------------------------
TestFeatureGeneOverlap(const CSeq_feat & f)132     void TestFeatureGeneOverlap (
133         const CSeq_feat& f )
134     //  ------------------------------------------------------------------------
135     {
136         if ( f.GetData().Which() == CSeqFeatData::e_Gene ) {
137             return;
138         }
139         ++m_objectcount;
140         const CSeq_feat_Base::TLocation& locbase = f.GetLocation();
141         CConstRef<CSeq_feat> ol = sequence::GetOverlappingGene(
142             locbase, *m_scope );
143         if ( ! ol ) {
144             return;
145         }
146         const CGene_ref& gene = ol->GetData().GetGene();
147         if (gene.IsSetLocus()) {
148             *m_out << "Gene locus " << gene.GetLocus() << endl;
149         }
150         if (gene.IsSetLocus_tag()) {
151             *m_out << "Gene locus_tag " << gene.GetLocus_tag() << endl;
152         }
153         /*
154         *m_out << SeqLocString( locbase ) << " -> "
155                << SeqLocString( ol->GetLocation() ) << endl;
156         */
157     }
158 
159     //  ------------------------------------------------------------------------
SeqLocString(const CSeq_loc & loc)160     string SeqLocString( const CSeq_loc& loc )
161     //  ------------------------------------------------------------------------
162     {
163         string str;
164         loc.GetLabel(&str);
165         return str;
166     }
167 
168     //  ------------------------------------------------------------------------
169     //  Data:
170     //  ------------------------------------------------------------------------
171 protected:
172     CNcbiOstream* m_out;
173 };
174 
175 //  ============================================================================
176 class CGeneFeatTreeProcess
177 //  ============================================================================
178     : public CScopedProcess
179 {
180 public:
181     //  ------------------------------------------------------------------------
CGeneFeatTreeProcess()182     CGeneFeatTreeProcess()
183     //  ------------------------------------------------------------------------
184         : CScopedProcess()
185         , m_out( 0 )
186     {};
187 
188     //  ------------------------------------------------------------------------
~CGeneFeatTreeProcess()189     ~CGeneFeatTreeProcess()
190     //  ------------------------------------------------------------------------
191     {
192     };
193 
194     //  ------------------------------------------------------------------------
ProcessInitialize(const CArgs & args)195     void ProcessInitialize(
196         const CArgs& args )
197     //  ------------------------------------------------------------------------
198     {
199         CScopedProcess::ProcessInitialize( args );
200 
201         m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout;
202     };
203 
204     //  ------------------------------------------------------------------------
ProcessFinalize()205     void ProcessFinalize()
206     //  ------------------------------------------------------------------------
207     {
208     }
209 
210     //  ------------------------------------------------------------------------
SeqEntryInitialize(CRef<CSeq_entry> & se)211     virtual void SeqEntryInitialize(
212         CRef<CSeq_entry>& se )
213     //  ------------------------------------------------------------------------
214     {
215         CScopedProcess::SeqEntryInitialize( se );
216     };
217 
218     //  ------------------------------------------------------------------------
SeqEntryProcess()219     void SeqEntryProcess()
220     //  ------------------------------------------------------------------------
221     {
222         try {
223             VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY (bit, *m_entry) {
224                 const CBioseq& bioseq = *bit;
225                 if (bioseq.IsSetInst()) {
226                     const CSeq_inst& inst = bioseq.GetInst();
227                     if (inst.IsSetMol()) {
228                         TSEQ_MOL mol = inst.GetMol();
229                         if (mol == CSeq_inst::eMol_aa) {
230                             continue;
231                         }
232                     }
233                 }
234 
235                 CBioseq_Handle bsh = m_scope->GetBioseqHandle(bioseq);
236                 SAnnotSelector sel;
237                 sel.SetLimitTSE(bsh.GetTSE_Handle());
238                 sel.SetResolveDepth(0);
239 
240                 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) {
241                     const CMappedFeat mf = *feat_it;
242                     m_featTree.AddFeature(mf);
243                 }
244 
245                 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) {
246                     const CMappedFeat mf = *feat_it;
247                     TestFeatureGeneTree( mf );
248                     ++m_objectcount;
249                 }
250             }
251         }
252         catch (CException& e) {
253             ERR_POST(Error << "error processing seqentry: " << e.what());
254         }
255     };
256 
257 protected:
258     //  ------------------------------------------------------------------------
TestFeatureGeneTree(const CMappedFeat mf)259     void TestFeatureGeneTree (
260         const CMappedFeat mf )
261     //  ------------------------------------------------------------------------
262     {
263         if ( mf.GetData().GetSubtype() == CSeqFeatData::eSubtype_gene ) {
264             return;
265         }
266         ++m_objectcount;
267         CMappedFeat best = feature::GetBestGeneForFeat(mf, &m_featTree);
268         if (best) {
269             const CGene_ref& gene = best.GetData().GetGene();
270             if (gene.IsSetLocus()) {
271                 *m_out << "Gene locus " << gene.GetLocus() << endl;
272             }
273             if (gene.IsSetLocus_tag()) {
274                 *m_out << "Gene locus_tag " << gene.GetLocus_tag() << endl;
275             }
276         }
277     }
278 
279     //  ------------------------------------------------------------------------
SeqLocString(const CSeq_loc & loc)280     string SeqLocString( const CSeq_loc& loc )
281     //  ------------------------------------------------------------------------
282     {
283         string str;
284         loc.GetLabel(&str);
285         return str;
286     }
287 
288     //  ------------------------------------------------------------------------
289     //  Data:
290     //  ------------------------------------------------------------------------
291 protected:
292     CNcbiOstream* m_out;
293     feature::CFeatTree m_featTree;
294 };
295 
296 #endif
297