1 /* $Id: process_gene_overlap.hpp 617977 2020-10-08 18:29:47Z grichenk $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Author: 27 * 28 * File Description: 29 * 30 * =========================================================================== 31 */ 32 33 #ifndef __process_gene_overlap__hpp__ 34 #define __process_gene_overlap__hpp__ 35 36 // ============================================================================ 37 class CGeneOverlapProcess 38 // ============================================================================ 39 : public CScopedProcess 40 { 41 public: 42 // ------------------------------------------------------------------------ CGeneOverlapProcess()43 CGeneOverlapProcess() 44 // ------------------------------------------------------------------------ 45 : CScopedProcess() 46 , m_out( 0 ) 47 {}; 48 49 // ------------------------------------------------------------------------ ~CGeneOverlapProcess()50 ~CGeneOverlapProcess() 51 // ------------------------------------------------------------------------ 52 { 53 }; 54 55 // ------------------------------------------------------------------------ ProcessInitialize(const CArgs & args)56 void ProcessInitialize( 57 const CArgs& args ) 58 // ------------------------------------------------------------------------ 59 { 60 CScopedProcess::ProcessInitialize( args ); 61 62 m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout; 63 }; 64 65 // ------------------------------------------------------------------------ ProcessFinalize()66 void ProcessFinalize() 67 // ------------------------------------------------------------------------ 68 { 69 } 70 71 // ------------------------------------------------------------------------ SeqEntryInitialize(CRef<CSeq_entry> & se)72 virtual void SeqEntryInitialize( 73 CRef<CSeq_entry>& se ) 74 // ------------------------------------------------------------------------ 75 { 76 CScopedProcess::SeqEntryInitialize( se ); 77 }; 78 79 // ------------------------------------------------------------------------ SeqEntryProcess()80 void SeqEntryProcess() 81 // ------------------------------------------------------------------------ 82 { 83 try { 84 VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY (bit, *m_entry) { 85 const CBioseq& bioseq = *bit; 86 if (bioseq.IsSetInst()) { 87 const CSeq_inst& inst = bioseq.GetInst(); 88 if (inst.IsSetMol()) { 89 TSEQ_MOL mol = inst.GetMol(); 90 if (mol == CSeq_inst::eMol_aa) { 91 continue; 92 } 93 } 94 } 95 96 CBioseq_Handle bsh = m_scope->GetBioseqHandle(bioseq); 97 SAnnotSelector sel; 98 sel.SetLimitTSE(bsh.GetTSE_Handle()); 99 sel.SetResolveDepth(0); 100 101 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) { 102 const CMappedFeat mf = *feat_it; 103 const CSeq_feat& feat = mf.GetOriginalFeature(); 104 TestFeatureGeneOverlap( feat ); 105 ++m_objectcount; 106 } 107 108 /* 109 FOR_EACH_SEQANNOT_ON_BIOSEQ ( ait, bioseq ) { 110 const CSeq_annot& annot = **ait; 111 FOR_EACH_SEQFEAT_ON_SEQANNOT ( fit, annot ) { 112 const CSeq_feat& feat = **fit; 113 TestFeatureGeneOverlap( feat ); 114 ++m_objectcount; 115 } 116 } 117 VISIT_ALL_FEATURES_WITHIN_SEQENTRY (fit, *m_entry) { 118 const CSeq_feat& feat = *fit; 119 TestFeatureGeneOverlap( feat ); 120 ++m_objectcount; 121 } 122 */ 123 } 124 } 125 catch (CException& e) { 126 ERR_POST(Error << "error processing seqentry: " << e.what()); 127 } 128 }; 129 130 protected: 131 // ------------------------------------------------------------------------ TestFeatureGeneOverlap(const CSeq_feat & f)132 void TestFeatureGeneOverlap ( 133 const CSeq_feat& f ) 134 // ------------------------------------------------------------------------ 135 { 136 if ( f.GetData().Which() == CSeqFeatData::e_Gene ) { 137 return; 138 } 139 ++m_objectcount; 140 const CSeq_feat_Base::TLocation& locbase = f.GetLocation(); 141 CConstRef<CSeq_feat> ol = sequence::GetOverlappingGene( 142 locbase, *m_scope ); 143 if ( ! ol ) { 144 return; 145 } 146 const CGene_ref& gene = ol->GetData().GetGene(); 147 if (gene.IsSetLocus()) { 148 *m_out << "Gene locus " << gene.GetLocus() << endl; 149 } 150 if (gene.IsSetLocus_tag()) { 151 *m_out << "Gene locus_tag " << gene.GetLocus_tag() << endl; 152 } 153 /* 154 *m_out << SeqLocString( locbase ) << " -> " 155 << SeqLocString( ol->GetLocation() ) << endl; 156 */ 157 } 158 159 // ------------------------------------------------------------------------ SeqLocString(const CSeq_loc & loc)160 string SeqLocString( const CSeq_loc& loc ) 161 // ------------------------------------------------------------------------ 162 { 163 string str; 164 loc.GetLabel(&str); 165 return str; 166 } 167 168 // ------------------------------------------------------------------------ 169 // Data: 170 // ------------------------------------------------------------------------ 171 protected: 172 CNcbiOstream* m_out; 173 }; 174 175 // ============================================================================ 176 class CGeneFeatTreeProcess 177 // ============================================================================ 178 : public CScopedProcess 179 { 180 public: 181 // ------------------------------------------------------------------------ CGeneFeatTreeProcess()182 CGeneFeatTreeProcess() 183 // ------------------------------------------------------------------------ 184 : CScopedProcess() 185 , m_out( 0 ) 186 {}; 187 188 // ------------------------------------------------------------------------ ~CGeneFeatTreeProcess()189 ~CGeneFeatTreeProcess() 190 // ------------------------------------------------------------------------ 191 { 192 }; 193 194 // ------------------------------------------------------------------------ ProcessInitialize(const CArgs & args)195 void ProcessInitialize( 196 const CArgs& args ) 197 // ------------------------------------------------------------------------ 198 { 199 CScopedProcess::ProcessInitialize( args ); 200 201 m_out = args["o"] ? &(args["o"].AsOutputFile()) : &cout; 202 }; 203 204 // ------------------------------------------------------------------------ ProcessFinalize()205 void ProcessFinalize() 206 // ------------------------------------------------------------------------ 207 { 208 } 209 210 // ------------------------------------------------------------------------ SeqEntryInitialize(CRef<CSeq_entry> & se)211 virtual void SeqEntryInitialize( 212 CRef<CSeq_entry>& se ) 213 // ------------------------------------------------------------------------ 214 { 215 CScopedProcess::SeqEntryInitialize( se ); 216 }; 217 218 // ------------------------------------------------------------------------ SeqEntryProcess()219 void SeqEntryProcess() 220 // ------------------------------------------------------------------------ 221 { 222 try { 223 VISIT_ALL_BIOSEQS_WITHIN_SEQENTRY (bit, *m_entry) { 224 const CBioseq& bioseq = *bit; 225 if (bioseq.IsSetInst()) { 226 const CSeq_inst& inst = bioseq.GetInst(); 227 if (inst.IsSetMol()) { 228 TSEQ_MOL mol = inst.GetMol(); 229 if (mol == CSeq_inst::eMol_aa) { 230 continue; 231 } 232 } 233 } 234 235 CBioseq_Handle bsh = m_scope->GetBioseqHandle(bioseq); 236 SAnnotSelector sel; 237 sel.SetLimitTSE(bsh.GetTSE_Handle()); 238 sel.SetResolveDepth(0); 239 240 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) { 241 const CMappedFeat mf = *feat_it; 242 m_featTree.AddFeature(mf); 243 } 244 245 for (CFeat_CI feat_it(bsh, sel); feat_it; ++feat_it) { 246 const CMappedFeat mf = *feat_it; 247 TestFeatureGeneTree( mf ); 248 ++m_objectcount; 249 } 250 } 251 } 252 catch (CException& e) { 253 ERR_POST(Error << "error processing seqentry: " << e.what()); 254 } 255 }; 256 257 protected: 258 // ------------------------------------------------------------------------ TestFeatureGeneTree(const CMappedFeat mf)259 void TestFeatureGeneTree ( 260 const CMappedFeat mf ) 261 // ------------------------------------------------------------------------ 262 { 263 if ( mf.GetData().GetSubtype() == CSeqFeatData::eSubtype_gene ) { 264 return; 265 } 266 ++m_objectcount; 267 CMappedFeat best = feature::GetBestGeneForFeat(mf, &m_featTree); 268 if (best) { 269 const CGene_ref& gene = best.GetData().GetGene(); 270 if (gene.IsSetLocus()) { 271 *m_out << "Gene locus " << gene.GetLocus() << endl; 272 } 273 if (gene.IsSetLocus_tag()) { 274 *m_out << "Gene locus_tag " << gene.GetLocus_tag() << endl; 275 } 276 } 277 } 278 279 // ------------------------------------------------------------------------ SeqLocString(const CSeq_loc & loc)280 string SeqLocString( const CSeq_loc& loc ) 281 // ------------------------------------------------------------------------ 282 { 283 string str; 284 loc.GetLabel(&str); 285 return str; 286 } 287 288 // ------------------------------------------------------------------------ 289 // Data: 290 // ------------------------------------------------------------------------ 291 protected: 292 CNcbiOstream* m_out; 293 feature::CFeatTree m_featTree; 294 }; 295 296 #endif 297