1 /* $Id: feature_propagate.hpp 632623 2021-06-03 17:38:11Z ivanov $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Colleen Bollin, Igor Filippov 27 */ 28 29 #ifndef _EDIT_FEATURE_PROPAGATE__HPP_ 30 #define _EDIT_FEATURE_PROPAGATE__HPP_ 31 32 #include <corelib/ncbistd.hpp> 33 #include <corelib/ncbiobj.hpp> 34 #include <corelib/ncbi_message.hpp> 35 #include <objects/seqfeat/Seq_feat.hpp> 36 #include <objects/seqloc/Seq_loc.hpp> 37 #include <objects/general/Object_id.hpp> 38 #include <objects/seqloc/Seq_interval.hpp> 39 #include <objects/seqalign/Dense_seg.hpp> 40 #include <objmgr/seq_entry_handle.hpp> 41 #include <objmgr/bioseq_handle.hpp> 42 #include <objmgr/bioseq_ci.hpp> 43 #include <objmgr/scope.hpp> 44 #include <objmgr/util/sequence.hpp> 45 46 47 BEGIN_NCBI_SCOPE 48 BEGIN_SCOPE(objects) 49 BEGIN_SCOPE(edit) 50 51 class NCBI_XOBJEDIT_EXPORT CFeaturePropagator 52 { 53 public: 54 NCBI_DEPRECATED CFeaturePropagator(CBioseq_Handle src, CBioseq_Handle target, const CSeq_align& align, 55 bool stop_at_stop = true, bool cleanup_partials = true, bool merge_abutting = true, 56 CMessageListener_Basic* pMessageListener = 0, 57 CObject_id::TId* feat_id = nullptr); 58 CFeaturePropagator(CBioseq_Handle src, CBioseq_Handle target, const CSeq_align& align, 59 bool stop_at_stop = true, bool cleanup_partials = true, bool merge_abutting = true, bool expand_over_gaps = true, 60 CMessageListener_Basic* pMessageListener = 0, 61 CObject_id::TId* feat_id = nullptr); ~CFeaturePropagator()62 ~CFeaturePropagator() {} 63 64 CRef<CSeq_feat> Propagate(const objects::CSeq_feat& orig_feat); 65 vector<CRef<CSeq_feat> > PropagateAll(); 66 vector<CRef<CSeq_feat> > PropagateAllReportFailures( 67 vector<CConstRef<CSeq_feat> >&); 68 69 CRef<CSeq_feat> ConstructProteinFeatureForPropagatedCodingRegion(const CSeq_feat& orig_cds, const CSeq_feat& new_cds); 70 71 /// Propagates a feature list from the source sequence 72 /// The propagated protein feature is stored right after the propagated cds 73 vector<CRef<CSeq_feat>> PropagateFeatureList(const vector<CConstRef<CSeq_feat>>& orig_feats); 74 75 typedef enum { 76 eFeaturePropagationProblem_None = 0, 77 eFeaturePropagationProblem_FeatureLocation, 78 eFeaturePropagationProblem_CodeBreakLocation, 79 eFeaturePropagationProblem_AnticodonLocation 80 } EGapIntervalType; 81 82 private: 83 // Prohibit copy constructor and assignment operator 84 CFeaturePropagator(const CFeaturePropagator& value); 85 CFeaturePropagator& operator=(const CFeaturePropagator& value); 86 87 void x_PropagateCds(CSeq_feat& feat, const CSeq_id& targetId, bool origIsPartialStart); 88 void x_CdsMapCodeBreaks(CSeq_feat& feat, const CSeq_id& targetId); 89 void x_CdsStopAtStopCodon(CSeq_feat& cds); 90 void x_CdsCleanupPartials(CSeq_feat& cds, bool origIsPartialStart); 91 92 void x_PropagatetRNA(CSeq_feat& feat, const CSeq_id& targetId); 93 94 CRef<CSeq_loc> x_MapLocation(const CSeq_loc& sourceLoc, const CSeq_id& targetId); 95 TSignedSeqPos SeqPosToAlignPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3); 96 TSignedSeqPos AlignPosToSeqPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3); 97 CDense_seg::TDim FindRow(const CSeq_align& align, CBioseq_Handle bsh); 98 CRef<CSeq_loc> CreateRowSeq_loc(const CSeq_align& align, CDense_seg::TDim row); 99 bool IsOrdered(const CSeq_loc &loc); 100 CRef<CSeq_loc> MakeOrdered(const CSeq_loc &loc); 101 102 CRef<CSeq_loc> x_TruncateToStopCodon(const CSeq_loc& loc, unsigned int truncLen); 103 CRef<CSeq_loc> x_ExtendToStopCodon(CSeq_feat& feat); 104 105 CBioseq_Handle m_Src; 106 CBioseq_Handle m_Target; 107 CConstRef<CSeq_align> m_Alignment; 108 CScope& m_Scope; 109 bool m_CdsStopAtStopCodon; 110 bool m_CdsCleanupPartials; 111 CMessageListener_Basic* m_MessageListener; 112 CObject_id::TId* m_MaxFeatId = nullptr; 113 map<CObject_id::TId, CObject_id::TId> m_FeatIdMap; // map old feat-id to propagated feat-id 114 bool m_MergeAbutting; 115 bool m_ExpandOverGaps; 116 117 class CSynonymMapper : public ISynonymMapper 118 { 119 public: CSynonymMapper(CFeaturePropagator * e)120 CSynonymMapper(CFeaturePropagator *e) : m_e(e) {} ~CSynonymMapper(void)121 virtual ~CSynonymMapper(void) {} GetBestSynonym(const CSeq_id & id)122 virtual CSeq_id_Handle GetBestSynonym(const CSeq_id& id) {return sequence::GetId(m_e->m_Scope.GetBioseqHandle(id), sequence::eGetId_Best);} 123 private: 124 CFeaturePropagator* m_e; 125 }; 126 CSynonymMapper m_synonym_mapper; 127 }; 128 129 END_SCOPE(edit) 130 END_SCOPE(objects) 131 END_NCBI_SCOPE 132 133 #endif 134 // _EDIT_FEATURE_PROPAGATE__HPP_ 135