1 /*  $Id: feature_propagate.hpp 632623 2021-06-03 17:38:11Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data,  the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties,  express or implied,  including
19  *  warranties of performance,  merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors:  Colleen Bollin, Igor Filippov
27  */
28 
29 #ifndef _EDIT_FEATURE_PROPAGATE__HPP_
30 #define _EDIT_FEATURE_PROPAGATE__HPP_
31 
32 #include <corelib/ncbistd.hpp>
33 #include <corelib/ncbiobj.hpp>
34 #include <corelib/ncbi_message.hpp>
35 #include <objects/seqfeat/Seq_feat.hpp>
36 #include <objects/seqloc/Seq_loc.hpp>
37 #include <objects/general/Object_id.hpp>
38 #include <objects/seqloc/Seq_interval.hpp>
39 #include <objects/seqalign/Dense_seg.hpp>
40 #include <objmgr/seq_entry_handle.hpp>
41 #include <objmgr/bioseq_handle.hpp>
42 #include <objmgr/bioseq_ci.hpp>
43 #include <objmgr/scope.hpp>
44 #include <objmgr/util/sequence.hpp>
45 
46 
47 BEGIN_NCBI_SCOPE
48 BEGIN_SCOPE(objects)
49 BEGIN_SCOPE(edit)
50 
51 class NCBI_XOBJEDIT_EXPORT CFeaturePropagator
52 {
53 public:
54     NCBI_DEPRECATED CFeaturePropagator(CBioseq_Handle src, CBioseq_Handle target, const CSeq_align& align,
55         bool stop_at_stop = true, bool cleanup_partials = true, bool merge_abutting = true,
56         CMessageListener_Basic* pMessageListener = 0,
57         CObject_id::TId* feat_id = nullptr);
58     CFeaturePropagator(CBioseq_Handle src, CBioseq_Handle target, const CSeq_align& align,
59                        bool stop_at_stop = true, bool cleanup_partials = true, bool merge_abutting = true, bool expand_over_gaps = true,
60                        CMessageListener_Basic* pMessageListener = 0,
61                        CObject_id::TId* feat_id = nullptr);
~CFeaturePropagator()62     ~CFeaturePropagator() {}
63 
64     CRef<CSeq_feat> Propagate(const objects::CSeq_feat& orig_feat);
65     vector<CRef<CSeq_feat> > PropagateAll();
66     vector<CRef<CSeq_feat> > PropagateAllReportFailures(
67         vector<CConstRef<CSeq_feat> >&);
68 
69     CRef<CSeq_feat> ConstructProteinFeatureForPropagatedCodingRegion(const CSeq_feat& orig_cds, const CSeq_feat& new_cds);
70 
71     /// Propagates a feature list from the source sequence
72     /// The propagated protein feature is stored right after the propagated cds
73     vector<CRef<CSeq_feat>> PropagateFeatureList(const vector<CConstRef<CSeq_feat>>& orig_feats);
74 
75     typedef enum {
76         eFeaturePropagationProblem_None = 0,
77         eFeaturePropagationProblem_FeatureLocation,
78         eFeaturePropagationProblem_CodeBreakLocation,
79         eFeaturePropagationProblem_AnticodonLocation
80     } EGapIntervalType;
81 
82 private:
83    // Prohibit copy constructor and assignment operator
84     CFeaturePropagator(const CFeaturePropagator& value);
85     CFeaturePropagator& operator=(const CFeaturePropagator& value);
86 
87     void x_PropagateCds(CSeq_feat& feat, const CSeq_id& targetId, bool origIsPartialStart);
88     void x_CdsMapCodeBreaks(CSeq_feat& feat, const CSeq_id& targetId);
89     void x_CdsStopAtStopCodon(CSeq_feat& cds);
90     void x_CdsCleanupPartials(CSeq_feat& cds, bool origIsPartialStart);
91 
92     void x_PropagatetRNA(CSeq_feat& feat, const CSeq_id& targetId);
93 
94     CRef<CSeq_loc> x_MapLocation(const CSeq_loc& sourceLoc, const CSeq_id& targetId);
95     TSignedSeqPos SeqPosToAlignPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3);
96     TSignedSeqPos AlignPosToSeqPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3);
97     CDense_seg::TDim  FindRow(const CSeq_align& align, CBioseq_Handle bsh);
98     CRef<CSeq_loc>  CreateRowSeq_loc(const CSeq_align& align, CDense_seg::TDim  row);
99     bool IsOrdered(const CSeq_loc &loc);
100     CRef<CSeq_loc> MakeOrdered(const CSeq_loc &loc);
101 
102     CRef<CSeq_loc> x_TruncateToStopCodon(const CSeq_loc& loc, unsigned int truncLen);
103     CRef<CSeq_loc> x_ExtendToStopCodon(CSeq_feat& feat);
104 
105     CBioseq_Handle m_Src;
106     CBioseq_Handle m_Target;
107     CConstRef<CSeq_align> m_Alignment;
108     CScope& m_Scope;
109     bool m_CdsStopAtStopCodon;
110     bool m_CdsCleanupPartials;
111     CMessageListener_Basic* m_MessageListener;
112     CObject_id::TId* m_MaxFeatId = nullptr;
113     map<CObject_id::TId, CObject_id::TId> m_FeatIdMap; // map old feat-id to propagated feat-id
114     bool m_MergeAbutting;
115     bool m_ExpandOverGaps;
116 
117     class CSynonymMapper : public ISynonymMapper
118     {
119     public:
CSynonymMapper(CFeaturePropagator * e)120         CSynonymMapper(CFeaturePropagator *e) : m_e(e) {}
~CSynonymMapper(void)121         virtual ~CSynonymMapper(void) {}
GetBestSynonym(const CSeq_id & id)122         virtual CSeq_id_Handle GetBestSynonym(const CSeq_id& id) {return sequence::GetId(m_e->m_Scope.GetBioseqHandle(id), sequence::eGetId_Best);}
123     private:
124         CFeaturePropagator* m_e;
125     };
126     CSynonymMapper m_synonym_mapper;
127 };
128 
129 END_SCOPE(edit)
130 END_SCOPE(objects)
131 END_NCBI_SCOPE
132 
133 #endif
134         // _EDIT_FEATURE_PROPAGATE__HPP_
135