1 #ifndef EDIT___GAP_TRIM__HPP
2 #define EDIT___GAP_TRIM__HPP
3 
4 /*  $Id: gap_trim.hpp 632623 2021-06-03 17:38:11Z ivanov $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author:  Colleen Bollin
30  *
31  * File Description:
32  *   Adjusting features for gaps
33  *   .......
34  *
35  */
36 
37 #include <corelib/ncbistd.hpp>
38 
39 #include <objmgr/scope.hpp>
40 #include <objects/seqloc/Seq_loc.hpp>
41 #include <objects/seqfeat/Cdregion.hpp>
42 #include <objects/general/Object_id.hpp>
43 #include <objmgr/seq_feat_handle.hpp>
44 
45 
46 BEGIN_NCBI_SCOPE
47 BEGIN_SCOPE(objects)
48 
// Forward declarations of data-object classes referenced by this header.
class CSeq_entry;
class CBioseq;
class CBioseq_set;
class CSeq_annot;
class CSeq_feat;

// Forward declarations of object-manager classes referenced by this header.
class CSeq_entry_Handle;
class CBioseq_Handle;
class CBioseq_set_Handle;
class CSeq_annot_Handle;
class CSeq_feat_Handle;
// ListGappedFeatures() below takes a CFeat_CI&; declare the name here so this
// header does not depend on another header happening to declare it first.
class CFeat_CI;
62 
63 BEGIN_SCOPE(edit)
64 
65 class NCBI_XOBJEDIT_EXPORT CFeatGapInfo : public CObject {
66 public:
CFeatGapInfo()67     CFeatGapInfo() {};
68     CFeatGapInfo(CSeq_feat_Handle sf);
~CFeatGapInfo()69     ~CFeatGapInfo() {};
70 
71     void CollectGaps(const CSeq_loc& feat_loc, CScope& scope);
72     void CalculateRelevantIntervals(bool unknown_length, bool known_length, bool ns = false);
HasKnown() const73     bool HasKnown() const { return m_Known; };
HasUnknown() const74     bool HasUnknown() const { return m_Unknown; };
HasNs() const75     bool HasNs() const { return m_Ns; };
76 
77     bool Trimmable() const;
78     bool Splittable() const;
79     bool ShouldRemove() const;
80 
81     void Trim(CSeq_loc& loc, bool make_partial, CScope& scope);
82     typedef vector< CRef<CSeq_loc> > TLocList;
83     TLocList Split(const CSeq_loc& orig, bool in_intron, bool make_partial);
84 
85     vector<CRef<CSeq_feat> > AdjustForRelevantGapIntervals(bool make_partial, bool trim, bool split, bool in_intron, bool create_general_only = false);
GetFeature() const86     CSeq_feat_Handle GetFeature() const { return m_Feature; };
87 
88     static CRef<CBioseq> AdjustProteinSeq(const CBioseq& seq, const CSeq_feat& feat, const CSeq_feat& orig_cds, CScope& scope);
89 
90     bool IsRelatedByCrossRef(const CFeatGapInfo& other) const;
91 
92 protected:
93     typedef enum {
94         eGapIntervalType_unknown = 0,
95         eGapIntervalType_known,
96         eGapIntervalType_n
97     } EGapIntervalType;
98 
99     typedef pair<EGapIntervalType, pair<size_t, size_t> > TGapInterval;
100     typedef vector<TGapInterval> TGapIntervalList;
101     TGapIntervalList m_Gaps;
102 
103     typedef vector<pair<size_t, size_t> > TIntervalList;
104     TIntervalList m_InsideGaps;
105     TIntervalList m_LeftGaps;
106     TIntervalList m_RightGaps;
107 
108     TSeqPos m_Start;
109     TSeqPos m_Stop;
110 
111     bool m_Known;
112     bool m_Unknown;
113     bool m_Ns;
114 
115     CSeq_feat_Handle m_Feature;
116 
117     void x_AdjustOrigLabel(CSeq_feat& feat, size_t& id_offset, string& id_label, const string& qual);
118     static void x_AdjustFrame(CCdregion& cdregion, TSeqPos frame_adjust);
119     void x_AdjustCodebreaks(CSeq_feat& feat);
120     void x_AdjustAnticodons(CSeq_feat& feat);
121     bool x_UsableInterval(const TGapInterval& interval, bool unknown_length, bool known_length, bool ns);
122 };
123 
124 typedef vector<CRef<CFeatGapInfo> > TGappedFeatList;
125 NCBI_XOBJEDIT_EXPORT
126 TGappedFeatList ListGappedFeatures(CFeat_CI& feat_it, CScope& scope);
127 
128 NCBI_XOBJEDIT_EXPORT
129 void ProcessForTrimAndSplitUpdates(CSeq_feat_Handle cds, vector<CRef<CSeq_feat> > updates);
130 
131 // for adjusting feature IDs after splitting
132 
133 // for fixing a list of features from a split: pass in entire list, feature IDs will be changed
134 // for features after the first starting with the value of next_id, which will be increased
135 // after each use
136 NCBI_XOBJEDIT_EXPORT
137 void FixFeatureIdsForUpdates(vector<CRef<CSeq_feat> > updates, objects::CObject_id::TId& next_id);
138 
139 // adjust feature ID for a single feature (will increase next_id if feature had an ID to be adjusted)
140 NCBI_XOBJEDIT_EXPORT
141 void FixFeatureIdsForUpdates(CSeq_feat& feat, CObject_id::TId& next_id);
142 
143 // adjust two lists of features, that are the result of splitting two features that had cross-references
144 // to each other. Both features must have been split into the same number of pieces.
145 // The new cross-references pair the elements of the list in order.
146 NCBI_XOBJEDIT_EXPORT
147 void FixFeatureIdsForUpdatePair(vector<CRef<CSeq_feat> >& updates1, vector<CRef<CSeq_feat> >& updates2);
148 
149 
150 END_SCOPE(edit)
151 END_SCOPE(objects)
152 END_NCBI_SCOPE
153 
154 #endif  /* EDIT___GAP_TRIM__HPP */
155