1 #ifndef EDIT___GAP_TRIM__HPP 2 #define EDIT___GAP_TRIM__HPP 3 4 /* $Id: gap_trim.hpp 632623 2021-06-03 17:38:11Z ivanov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Colleen Bollin 30 * 31 * File Description: 32 * Adjusting features for gaps 33 * ....... 34 * 35 */ 36 37 #include <corelib/ncbistd.hpp> 38 39 #include <objmgr/scope.hpp> 40 #include <objects/seqloc/Seq_loc.hpp> 41 #include <objects/seqfeat/Cdregion.hpp> 42 #include <objects/general/Object_id.hpp> 43 #include <objmgr/seq_feat_handle.hpp> 44 45 46 BEGIN_NCBI_SCOPE 47 BEGIN_SCOPE(objects) 48 49 class CSeq_entry; 50 class CBioseq; 51 class CBioseq_set; 52 class CSeq_annot; 53 class CSeq_feat; 54 55 56 57 class CSeq_entry_Handle; 58 class CBioseq_Handle; 59 class CBioseq_set_Handle; 60 class CSeq_annot_Handle; 61 class CSeq_feat_Handle; 62 63 BEGIN_SCOPE(edit) 64 65 class NCBI_XOBJEDIT_EXPORT CFeatGapInfo : public CObject { 66 public: CFeatGapInfo()67 CFeatGapInfo() {}; 68 CFeatGapInfo(CSeq_feat_Handle sf); ~CFeatGapInfo()69 ~CFeatGapInfo() {}; 70 71 void CollectGaps(const CSeq_loc& feat_loc, CScope& scope); 72 void CalculateRelevantIntervals(bool unknown_length, bool known_length, bool ns = false); HasKnown() const73 bool HasKnown() const { return m_Known; }; HasUnknown() const74 bool HasUnknown() const { return m_Unknown; }; HasNs() const75 bool HasNs() const { return m_Ns; }; 76 77 bool Trimmable() const; 78 bool Splittable() const; 79 bool ShouldRemove() const; 80 81 void Trim(CSeq_loc& loc, bool make_partial, CScope& scope); 82 typedef vector< CRef<CSeq_loc> > TLocList; 83 TLocList Split(const CSeq_loc& orig, bool in_intron, bool make_partial); 84 85 vector<CRef<CSeq_feat> > AdjustForRelevantGapIntervals(bool make_partial, bool trim, bool split, bool in_intron, bool create_general_only = false); GetFeature() const86 CSeq_feat_Handle GetFeature() const { return m_Feature; }; 87 88 static CRef<CBioseq> AdjustProteinSeq(const CBioseq& seq, const CSeq_feat& feat, const CSeq_feat& orig_cds, CScope& scope); 89 90 bool IsRelatedByCrossRef(const CFeatGapInfo& other) const; 91 92 protected: 93 typedef enum { 94 eGapIntervalType_unknown = 0, 95 eGapIntervalType_known, 96 eGapIntervalType_n 97 } EGapIntervalType; 98 99 typedef pair<EGapIntervalType, pair<size_t, size_t> > TGapInterval; 100 typedef vector<TGapInterval> TGapIntervalList; 101 TGapIntervalList m_Gaps; 102 103 typedef vector<pair<size_t, size_t> > TIntervalList; 104 TIntervalList m_InsideGaps; 105 TIntervalList m_LeftGaps; 106 TIntervalList m_RightGaps; 107 108 TSeqPos m_Start; 109 TSeqPos m_Stop; 110 111 bool m_Known; 112 bool m_Unknown; 113 bool m_Ns; 114 115 CSeq_feat_Handle m_Feature; 116 117 void x_AdjustOrigLabel(CSeq_feat& feat, size_t& id_offset, string& id_label, const string& qual); 118 static void x_AdjustFrame(CCdregion& cdregion, TSeqPos frame_adjust); 119 void x_AdjustCodebreaks(CSeq_feat& feat); 120 void x_AdjustAnticodons(CSeq_feat& feat); 121 bool x_UsableInterval(const TGapInterval& interval, bool unknown_length, bool known_length, bool ns); 122 }; 123 124 typedef vector<CRef<CFeatGapInfo> > TGappedFeatList; 125 NCBI_XOBJEDIT_EXPORT 126 TGappedFeatList ListGappedFeatures(CFeat_CI& feat_it, CScope& scope); 127 128 NCBI_XOBJEDIT_EXPORT 129 void ProcessForTrimAndSplitUpdates(CSeq_feat_Handle cds, vector<CRef<CSeq_feat> > updates); 130 131 // for adjusting feature IDs after splitting 132 133 // for fixing a list of features from a split: pass in entire list, feature IDs will be changed 134 // for features after the first starting with the value of next_id, which will be increased 135 // after each use 136 NCBI_XOBJEDIT_EXPORT 137 void FixFeatureIdsForUpdates(vector<CRef<CSeq_feat> > updates, objects::CObject_id::TId& next_id); 138 139 // adjust feature ID for a single feature (will increase next_id if feature had an ID to be adjusted) 140 NCBI_XOBJEDIT_EXPORT 141 void FixFeatureIdsForUpdates(CSeq_feat& feat, CObject_id::TId& next_id); 142 143 // adjust two lists of features, that are the result of splitting two features that had cross-references 144 // to each other. Both features must have been split into the same number of pieces. 145 // The new cross-references pair the elements of the list in order. 146 NCBI_XOBJEDIT_EXPORT 147 void FixFeatureIdsForUpdatePair(vector<CRef<CSeq_feat> >& updates1, vector<CRef<CSeq_feat> >& updates2); 148 149 150 END_SCOPE(edit) 151 END_SCOPE(objects) 152 END_NCBI_SCOPE 153 154 #endif /* EDIT___GAP_TRIM__HPP */ 155