1 /* $Id: splice_problems.hpp 566666 2018-07-05 10:48:05Z bollin $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 *` 26 * Author: Colleen Bollin 27 * 28 * File Description: 29 * For validating splice sites 30 * ....... 31 * 32 */ 33 34 #ifndef VALIDATOR___SPLICE_PROBLEMS__HPP 35 #define VALIDATOR___SPLICE_PROBLEMS__HPP 36 37 #include <corelib/ncbistd.hpp> 38 #include <corelib/ncbi_autoinit.hpp> 39 40 #include <objmgr/scope.hpp> 41 #include <objects/seqfeat/Seq_feat.hpp> 42 #include <objects/seqfeat/SeqFeatData.hpp> 43 44 #include <objmgr/util/feature.hpp> 45 46 BEGIN_NCBI_SCOPE 47 BEGIN_SCOPE(objects) 48 49 class CSeq_entry; 50 class CCit_sub; 51 class CCit_art; 52 class CCit_gen; 53 class CSeq_feat; 54 class CBioseq; 55 class CSeqdesc; 56 class CSeq_annot; 57 class CTrna_ext; 58 class CProt_ref; 59 class CSeq_loc; 60 class CFeat_CI; 61 class CPub_set; 62 class CAuth_list; 63 class CTitle; 64 class CMolInfo; 65 class CUser_object; 66 class CSeqdesc_CI; 67 class CDense_diag; 68 class CDense_seg; 69 class CSeq_align_set; 70 class CPubdesc; 71 class CBioSource; 72 class COrg_ref; 73 class CDelta_seq; 74 class CGene_ref; 75 class CCdregion; 76 class CRNA_ref; 77 class CImp_feat; 78 class CSeq_literal; 79 class CBioseq_Handle; 80 class CSeq_feat_Handle; 81 class CCountries; 82 class CInferencePrefixList; 83 class CComment_set; 84 class CTaxon3_reply; 85 class ITaxon3; 86 class CT3Error; 87 88 BEGIN_SCOPE(validator) 89 90 class CValidError_imp; 91 class CTaxValidationAndCleanup; 92 class CGeneCache; 93 class CValidError_base; 94 95 typedef Char(&TSpliceSite)[2]; 96 97 // ============================= Validate SeqFeat ============================ 98 99 100 101 class NCBI_VALIDATOR_EXPORT CSpliceProblems { 102 public: CSpliceProblems()103 CSpliceProblems() : m_ExceptionUnnecessary(false), m_ErrorsNotExpected(false) {}; ~CSpliceProblems()104 ~CSpliceProblems() {}; 105 106 void CalculateSpliceProblems(const CSeq_feat& feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle); 107 108 // first is problem flags, second is position 109 typedef pair<size_t, TSeqPos> TSpliceProblem; 110 typedef vector<TSpliceProblem> TSpliceProblemList; 111 112 typedef enum { 113 eSpliceSiteRead_OK = 0, 114 eSpliceSiteRead_BadSeq, 115 eSpliceSiteRead_Gap, 116 eSpliceSiteRead_OutOfRange, 117 eSpliceSiteRead_WrongNT 118 } ESpliceSiteRead; 119 120 bool SpliceSitesHaveErrors(); IsExceptionUnnecessary() const121 bool IsExceptionUnnecessary() const { return m_ExceptionUnnecessary; } AreErrorsUnexpected() const122 bool AreErrorsUnexpected() const { return m_ErrorsNotExpected; } GetDonorProblems() const123 const TSpliceProblemList& GetDonorProblems() const { return m_DonorProblems; } GetAcceptorProblems() const124 const TSpliceProblemList& GetAcceptorProblems() const { return m_AcceptorProblems; } 125 126 private: 127 TSpliceProblemList m_DonorProblems; 128 TSpliceProblemList m_AcceptorProblems; 129 bool m_ExceptionUnnecessary; 130 bool m_ErrorsNotExpected; 131 132 void ValidateSpliceCdregion(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand); 133 void ValidateSpliceMrna(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand); 134 void ValidateSpliceExon(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand); 135 void ValidateDonorAcceptorPair(ENa_strand strand, TSeqPos stop, const CSeqVector& vec_donor, TSeqPos seq_len_donor, 136 TSeqPos start, const CSeqVector& vec_acceptor, TSeqPos seq_len_acceptor); 137 138 139 ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector& vec, TSeqPos seq_len, TSpliceSite& site); 140 ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector& vec, TSeqPos seq_len); 141 ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector& vec, TSeqPos seq_len, TSpliceSite& site); 142 ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector& vec, TSeqPos seq_len); 143 }; 144 145 146 const string kSpliceSiteGTAG = "GT-AG"; 147 const string kSpliceSiteGCAG = "GC-AG"; 148 const string kSpliceSiteATAC = "AT-AC"; 149 const string kSpliceSiteGT = "GT"; 150 const string kSpliceSiteGC = "GC"; 151 const string kSpliceSiteAG = "AG"; 152 153 typedef Char const (&TConstSpliceSite)[2]; 154 155 bool CheckAdjacentSpliceSites(const string& signature, ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor); 156 bool CheckSpliceSite(const string& signature, ENa_strand strand, TConstSpliceSite site); 157 bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor); 158 bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor); 159 bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor); 160 161 162 163 164 END_SCOPE(validator) 165 END_SCOPE(objects) 166 END_NCBI_SCOPE 167 168 #endif /* VALIDATOR___SPLICE_PROBLEMS__HPP */ 169