1 /* $Id: translation_problems.hpp 575890 2018-12-05 21:35:58Z kans $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 *` 26 * Author: Colleen Bollin 27 * 28 * File Description: 29 * For detecting translation problems 30 * ....... 31 * 32 */ 33 34 #ifndef VALIDATOR___TRANSLATION_PROBLEMS__HPP 35 #define VALIDATOR___TRANSLATION_PROBLEMS__HPP 36 37 #include <corelib/ncbistd.hpp> 38 #include <corelib/ncbi_autoinit.hpp> 39 40 #include <objmgr/scope.hpp> 41 #include <objects/seqfeat/Seq_feat.hpp> 42 43 BEGIN_NCBI_SCOPE 44 BEGIN_SCOPE(objects) 45 46 class CSeq_feat; 47 class CBioseq_Handle; 48 49 BEGIN_SCOPE(validator) 50 51 typedef Char(&TSpliceSite)[2]; 52 53 // ============================= Validate SeqFeat ============================ 54 55 56 class NCBI_VALIDATOR_EXPORT CCDSTranslationProblems { 57 public: 58 CCDSTranslationProblems(); ~CCDSTranslationProblems()59 ~CCDSTranslationProblems() {}; 60 61 void CalculateTranslationProblems 62 (const CSeq_feat& feat, 63 CBioseq_Handle loc_handle, 64 CBioseq_Handle prot_handle, 65 bool ignore_exceptions, 66 bool far_fetch_cds, 67 bool standalone_annot, 68 bool single_seq, 69 bool is_gpipe, 70 bool is_genomic, 71 bool is_refseq, 72 bool is_nt_or_ng_or_nw, 73 bool is_nc, 74 bool has_accession, 75 CScope* scope); 76 77 typedef enum { 78 eCDSTranslationProblem_FrameNotPartial = 1, 79 eCDSTranslationProblem_FrameNotConsensus = 2, 80 eCDSTranslationProblem_NoStop = 4, 81 eCDSTranslationProblem_StopPartial = 8, 82 eCDSTranslationProblem_PastStop = 16, 83 eCDSTranslationProblem_ShouldStartPartial = 32, 84 eCDSTranslationProblem_Mismatches = 64, 85 eCDSTranslationProblem_BadStart = 128, 86 eCDSTranslationProblem_TooManyX = 256, 87 eCDSTranslationProblem_UnableToFetch = 512, 88 eCDSTranslationProblem_NoProtein = 1024, 89 eCDSTranslationProblem_ShouldBePartialButIsnt = 2048, 90 eCDSTranslationProblem_ShouldNotBePartialButIs = 4096, 91 eCDSTranslationProblem_UnnecessaryException = 8192, 92 eCDSTranslationProblem_UnqualifiedException = 16384, 93 eCDSTranslationProblem_ErroneousException = 32768, 94 95 } ECDSTranslationProblem; 96 GetTranslationProblemFlags() const97 size_t GetTranslationProblemFlags() const { return m_ProblemFlags; } 98 99 typedef enum { 100 eTranslExceptPhase = 0, 101 eTranslExceptSuspicious, 102 eTranslExceptUnnecessary, 103 eTranslExceptUnexpected 104 } ETranslExceptType; 105 typedef struct { 106 ETranslExceptType problem; 107 unsigned char ex; 108 size_t prot_pos; 109 } STranslExceptProblem; 110 typedef vector<STranslExceptProblem> TTranslExceptProblems; 111 GetTranslExceptProblems() const112 const TTranslExceptProblems& GetTranslExceptProblems() const 113 { 114 return m_TranslExceptProblems; 115 } 116 117 typedef struct { 118 unsigned char prot_res; 119 unsigned char transl_res; 120 TSeqPos pos; 121 } STranslationMismatch; 122 typedef vector<STranslationMismatch> TTranslationMismatches; 123 GetTranslationMismatches() const124 const TTranslationMismatches& GetTranslationMismatches() const { return m_Mismatches; } 125 HasException() const126 bool HasException() const { return m_HasException; } UnableToTranslate() const127 bool UnableToTranslate() const { return m_UnableToTranslate; } HasUnparsedTranslExcept() const128 bool HasUnparsedTranslExcept() const { return m_UnparsedTranslExcept; } GetInternalStopCodons() const129 size_t GetInternalStopCodons() const { return m_InternalStopCodons; } GetNumNonsenseIntrons() const130 size_t GetNumNonsenseIntrons() const { return m_NumNonsenseIntrons; } AltStart() const131 bool AltStart() const { return m_AltStart; } GetTranslStartCharacter() const132 char GetTranslStartCharacter() const { return m_TranslStart; } GetRaggedLength() const133 int GetRaggedLength() const { return m_RaggedLength; } GetProtLen() const134 size_t GetProtLen() const { return m_ProtLen; } GetTransLen() const135 size_t GetTransLen() const { return m_TransLen; } GetTranslTerminalX() const136 size_t GetTranslTerminalX() const { return m_TranslTerminalX; } GetProdTerminalX() const137 size_t GetProdTerminalX() const { return m_ProdTerminalX; } 138 139 static vector<CRef<CSeq_loc> > GetNonsenseIntrons(const CSeq_feat& feat, CScope& scope); 140 141 private: 142 143 size_t m_ProblemFlags; 144 int m_RaggedLength; 145 bool m_HasDashXStart; 146 size_t m_ProtLen; 147 size_t m_TransLen; 148 TTranslationMismatches m_Mismatches; 149 char m_TranslStart; 150 size_t m_InternalStopCodons; 151 bool m_UnableToTranslate; 152 size_t m_TranslTerminalX; 153 size_t m_ProdTerminalX; 154 bool m_UnparsedTranslExcept; 155 bool m_AltStart; 156 size_t m_NumNonsenseIntrons; 157 TTranslExceptProblems m_TranslExceptProblems; 158 bool m_HasException; 159 160 void x_Reset(); 161 162 TTranslExceptProblems x_GetTranslExceptProblems(const CSeq_feat& feat, CBioseq_Handle loc_handle, CScope* scope, bool is_refseq); 163 164 static size_t x_CountNonsenseIntrons(const CSeq_feat& feat, CScope* scope); 165 static bool x_ProteinHasTooManyXs(const string& transl_prot); 166 167 static bool x_IsThreeBaseNonsense(const CSeq_feat& feat, 168 const CSeq_id& id, 169 const CCdregion& cdr, 170 TSeqPos start, 171 TSeqPos stop, 172 ENa_strand strand, 173 CScope *scope); 174 175 static void x_GetExceptionFlags 176 (const string& except_text, 177 bool& unclassified_except, 178 bool& mismatch_except, 179 bool& frameshift_except, 180 bool& rearrange_except, 181 bool& product_replaced, 182 bool& mixed_population, 183 bool& low_quality, 184 bool& rna_editing, 185 bool& transcript_or_proteomic); 186 187 static size_t x_CheckCDSFrame(const CSeq_feat& feat, CScope* scope); 188 189 static bool x_Is5AtEndSpliceSiteOrGap(const CSeq_loc& loc, CScope& scope); 190 191 static size_t x_CountTerminalXs(const string& transl_prot, bool skip_stop); 192 static size_t x_CountTerminalXs(const CSeqVector& prot_vec); 193 194 void x_GetCdTransErrors(const CSeq_feat& feat, CBioseq_Handle product, bool show_stop, bool got_stop, CScope* scope); 195 196 static TTranslationMismatches x_GetTranslationMismatches(const CSeq_feat& feat, 197 const CSeqVector& prot_vec, 198 const string& transl_prot, 199 bool has_accession); 200 201 static bool x_JustifiesException(const TTranslExceptProblems& problems); 202 bool x_JustifiesException() const; 203 204 static int x_CheckForRaggedEnd(const CSeq_feat& feat, CScope* scope); 205 static int x_CheckForRaggedEnd(const CSeq_loc&, const CCdregion& cdr, CScope* scope); 206 }; 207 208 209 typedef enum { 210 eMRNAProblem_TransFail = 1, 211 eMRNAProblem_UnableToFetch = 2, 212 eMRNAProblem_TranscriptLenLess = 4, 213 eMRNAProblem_PolyATail100 = 8, 214 eMRNAProblem_PolyATail95 = 16, 215 eMRNAProblem_TranscriptLenMore = 32, 216 eMRNAProblem_Mismatch = 64, 217 eMRNAProblem_UnnecessaryException = 128, 218 eMRNAProblem_ErroneousException = 256, 219 eMRNAProblem_ProductReplaced = 512 220 } EMRNAProblem; 221 222 size_t GetMRNATranslationProblems 223 (const CSeq_feat& feat, 224 size_t& mismatches, 225 bool ignore_exceptions, 226 CBioseq_Handle nuc, 227 CBioseq_Handle rna, 228 bool far_fetch, 229 bool is_gpipe, 230 bool is_genomic, 231 CScope* scope); 232 233 END_SCOPE(validator) 234 END_SCOPE(objects) 235 END_NCBI_SCOPE 236 237 #endif /* VALIDATOR___TRANSLATION_PROBLEMS__HPP */ 238