/*  $Id: translation_problems.hpp 575890 2018-12-05 21:35:58Z kans $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Author:  Colleen Bollin
 *
 * File Description:
 *   For detecting translation problems
 *   .......
 *
 */

34 #ifndef VALIDATOR___TRANSLATION_PROBLEMS__HPP
35 #define VALIDATOR___TRANSLATION_PROBLEMS__HPP
36 
37 #include <corelib/ncbistd.hpp>
38 #include <corelib/ncbi_autoinit.hpp>
39 
40 #include <objmgr/scope.hpp>
41 #include <objects/seqfeat/Seq_feat.hpp>
42 
43 BEGIN_NCBI_SCOPE
44 BEGIN_SCOPE(objects)
45 
46 class CSeq_feat;
47 class CBioseq_Handle;
48 
49 BEGIN_SCOPE(validator)
50 
51 typedef Char(&TSpliceSite)[2];
52 
53 // =============================  Validate SeqFeat  ============================
54 
55 
56 class NCBI_VALIDATOR_EXPORT CCDSTranslationProblems {
57 public:
58     CCDSTranslationProblems();
~CCDSTranslationProblems()59     ~CCDSTranslationProblems() {};
60 
61     void CalculateTranslationProblems
62         (const CSeq_feat& feat,
63         CBioseq_Handle loc_handle,
64         CBioseq_Handle prot_handle,
65         bool ignore_exceptions,
66         bool far_fetch_cds,
67         bool standalone_annot,
68         bool single_seq,
69         bool is_gpipe,
70         bool is_genomic,
71         bool is_refseq,
72         bool is_nt_or_ng_or_nw,
73         bool is_nc,
74         bool has_accession,
75         CScope* scope);
76 
77     typedef enum {
78         eCDSTranslationProblem_FrameNotPartial = 1,
79         eCDSTranslationProblem_FrameNotConsensus = 2,
80         eCDSTranslationProblem_NoStop = 4,
81         eCDSTranslationProblem_StopPartial = 8,
82         eCDSTranslationProblem_PastStop = 16,
83         eCDSTranslationProblem_ShouldStartPartial = 32,
84         eCDSTranslationProblem_Mismatches = 64,
85         eCDSTranslationProblem_BadStart = 128,
86         eCDSTranslationProblem_TooManyX = 256,
87         eCDSTranslationProblem_UnableToFetch = 512,
88         eCDSTranslationProblem_NoProtein = 1024,
89         eCDSTranslationProblem_ShouldBePartialButIsnt = 2048,
90         eCDSTranslationProblem_ShouldNotBePartialButIs = 4096,
91         eCDSTranslationProblem_UnnecessaryException = 8192,
92         eCDSTranslationProblem_UnqualifiedException = 16384,
93         eCDSTranslationProblem_ErroneousException = 32768,
94 
95     } ECDSTranslationProblem;
96 
GetTranslationProblemFlags() const97     size_t GetTranslationProblemFlags() const { return m_ProblemFlags; }
98 
99     typedef enum {
100         eTranslExceptPhase = 0,
101         eTranslExceptSuspicious,
102         eTranslExceptUnnecessary,
103         eTranslExceptUnexpected
104     } ETranslExceptType;
105     typedef struct {
106         ETranslExceptType problem;
107         unsigned char ex;
108         size_t prot_pos;
109     } STranslExceptProblem;
110     typedef vector<STranslExceptProblem> TTranslExceptProblems;
111 
GetTranslExceptProblems() const112     const TTranslExceptProblems& GetTranslExceptProblems() const
113     {
114         return m_TranslExceptProblems;
115     }
116 
117     typedef struct {
118         unsigned char prot_res;
119         unsigned char transl_res;
120         TSeqPos pos;
121     } STranslationMismatch;
122     typedef vector<STranslationMismatch> TTranslationMismatches;
123 
GetTranslationMismatches() const124     const TTranslationMismatches& GetTranslationMismatches() const { return m_Mismatches; }
125 
HasException() const126     bool HasException() const { return m_HasException; }
UnableToTranslate() const127     bool UnableToTranslate() const { return m_UnableToTranslate; }
HasUnparsedTranslExcept() const128     bool HasUnparsedTranslExcept() const { return m_UnparsedTranslExcept; }
GetInternalStopCodons() const129     size_t GetInternalStopCodons() const { return m_InternalStopCodons; }
GetNumNonsenseIntrons() const130     size_t GetNumNonsenseIntrons() const { return m_NumNonsenseIntrons; }
AltStart() const131     bool AltStart() const { return m_AltStart; }
GetTranslStartCharacter() const132     char GetTranslStartCharacter() const { return m_TranslStart; }
GetRaggedLength() const133     int GetRaggedLength() const { return m_RaggedLength; }
GetProtLen() const134     size_t GetProtLen() const { return m_ProtLen; }
GetTransLen() const135     size_t GetTransLen() const { return m_TransLen; }
GetTranslTerminalX() const136     size_t GetTranslTerminalX() const { return m_TranslTerminalX; }
GetProdTerminalX() const137     size_t GetProdTerminalX() const { return m_ProdTerminalX; }
138 
139     static vector<CRef<CSeq_loc> > GetNonsenseIntrons(const CSeq_feat& feat, CScope& scope);
140 
141 private:
142 
143     size_t m_ProblemFlags;
144     int    m_RaggedLength;
145     bool   m_HasDashXStart;
146     size_t m_ProtLen;
147     size_t m_TransLen;
148     TTranslationMismatches m_Mismatches;
149     char   m_TranslStart;
150     size_t m_InternalStopCodons;
151     bool   m_UnableToTranslate;
152     size_t m_TranslTerminalX;
153     size_t m_ProdTerminalX;
154     bool   m_UnparsedTranslExcept;
155     bool   m_AltStart;
156     size_t m_NumNonsenseIntrons;
157     TTranslExceptProblems m_TranslExceptProblems;
158     bool   m_HasException;
159 
160     void x_Reset();
161 
162     TTranslExceptProblems x_GetTranslExceptProblems(const CSeq_feat& feat, CBioseq_Handle loc_handle, CScope* scope, bool is_refseq);
163 
164     static size_t x_CountNonsenseIntrons(const CSeq_feat& feat, CScope* scope);
165     static bool x_ProteinHasTooManyXs(const string& transl_prot);
166 
167     static bool x_IsThreeBaseNonsense(const CSeq_feat& feat,
168         const CSeq_id& id,
169         const CCdregion& cdr,
170         TSeqPos start,
171         TSeqPos stop,
172         ENa_strand strand,
173         CScope *scope);
174 
175     static void x_GetExceptionFlags
176         (const string& except_text,
177         bool& unclassified_except,
178         bool& mismatch_except,
179         bool& frameshift_except,
180         bool& rearrange_except,
181         bool& product_replaced,
182         bool& mixed_population,
183         bool& low_quality,
184         bool& rna_editing,
185         bool& transcript_or_proteomic);
186 
187     static size_t x_CheckCDSFrame(const CSeq_feat& feat, CScope* scope);
188 
189     static bool x_Is5AtEndSpliceSiteOrGap(const CSeq_loc& loc, CScope& scope);
190 
191     static size_t x_CountTerminalXs(const string& transl_prot, bool skip_stop);
192     static size_t x_CountTerminalXs(const CSeqVector& prot_vec);
193 
194     void x_GetCdTransErrors(const CSeq_feat& feat, CBioseq_Handle product, bool show_stop, bool got_stop, CScope* scope);
195 
196     static TTranslationMismatches x_GetTranslationMismatches(const CSeq_feat& feat,
197         const CSeqVector& prot_vec,
198         const string& transl_prot,
199         bool has_accession);
200 
201     static bool x_JustifiesException(const TTranslExceptProblems& problems);
202     bool x_JustifiesException() const;
203 
204     static int x_CheckForRaggedEnd(const CSeq_feat& feat, CScope* scope);
205     static int x_CheckForRaggedEnd(const CSeq_loc&, const CCdregion& cdr, CScope* scope);
206 };
207 
208 
/// Problem codes for mRNA checks.  Each enumerator is a distinct power of
/// two (1 .. 512), so several can be combined in one integer value.
typedef enum {
    eMRNAProblem_TransFail = 1,
    eMRNAProblem_UnableToFetch = 2,
    eMRNAProblem_TranscriptLenLess = 4,
    eMRNAProblem_PolyATail100 = 8,
    eMRNAProblem_PolyATail95 = 16,
    eMRNAProblem_TranscriptLenMore = 32,
    eMRNAProblem_Mismatch = 64,
    eMRNAProblem_UnnecessaryException = 128,
    eMRNAProblem_ErroneousException = 256,
    eMRNAProblem_ProductReplaced = 512
} EMRNAProblem;

222 size_t GetMRNATranslationProblems
223 (const CSeq_feat& feat,
224 size_t& mismatches,
225 bool ignore_exceptions,
226 CBioseq_Handle nuc,
227 CBioseq_Handle rna,
228 bool far_fetch,
229 bool is_gpipe,
230 bool is_genomic,
231 CScope* scope);
232 
233 END_SCOPE(validator)
234 END_SCOPE(objects)
235 END_NCBI_SCOPE
236 
237 #endif  /* VALIDATOR___TRANSLATION_PROBLEMS__HPP */
238