1 #ifndef ALGO_GNOMON___SCORE__HPP 2 #define ALGO_GNOMON___SCORE__HPP 3 4 /* $Id: score.hpp 620759 2020-11-30 16:00:57Z souvorov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Alexandre Souvorov 30 * 31 * File Description: 32 * 33 */ 34 35 #include <corelib/ncbistd.hpp> 36 37 #include <algo/gnomon/chainer.hpp> 38 #include "gnomon_seq.hpp" 39 40 BEGIN_NCBI_SCOPE 41 BEGIN_SCOPE(gnomon) 42 43 class CTerminal; 44 class CCodingRegion; 45 class CNonCodingRegion; 46 class CIntronParameters; 47 class CIntergenicParameters; 48 49 class CSeqScores { 50 public: 51 CSeqScores (const CTerminal& a, const CTerminal& d,const CTerminal& stt, const CTerminal& stp, 52 const CCodingRegion& cr, const CNonCodingRegion& ncr, const CNonCodingRegion& ing, 53 const CIntronParameters& intron_params, 54 TSignedSeqPos from, TSignedSeqPos to, const TGeneModelList& cls, 55 const TInDels& initial_fshifts, double mpp, const CGnomonEngine& gnomon); 56 void Init(CResidueVec& original_sequence, bool leftwall, 57 bool rightwall, double consensuspenalty, 58 const CGnomonAnnotator_Base::TIntMap& notbridgeable_gaps_len, 59 const CGnomonAnnotator_Base::TGgapInfo& ggapinfo 60 ); 61 From() const62 TSignedSeqPos From() const { return m_chunk_start; } To() const63 TSignedSeqPos To() const { return m_chunk_stop; } AcceptorNumber(int strand) const64 int AcceptorNumber(int strand) const { return m_anum[strand]; } DonorNumber(int strand) const65 int DonorNumber(int strand) const { return m_dnum[strand]; } StartNumber(int strand) const66 int StartNumber(int strand) const { return m_sttnum[strand]; } StopNumber(int strand) const67 int StopNumber(int strand) const { return m_stpnum[strand]; } AcceptorScore(int i,int strand) const68 double AcceptorScore(int i, int strand) const { return m_ascr[strand][i]; } DonorScore(int i,int strand) const69 double DonorScore(int i, int strand) const { return m_dscr[strand][i]; } StartScore(int i,int strand) const70 double StartScore(int i, int strand) const { return m_sttscr[strand][i]; } StopScore(int i,int strand) const71 double StopScore(int i, int strand) const { return m_stpscr[strand][i]; } Acceptor() const72 const CTerminal& Acceptor() const { return m_acceptor; } Donor() const73 const CTerminal& Donor() const { return m_donor; } Start() const74 const CTerminal& Start() const { return m_start; } Stop() const75 const CTerminal& Stop() const { return m_stop; } Alignments() const76 const TGeneModelList& Alignments() const { return m_align_list; } SeqTInDels() const77 const TInDels& SeqTInDels() const { return m_fshifts; } FrameShiftedSeqMap() const78 const CAlignMap& FrameShiftedSeqMap() const { return m_map; } 79 bool StopInside(int a, int b, int strand, int frame) const; 80 bool OpenCodingRegion(int a, int b, int strand, int frame) const; 81 double CodingScore(int a, int b, int strand, int frame) const; ProtNumber(int a,int b) const82 int ProtNumber(int a, int b) const { return (m_protnum[b]-m_protnum[a]); } MultiProtPenalty() const83 double MultiProtPenalty() const { return m_mpp; } 84 bool OpenNonCodingRegion(int a, int b, int strand) const; 85 double NonCodingScore(int a, int b, int strand) const; 86 bool OpenIntergenicRegion(int a, int b) const; LeftAlignmentBoundary(int b) const87 int LeftAlignmentBoundary(int b) const { return m_inalign[b]; } 88 double IntergenicScore(int a, int b, int strand) const; SeqLen() const89 int SeqLen() const { return (int)m_seq[0].size(); } SplittedStop(int id,int ia,int strand,int ph) const90 bool SplittedStop(int id, int ia, int strand, int ph) const 91 { return (m_dsplit[strand][ph][id]&m_asplit[strand][ph][ia]) != 0; } 92 bool isStart(int i, int strand) const; 93 bool isStop(int i, int strand) const; 94 bool isReadingFrameLeftEnd(int i, int strand) const; 95 bool isReadingFrameRightEnd(int i, int strand) const; 96 bool isAG(int i, int strand) const; 97 bool isGT(int i, int strand) const; 98 bool isConsensusIntron(int i, int j, int strand) const; 99 const EResidue* SeqPtr(int i, int strand) const; 100 101 private: 102 CSeqScores& operator=(const CSeqScores&); 103 const CTerminal &m_acceptor, &m_donor, &m_start, &m_stop; 104 const CCodingRegion &m_cdr; 105 const CNonCodingRegion &m_ncdr, &m_intrg; 106 TGeneModelList m_align_list; 107 TInDels m_fshifts; 108 CEResidueVec m_seq[2]; 109 TIVec m_laststop[2][3], m_notinexon[2][3], m_notinintron[2], m_notining; 110 CAlignMap m_map; 111 TDVec m_ascr[2], m_dscr[2], m_sttscr[2], m_stpscr[2], m_ncdrscr[2], m_ingscr[2], m_cdrscr[2][3]; 112 TIVec m_asplit[2][2], m_dsplit[2][2]; 113 TIVec m_inalign; 114 TIVec m_protnum; 115 int m_anum[2], m_dnum[2], m_sttnum[2], m_stpnum[2]; 116 TSignedSeqPos m_chunk_start, m_chunk_stop; 117 double m_mpp; 118 CResidueVec ConstructSequenceAndMaps(const TGeneModelList& aligns, const CResidueVec& original_sequence); 119 }; 120 121 END_SCOPE(gnomon) 122 END_NCBI_SCOPE 123 124 #endif // ALGO_GNOMON___SCORE__HPP 125