1 /* $Id: cuPssmScorer.cpp 438050 2014-06-12 15:02:18Z lanczyck $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Charlie Liu
27 *
28 * File Description:
29 *
* Score a seq-align by PSSM
31 *
32 * ===========================================================================
33 */
34
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuPssmScorer.hpp>
37 #include <objects/scoremat/PssmFinalData.hpp>
38 #include <objects/scoremat/Pssm.hpp>
39 #include <algo/structure/cd_utils/cuBlock.hpp>
40 #include <algo/structure/cd_utils/cuSequence.hpp>
41
42 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)43 BEGIN_SCOPE(cd_utils)
44
45 PssmScorer::PssmScorer(CRef< CPssmWithParameters > pssm)
46 :m_pssm(pssm),
47 m_scoresFromPssm(pssm->GetPssm().GetNumColumns(), vector<int>(pssm->GetPssm().GetNumRows()))
48 {
49 if (m_pssm->GetPssm().GetByRow()) //deal with byColumn for now
50 {
51 m_scoresFromPssm.clear();
52 }
53 else
54 {
55 if (m_pssm->GetPssm().CanGetFinalData())
56 {
57 const list< int >& scoreList = m_pssm->GetPssm().GetFinalData().GetScores();
58 list<int>::const_iterator lit = scoreList.begin();
59 int nCol = m_pssm->GetPssm().GetNumColumns();
60 int nRow = pssm->GetPssm().GetNumRows();
61 for (int col = 0; col < nCol; col++)
62 {
63 for (int row = 0; row < nRow; row++)
64 {
65 m_scoresFromPssm[col][row] = *lit;
66 lit++;
67 }
68 }
69 }
70 else
71 m_scoresFromPssm.clear();
72 }
73 }
74
75 //assume the master is the query/consensus in pssm
score(const CRef<CSeq_align> align,const CRef<CBioseq> bioseq)76 int PssmScorer::score(const CRef<CSeq_align> align, const CRef<CBioseq> bioseq)
77 {
78 BlockModelPair bmp(align);
79 return score(bmp, bioseq);
80 }
score(BlockModelPair & bmp,const CRef<CBioseq> bioseq)81 int PssmScorer::score(BlockModelPair& bmp, const CRef<CBioseq> bioseq)
82 {
83 int score = -1;
84 const BlockModel& master = bmp.getMaster();
85 const BlockModel& slave = bmp.getSlave();
86 int masterLen = m_pssm->GetPssm().GetQuery().GetSeq().GetInst().GetLength();
87 vector<char> slaveSeq;
88 GetNcbistdSeq(*bioseq, slaveSeq);
89 if ((master.getLastAlignedPosition() >= masterLen)
90 || (slave.getLastAlignedPosition() >= (int) slaveSeq.size()))
91 return score;
92 if (m_scoresFromPssm.size() == 0)
93 return score;
94 int nBlocks = master.getBlocks().size();
95 for (int b = 0; b < nBlocks; b++)
96 {
97 const Block& mb = master.getBlocks()[b];
98 const Block& sb = slave.getBlocks()[b];
99 for (int cb = 0; cb < mb.getLen(); cb++)
100 {
101 score += scoreOneColumn(mb.getStart() + cb, slaveSeq[sb.getStart()+cb]);
102 }
103 }
104 return score;
105 }
106
scoreOneColumn(int col,char aa)107 int PssmScorer::scoreOneColumn(int col, char aa)
108 {
109 return m_scoresFromPssm[col][aa];
110 }
111
112
113 END_SCOPE(cd_utils)
114 END_NCBI_SCOPE
115
116