1 /* $Id: cuPssmScorer.cpp 438050 2014-06-12 15:02:18Z lanczyck $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Charlie Liu
27  *
28  * File Description:
29  *
 *       Score a seq-align by PSSM
31  *
32  * ===========================================================================
33  */
34 
35 #include <ncbi_pch.hpp>
36 #include <algo/structure/cd_utils/cuPssmScorer.hpp>
37 #include <objects/scoremat/PssmFinalData.hpp>
38 #include <objects/scoremat/Pssm.hpp>
39 #include <algo/structure/cd_utils/cuBlock.hpp>
40 #include <algo/structure/cd_utils/cuSequence.hpp>
41 
42 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(cd_utils)43 BEGIN_SCOPE(cd_utils)
44 
45 PssmScorer::PssmScorer(CRef< CPssmWithParameters > pssm)
46 :m_pssm(pssm),
47 m_scoresFromPssm(pssm->GetPssm().GetNumColumns(), vector<int>(pssm->GetPssm().GetNumRows()))
48 {
49 	if (m_pssm->GetPssm().GetByRow()) //deal with byColumn for now
50 	{
51 		m_scoresFromPssm.clear();
52 	}
53 	else
54 	{
55 		if (m_pssm->GetPssm().CanGetFinalData())
56 		{
57 			const list< int >& scoreList = m_pssm->GetPssm().GetFinalData().GetScores();
58 			list<int>::const_iterator lit = scoreList.begin();
59 			int nCol = m_pssm->GetPssm().GetNumColumns();
60 			int nRow = pssm->GetPssm().GetNumRows();
61 			for (int col = 0; col < nCol; col++)
62 			{
63 				for (int row = 0; row < nRow; row++)
64 				{
65 					m_scoresFromPssm[col][row] = *lit;
66 					lit++;
67 				}
68 			}
69 		}
70 		else
71 			m_scoresFromPssm.clear();
72 	}
73 }
74 
75 //assume the master is the query/consensus in pssm
score(const CRef<CSeq_align> align,const CRef<CBioseq> bioseq)76 int PssmScorer::score(const CRef<CSeq_align>  align, const CRef<CBioseq> bioseq)
77 {
78 	BlockModelPair bmp(align);
79 	return score(bmp, bioseq);
80 }
score(BlockModelPair & bmp,const CRef<CBioseq> bioseq)81 int PssmScorer::score(BlockModelPair& bmp, const CRef<CBioseq> bioseq)
82 {
83 	int score = -1;
84 	const BlockModel& master = bmp.getMaster();
85 	const BlockModel& slave = bmp.getSlave();
86 	int masterLen = m_pssm->GetPssm().GetQuery().GetSeq().GetInst().GetLength();
87 	vector<char> slaveSeq;
88 	GetNcbistdSeq(*bioseq, slaveSeq);
89 	if ((master.getLastAlignedPosition() >= masterLen)
90 		|| (slave.getLastAlignedPosition() >= (int) slaveSeq.size()))
91 		return score;
92 	if (m_scoresFromPssm.size() == 0)
93 		return score;
94 	int nBlocks = master.getBlocks().size();
95 	for (int b = 0; b < nBlocks; b++)
96 	{
97 		const Block& mb = master.getBlocks()[b];
98 		const Block& sb = slave.getBlocks()[b];
99 		for (int cb = 0; cb < mb.getLen(); cb++)
100 		{
101 			score += scoreOneColumn(mb.getStart() + cb, slaveSeq[sb.getStart()+cb]);
102 		}
103 	}
104 	return score;
105 }
106 
scoreOneColumn(int col,char aa)107 int PssmScorer::scoreOneColumn(int col, char aa)
108 {
109 	return m_scoresFromPssm[col][aa];
110 }
111 
112 
113 END_SCOPE(cd_utils)
114 END_NCBI_SCOPE
115 
116