1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Compute alignment score given a pairwise alignment.
35 // ==========================================================================
36 
37 #ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
38 #define INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
39 
40 namespace seqan {
41 
42 // ============================================================================
43 // Forwards
44 // ============================================================================
45 
46 // ============================================================================
47 // Tags, Classes, Enums
48 // ============================================================================
49 
50 // ----------------------------------------------------------------------------
51 // Class AlignmentStats
52 // ----------------------------------------------------------------------------
53 
54 /*!
55  * @class AlignmentStats
56  * @headerfile <seqan/align.h>
57  * @brief Statistics about a tabular alignment.
58  *
59  * @signature struct AlignmentStats;
60  *
61  * @see computeAlignmentStats
62  *
63  * @fn AlignmentStats::AlignmentStats
64  * @brief Constructor
65  *
66  * @signature AlignmentStats::AlignmentStats();
67  *
68  * All members are initialized to <tt>0</tt>.
69  *
70  * @var unsigned AlignmentStats::numGaps;
71  * @brief Number of gap characters (sum of numGapOpens and numGapExtensions)
72  *
73  * @var unsigned AlignmentStats::numGapOpens;
74  * @brief Number of gap open events.
75  *
76  * @var unsigned AlignmentStats::numGapExtensions;
77  * @brief Number of gap extension events.
78  *
79  * @var unsigned AlignmentStats::numInsertions;
80  * @brief Number of gaps in reference relative to query.
81  *
82  * @var unsigned AlignmentStats::numDeletions;
83  * @brief Number of gaps in query relative to reference.
84  *
85  * @var unsigned AlignmentStats::numMatches;
86  * @brief Number of match (identity) events.
87  *
88  * @var unsigned AlignmentStats::numMismatches;
89  * @brief Number of mismatch (not identity) events.
90  *
91  * @var unsigned AlignmentStats::numPositiveScores;
92  * @brief Number of residues aligned with positive score (0 is counted as positive).
93  *
94  * @var unsigned AlignmentStats::numNegativeScores;
95  * @brief Number of residues aligned with negative score.
96  *
97  * @var unsigned AlignmentStats::alignmentLength;
98  * @brief Length of the aligned region
99  *
100  * @var float AlignmentStats::alignmentSimilarity;
101  * @brief The resulting alignment percent similarity (positive).
102  *
103  * @var float AlignmentStats::alignmentIdentity;
104  * @brief The resulting alignment percent identity (match).
105  *
106  * @var int AlignmentStats::alignmentScore;
107  * @brief The resulting alignment score.
108  */
109 
110 struct AlignmentStats
111 {
112     // Number of gap characters/opens/gap extensions.
113     unsigned numGaps;
114     unsigned numGapOpens;
115     unsigned numGapExtensions;
116     // Number of insertions and deletions.
117     unsigned numInsertions;
118     unsigned numDeletions;
119     // Number of matches, mismatches.
120     unsigned numMatches;
121     unsigned numMismatches;
122     // Number of aligned residues with positive/negative scores.
123     unsigned numPositiveScores;
124     unsigned numNegativeScores;
125 
126     // length of the alignment
127     unsigned alignmentLength;
128 
129     // the alignment identity and similarity scores
130     float alignmentSimilarity;
131     float alignmentIdentity;
132 
133     // The alignment score.
134     int alignmentScore;
135 
AlignmentStatsAlignmentStats136     AlignmentStats() : numGaps(0), numGapOpens(0), numGapExtensions(0), numInsertions(0), numDeletions(0),
137                        numMatches(0), numMismatches(0), numPositiveScores(0), numNegativeScores(0),
138                        alignmentLength(0), alignmentSimilarity(0.0), alignmentIdentity(0.0), alignmentScore(0)
139     {}
140 };
141 
142 // ============================================================================
143 // Metafunctions
144 // ============================================================================
145 
146 // ============================================================================
147 // Functions
148 // ============================================================================
149 
150 // ----------------------------------------------------------------------------
151 // Function clear()
152 // ----------------------------------------------------------------------------
153 
154 /*!
155  * @fn AlignmentStats#clear
156  * @brief Resets all members to <tt>0</tt>.
157  *
158  * @signature void clear(stats);
159  *
160  * @param[in,out] stats AlignmentStats object to clear.
161  */
162 
163 inline
clear(AlignmentStats & stats)164 void clear(AlignmentStats & stats)
165 {
166     stats.numGaps = 0;
167     stats.numGapOpens = 0;
168     stats.numGapExtensions = 0;
169     stats.numInsertions = 0;
170     stats.numDeletions = 0;
171     stats.numMatches = 0;
172     stats.numMismatches = 0;
173     stats.numPositiveScores = 0;
174     stats.numNegativeScores = 0;
175     stats.alignmentLength = 0;
176     stats.alignmentSimilarity = 0.0;
177     stats.alignmentIdentity = 0.0;
178     stats.alignmentScore = 0;
179 }
180 
181 // ----------------------------------------------------------------------------
182 // Function computeAlignmentStats()
183 // ----------------------------------------------------------------------------
184 
185 /*!
186  * @fn computeAlignmentStats
187  * @headerfile <seqan/align.h>
188  * @brief Compute alignment statistics.
189  *
190  * @signature TScoreVal computeAlignmentStats(stats, align, scoringScheme);
191  * @signature TScoreVal computeAlignmentStats(stats, row0, row1, scoringScheme);
192  *
193  * @param[out] stats The @link AlignmentStats @endlink object to store alignment statistics in.
194  * @param[in]  align The @link Align @endlink object to score.
195  * @param[in]  row0  The first row (@link Gaps @endlink object).
196  * @param[in]  row1  The second row (@link Gaps @endlink object).
197  * @param[in]  score The @link Score @endlink object to use for the scoring scheme.
198  *
199  * @return TScoreVal The score value of the alignment, of the same type as the value type of <tt>scoringScheme</tt>
200  *
201  * @see AlignmentStats
202  *
203  * @section Examples
204  *
205  * @include demos/dox/align/compute_alignment_stats.cpp
206  *
207  * The output is as follows:
208  *
209  * @include demos/dox/align/compute_alignment_stats.cpp.stdout
210  */
211 
212 template <typename TSource0, typename TGapsSpec0,
213           typename TSource1, typename TGapsSpec1,
214           typename TScoreVal, typename TScoreSpec>
computeAlignmentStats(AlignmentStats & stats,Gaps<TSource0,TGapsSpec0> const & row0,Gaps<TSource1,TGapsSpec1> const & row1,Score<TScoreVal,TScoreSpec> const & scoringScheme)215 TScoreVal computeAlignmentStats(AlignmentStats & stats,
216                                 Gaps<TSource0, TGapsSpec0> const & row0,
217                                 Gaps<TSource1, TGapsSpec1> const & row1,
218                                 Score<TScoreVal, TScoreSpec> const & scoringScheme)
219 {
220     clear(stats);
221 
222     typedef typename Iterator<Gaps<TSource0, TGapsSpec0> const, Standard>::Type TGapsIter0;
223     typedef typename Iterator<Gaps<TSource1, TGapsSpec1> const, Standard>::Type TGapsIter1;
224     typedef typename Value<TSource0>::Type TAlphabet;
225 
226     // Get iterators.
227     TGapsIter0 it0      = begin(row0);
228     TGapsIter0 itEnd0   = end(row0);
229     TGapsIter1 it1      = begin(row1);
230     TGapsIter1 itEnd1   = end(row1);
231 
232     // State whether we have already opened a gap.
233     bool isGapOpen0 = false, isGapOpen1 = false;
234 
235     for (; it0 != itEnd0 && it1 != itEnd1; ++it0, ++it1)
236     {
237         if (isGap(it0))
238         {
239             if (!isGapOpen0)
240             {
241                 stats.numGapOpens += 1;
242                 stats.alignmentScore += scoreGapOpen(scoringScheme);
243             }
244             else
245             {
246                 stats.numGapExtensions += 1;
247                 stats.alignmentScore += scoreGapExtend(scoringScheme);
248             }
249             stats.numDeletions += 1;
250             isGapOpen0 = true;
251         }
252         else
253         {
254             isGapOpen0 = false;
255         }
256 
257         if (isGap(it1))
258         {
259             if (!isGapOpen1)
260             {
261                 stats.numGapOpens += 1;
262                 stats.alignmentScore += scoreGapOpen(scoringScheme);
263             }
264             else
265             {
266                 stats.numGapExtensions += 1;
267                 stats.alignmentScore += scoreGapExtend(scoringScheme);
268             }
269             stats.numInsertions += 1;
270             isGapOpen1 = true;
271         }
272         else
273         {
274             isGapOpen1 = false;
275         }
276 
277         if (!isGap(it0) && !isGap(it1))
278         {
279             // Compute the alignment score and register in stats.
280             TAlphabet c0 = *it0;
281             TAlphabet c1 = static_cast<TAlphabet>(*it1);
282             TScoreVal scoreVal = score(scoringScheme, c0, c1);
283             stats.alignmentScore += scoreVal;
284             // Register other statistics.
285             bool isMatch = (c0 == c1);
286             bool isPositive = (scoreVal > 0);
287             stats.numMatches += isMatch;
288             stats.numMismatches += !isMatch;
289             stats.numPositiveScores += isPositive;
290             stats.numNegativeScores += !isPositive;
291         }
292     }
293     SEQAN_ASSERT(it0 == itEnd0);
294     SEQAN_ASSERT(it1 == itEnd1);
295 
296     stats.numGaps = stats.numGapOpens + stats.numGapExtensions;
297 
298     // Finally, compute the alignment similarity from the various counts
299     stats.alignmentLength = length(row0);
300     stats.alignmentSimilarity = 100.0 * static_cast<float>(stats.numPositiveScores)
301                                 / static_cast<float>(stats.alignmentLength);
302     stats.alignmentIdentity = 100.0 * static_cast<float>(stats.numMatches)
303                               / static_cast<float>(stats.alignmentLength);
304 
305     return stats.alignmentScore;
306 }
307 
308 template <typename TSource, typename TAlignSpec, typename TScoreVal, typename TScoreSpec>
computeAlignmentStats(AlignmentStats & stats,Align<TSource,TAlignSpec> const & align,Score<TScoreVal,TScoreSpec> const & scoringScheme)309 TScoreVal computeAlignmentStats(AlignmentStats & stats,
310                                 Align<TSource, TAlignSpec> const & align,
311                                 Score<TScoreVal, TScoreSpec> const & scoringScheme)
312 {
313     SEQAN_ASSERT_EQ_MSG(length(rows(align)), 2u, "Only works with pairwise alignments.");
314     SEQAN_ASSERT_EQ_MSG(length(row(align, 0)), length(row(align, 1)), "Invalid alignment!");
315 
316     return computeAlignmentStats(stats, row(align, 0), row(align, 1), scoringScheme);
317 }
318 
319 template <typename TGaps, typename TAlignSpec, typename TScoreVal, typename TScoreSpec>
320 [[deprecated("Use computeAlignmentStats(stats, align, scoringScheme) instead.")]]
computeAlignmentStats(Align<TGaps,TAlignSpec> const & align,Score<TScoreVal,TScoreSpec> const & scoringScheme)321 TScoreVal computeAlignmentStats(Align<TGaps, TAlignSpec> const & align,
322                                 Score<TScoreVal, TScoreSpec> const & scoringScheme)
323 {
324     AlignmentStats stats;
325     (void)stats;
326     return computeAlignmentStats(stats, align, scoringScheme);
327 }
328 
329 }  // namespace seqan
330 
331 #endif  // #ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
332