1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Compute alignment score given a pairwise alignment.
35 // ==========================================================================
36
37 #ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
38 #define INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
39
40 namespace seqan {
41
42 // ============================================================================
43 // Forwards
44 // ============================================================================
45
46 // ============================================================================
47 // Tags, Classes, Enums
48 // ============================================================================
49
50 // ----------------------------------------------------------------------------
51 // Class AlignmentStats
52 // ----------------------------------------------------------------------------
53
54 /*!
55 * @class AlignmentStats
56 * @headerfile <seqan/align.h>
57 * @brief Statistics about a tabular alignment.
58 *
59 * @signature struct AlignmentStats;
60 *
61 * @see computeAlignmentStats
62 *
63 * @fn AlignmentStats::AlignmentStats
64 * @brief Constructor
65 *
66 * @signature AlignmentStats::AlignmentStats();
67 *
68 * All members are initialized to <tt>0</tt>.
69 *
70 * @var unsigned AlignmentStats::numGaps;
71 * @brief Number of gap characters (sum of numGapOpens and numGapExtensions)
72 *
73 * @var unsigned AlignmentStats::numGapOpens;
74 * @brief Number of gap open events.
75 *
76 * @var unsigned AlignmentStats::numGapExtensions;
77 * @brief Number of gap extension events.
78 *
79 * @var unsigned AlignmentStats::numInsertions;
80 * @brief Number of gaps in reference relative to query.
81 *
82 * @var unsigned AlignmentStats::numDeletions;
83 * @brief Number of gaps in query relative to reference.
84 *
85 * @var unsigned AlignmentStats::numMatches;
86 * @brief Number of match (identity) events.
87 *
88 * @var unsigned AlignmentStats::numMismatches;
89 * @brief Number of mismatch (not identity) events.
90 *
91 * @var unsigned AlignmentStats::numPositiveScores;
92 * @brief Number of residues aligned with positive score (0 is counted as positive).
93 *
94 * @var unsigned AlignmentStats::numNegativeScores;
95 * @brief Number of residues aligned with negative score.
96 *
97 * @var unsigned AlignmentStats::alignmentLength;
98 * @brief Length of the aligned region
99 *
100 * @var float AlignmentStats::alignmentSimilarity;
101 * @brief The resulting alignment percent similarity (positive).
102 *
103 * @var float AlignmentStats::alignmentIdentity;
104 * @brief The resulting alignment percent identity (match).
105 *
106 * @var int AlignmentStats::alignmentScore;
107 * @brief The resulting alignment score.
108 */
109
110 struct AlignmentStats
111 {
112 // Number of gap characters/opens/gap extensions.
113 unsigned numGaps;
114 unsigned numGapOpens;
115 unsigned numGapExtensions;
116 // Number of insertions and deletions.
117 unsigned numInsertions;
118 unsigned numDeletions;
119 // Number of matches, mismatches.
120 unsigned numMatches;
121 unsigned numMismatches;
122 // Number of aligned residues with positive/negative scores.
123 unsigned numPositiveScores;
124 unsigned numNegativeScores;
125
126 // length of the alignment
127 unsigned alignmentLength;
128
129 // the alignment identity and similarity scores
130 float alignmentSimilarity;
131 float alignmentIdentity;
132
133 // The alignment score.
134 int alignmentScore;
135
AlignmentStatsAlignmentStats136 AlignmentStats() : numGaps(0), numGapOpens(0), numGapExtensions(0), numInsertions(0), numDeletions(0),
137 numMatches(0), numMismatches(0), numPositiveScores(0), numNegativeScores(0),
138 alignmentLength(0), alignmentSimilarity(0.0), alignmentIdentity(0.0), alignmentScore(0)
139 {}
140 };
141
142 // ============================================================================
143 // Metafunctions
144 // ============================================================================
145
146 // ============================================================================
147 // Functions
148 // ============================================================================
149
150 // ----------------------------------------------------------------------------
151 // Function clear()
152 // ----------------------------------------------------------------------------
153
154 /*!
155 * @fn AlignmentStats#clear
156 * @brief Resets all members to <tt>0</tt>.
157 *
158 * @signature void clear(stats);
159 *
160 * @param[in,out] stats AlignmentStats object to clear.
161 */
162
163 inline
clear(AlignmentStats & stats)164 void clear(AlignmentStats & stats)
165 {
166 stats.numGaps = 0;
167 stats.numGapOpens = 0;
168 stats.numGapExtensions = 0;
169 stats.numInsertions = 0;
170 stats.numDeletions = 0;
171 stats.numMatches = 0;
172 stats.numMismatches = 0;
173 stats.numPositiveScores = 0;
174 stats.numNegativeScores = 0;
175 stats.alignmentLength = 0;
176 stats.alignmentSimilarity = 0.0;
177 stats.alignmentIdentity = 0.0;
178 stats.alignmentScore = 0;
179 }
180
181 // ----------------------------------------------------------------------------
182 // Function computeAlignmentStats()
183 // ----------------------------------------------------------------------------
184
185 /*!
186 * @fn computeAlignmentStats
187 * @headerfile <seqan/align.h>
188 * @brief Compute alignment statistics.
189 *
190 * @signature TScoreVal computeAlignmentStats(stats, align, scoringScheme);
191 * @signature TScoreVal computeAlignmentStats(stats, row0, row1, scoringScheme);
192 *
193 * @param[out] stats The @link AlignmentStats @endlink object to store alignment statistics in.
194 * @param[in] align The @link Align @endlink object to score.
195 * @param[in] row0 The first row (@link Gaps @endlink object).
196 * @param[in] row1 The second row (@link Gaps @endlink object).
197 * @param[in] score The @link Score @endlink object to use for the scoring scheme.
198 *
199 * @return TScoreVal The score value of the alignment, of the same type as the value type of <tt>scoringScheme</tt>
200 *
201 * @see AlignmentStats
202 *
203 * @section Examples
204 *
205 * @include demos/dox/align/compute_alignment_stats.cpp
206 *
207 * The output is as follows:
208 *
209 * @include demos/dox/align/compute_alignment_stats.cpp.stdout
210 */
211
212 template <typename TSource0, typename TGapsSpec0,
213 typename TSource1, typename TGapsSpec1,
214 typename TScoreVal, typename TScoreSpec>
computeAlignmentStats(AlignmentStats & stats,Gaps<TSource0,TGapsSpec0> const & row0,Gaps<TSource1,TGapsSpec1> const & row1,Score<TScoreVal,TScoreSpec> const & scoringScheme)215 TScoreVal computeAlignmentStats(AlignmentStats & stats,
216 Gaps<TSource0, TGapsSpec0> const & row0,
217 Gaps<TSource1, TGapsSpec1> const & row1,
218 Score<TScoreVal, TScoreSpec> const & scoringScheme)
219 {
220 clear(stats);
221
222 typedef typename Iterator<Gaps<TSource0, TGapsSpec0> const, Standard>::Type TGapsIter0;
223 typedef typename Iterator<Gaps<TSource1, TGapsSpec1> const, Standard>::Type TGapsIter1;
224 typedef typename Value<TSource0>::Type TAlphabet;
225
226 // Get iterators.
227 TGapsIter0 it0 = begin(row0);
228 TGapsIter0 itEnd0 = end(row0);
229 TGapsIter1 it1 = begin(row1);
230 TGapsIter1 itEnd1 = end(row1);
231
232 // State whether we have already opened a gap.
233 bool isGapOpen0 = false, isGapOpen1 = false;
234
235 for (; it0 != itEnd0 && it1 != itEnd1; ++it0, ++it1)
236 {
237 if (isGap(it0))
238 {
239 if (!isGapOpen0)
240 {
241 stats.numGapOpens += 1;
242 stats.alignmentScore += scoreGapOpen(scoringScheme);
243 }
244 else
245 {
246 stats.numGapExtensions += 1;
247 stats.alignmentScore += scoreGapExtend(scoringScheme);
248 }
249 stats.numDeletions += 1;
250 isGapOpen0 = true;
251 }
252 else
253 {
254 isGapOpen0 = false;
255 }
256
257 if (isGap(it1))
258 {
259 if (!isGapOpen1)
260 {
261 stats.numGapOpens += 1;
262 stats.alignmentScore += scoreGapOpen(scoringScheme);
263 }
264 else
265 {
266 stats.numGapExtensions += 1;
267 stats.alignmentScore += scoreGapExtend(scoringScheme);
268 }
269 stats.numInsertions += 1;
270 isGapOpen1 = true;
271 }
272 else
273 {
274 isGapOpen1 = false;
275 }
276
277 if (!isGap(it0) && !isGap(it1))
278 {
279 // Compute the alignment score and register in stats.
280 TAlphabet c0 = *it0;
281 TAlphabet c1 = static_cast<TAlphabet>(*it1);
282 TScoreVal scoreVal = score(scoringScheme, c0, c1);
283 stats.alignmentScore += scoreVal;
284 // Register other statistics.
285 bool isMatch = (c0 == c1);
286 bool isPositive = (scoreVal > 0);
287 stats.numMatches += isMatch;
288 stats.numMismatches += !isMatch;
289 stats.numPositiveScores += isPositive;
290 stats.numNegativeScores += !isPositive;
291 }
292 }
293 SEQAN_ASSERT(it0 == itEnd0);
294 SEQAN_ASSERT(it1 == itEnd1);
295
296 stats.numGaps = stats.numGapOpens + stats.numGapExtensions;
297
298 // Finally, compute the alignment similarity from the various counts
299 stats.alignmentLength = length(row0);
300 stats.alignmentSimilarity = 100.0 * static_cast<float>(stats.numPositiveScores)
301 / static_cast<float>(stats.alignmentLength);
302 stats.alignmentIdentity = 100.0 * static_cast<float>(stats.numMatches)
303 / static_cast<float>(stats.alignmentLength);
304
305 return stats.alignmentScore;
306 }
307
308 template <typename TSource, typename TAlignSpec, typename TScoreVal, typename TScoreSpec>
computeAlignmentStats(AlignmentStats & stats,Align<TSource,TAlignSpec> const & align,Score<TScoreVal,TScoreSpec> const & scoringScheme)309 TScoreVal computeAlignmentStats(AlignmentStats & stats,
310 Align<TSource, TAlignSpec> const & align,
311 Score<TScoreVal, TScoreSpec> const & scoringScheme)
312 {
313 SEQAN_ASSERT_EQ_MSG(length(rows(align)), 2u, "Only works with pairwise alignments.");
314 SEQAN_ASSERT_EQ_MSG(length(row(align, 0)), length(row(align, 1)), "Invalid alignment!");
315
316 return computeAlignmentStats(stats, row(align, 0), row(align, 1), scoringScheme);
317 }
318
319 template <typename TGaps, typename TAlignSpec, typename TScoreVal, typename TScoreSpec>
320 [[deprecated("Use computeAlignmentStats(stats, align, scoringScheme) instead.")]]
computeAlignmentStats(Align<TGaps,TAlignSpec> const & align,Score<TScoreVal,TScoreSpec> const & scoringScheme)321 TScoreVal computeAlignmentStats(Align<TGaps, TAlignSpec> const & align,
322 Score<TScoreVal, TScoreSpec> const & scoringScheme)
323 {
324 AlignmentStats stats;
325 (void)stats;
326 return computeAlignmentStats(stats, align, scoringScheme);
327 }
328
329 } // namespace seqan
330
331 #endif // #ifndef INCLUDE_SEQAN_ALIGN_EVALUATE_ALIGNMENT_H_
332