1 /* $Id: block_align.cpp 103491 2007-05-04 17:18:18Z kazimird $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * Dynamic programming-based alignment algorithms: block aligner
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbidiag.hpp>
37
38 #include <vector>
39 #include <list>
40 #include <algorithm>
41
42 #include <algo/structure/struct_dp/struct_dp.h>
43
44 USING_NCBI_SCOPE;
45
46
47 BEGIN_SCOPE(struct_dp)
48
49 #define ERROR_MESSAGE(s) ERR_POST(Error << "block_align: " << s << '!')
50 #define WARNING_MESSAGE(s) ERR_POST(Warning << "block_align: " << s)
51 #define INFO_MESSAGE(s) ERR_POST(Info << "block_align: " << s)
52
53 #define NO_TRACEBACK kMax_UInt
54
55 class Cell
56 {
57 public:
58 int score;
59 unsigned int tracebackResidue;
Cell(void)60 Cell(void) : score(DP_NEGATIVE_INFINITY), tracebackResidue(NO_TRACEBACK) { }
61 };
62
63 class Matrix
64 {
65 public:
66 typedef vector < Cell > ResidueRow;
67 typedef vector < ResidueRow > Grid;
68 Grid grid;
Matrix(unsigned int nBlocks,unsigned int nResidues)69 Matrix(unsigned int nBlocks, unsigned int nResidues) : grid(nBlocks + 1)
70 { for (unsigned int i=0; i<nBlocks; ++i) grid[i].resize(nResidues + 1); }
operator [](unsigned int block)71 ResidueRow& operator [] (unsigned int block) { return grid[block]; }
operator [](unsigned int block) const72 const ResidueRow& operator [] (unsigned int block) const { return grid[block]; }
73 };
74
75 // checks to make sure frozen block positions are legal (borrowing my own code from blockalign.c)
ValidateFrozenBlockPositions(const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,bool checkGapSum)76 int ValidateFrozenBlockPositions(const DP_BlockInfo *blocks,
77 unsigned int queryFrom, unsigned int queryTo, bool checkGapSum)
78 {
79 static const unsigned int NONE = kMax_UInt;
80 unsigned int
81 block,
82 unfrozenBlocksLength = 0,
83 prevFrozenBlock = NONE,
84 maxGapsLength = 0;
85
86 for (block=0; block<blocks->nBlocks; ++block) {
87
88 /* keep track of max gap space between frozen blocks */
89 if (block > 0)
90 maxGapsLength += blocks->maxLoops[block - 1];
91
92 /* to allow room for unfrozen blocks between frozen ones */
93 if (blocks->freezeBlocks[block] == DP_UNFROZEN_BLOCK) {
94 unfrozenBlocksLength += blocks->blockSizes[block];
95 continue;
96 }
97
98 /* check absolute block end positions */
99 if (blocks->freezeBlocks[block] < queryFrom) {
100 ERROR_MESSAGE("Frozen block " << (block+1) << " can't start < " << (queryFrom+1));
101 return STRUCT_DP_PARAMETER_ERROR;
102 }
103 if (blocks->freezeBlocks[block] + blocks->blockSizes[block] - 1 > queryTo) {
104 ERROR_MESSAGE("Frozen block " << (block+1) << " can't end > " << (queryTo+1));
105 return STRUCT_DP_PARAMETER_ERROR;
106 }
107
108 /* checks for legal distances between frozen blocks */
109 if (prevFrozenBlock != NONE) {
110 unsigned int prevFrozenBlockEnd =
111 blocks->freezeBlocks[prevFrozenBlock] + blocks->blockSizes[prevFrozenBlock] - 1;
112
113 /* check for adequate room for unfrozen blocks between frozen blocks */
114 if (blocks->freezeBlocks[block] <= (prevFrozenBlockEnd + unfrozenBlocksLength)) {
115 ERROR_MESSAGE("Frozen block " << (block+1) << " starts before end of prior frozen block, "
116 "or doesn't leave room for intervening unfrozen block(s)");
117 return STRUCT_DP_PARAMETER_ERROR;
118 }
119
120 /* check for too much gap space since last frozen block,
121 but if frozen blocks are adjacent, issue warning only */
122 if (checkGapSum &&
123 blocks->freezeBlocks[block] > prevFrozenBlockEnd + 1 + unfrozenBlocksLength + maxGapsLength)
124 {
125 if (prevFrozenBlock == block - 1) {
126 WARNING_MESSAGE("Frozen block " << (block+1) << " is further than allowed loop length"
127 " from adjacent frozen block " << (prevFrozenBlock+1));
128 } else {
129 ERROR_MESSAGE("Frozen block " << (block+1) << " is too far away from prior frozen block"
130 " given allowed intervening loop length(s) " << maxGapsLength
131 << " plus unfrozen block length(s) " << unfrozenBlocksLength);
132 return STRUCT_DP_PARAMETER_ERROR;
133 }
134 }
135 }
136
137 /* reset counters after each frozen block */
138 prevFrozenBlock = block;
139 unfrozenBlocksLength = maxGapsLength = 0;
140 }
141
142 return STRUCT_DP_OKAY;
143 }
144
145 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateGlobalMatrix(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo)146 int CalculateGlobalMatrix(Matrix& matrix,
147 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
148 unsigned int queryFrom, unsigned int queryTo)
149 {
150 unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1;
151 int score = 0, sum;
152
153 // find possible block positions, based purely on block lengths
154 vector < unsigned int > firstPos(blocks->nBlocks), lastPos(blocks->nBlocks);
155 for (block=0; block<=lastBlock; ++block) {
156 if (block == 0) {
157 firstPos[0] = queryFrom;
158 lastPos[lastBlock] = queryTo - blocks->blockSizes[lastBlock] + 1;
159 } else {
160 firstPos[block] = firstPos[block - 1] + blocks->blockSizes[block - 1];
161 lastPos[lastBlock - block] =
162 lastPos[lastBlock - block + 1] - blocks->blockSizes[lastBlock - block];
163 }
164 }
165
166 // further restrict the search if blocks are frozen
167 for (block=0; block<=lastBlock; ++block) {
168 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
169 if (blocks->freezeBlocks[block] < firstPos[block] ||
170 blocks->freezeBlocks[block] > lastPos[block])
171 {
172 ERROR_MESSAGE("CalculateGlobalMatrix() - frozen block "
173 << (block+1) << " does not leave room for unfrozen blocks");
174 return STRUCT_DP_PARAMETER_ERROR;
175 }
176 firstPos[block] = lastPos[block] = blocks->freezeBlocks[block];
177 }
178 }
179
180 // fill in first row with scores of first block at all possible positions
181 for (residue=firstPos[0]; residue<=lastPos[0]; ++residue)
182 matrix[0][residue - queryFrom].score = BlockScore(0, residue);
183
184 // for each successive block, find the best allowed pairing of the block with the previous block
185 bool blockScoreCalculated;
186 for (block=1; block<=lastBlock; ++block) {
187 for (residue=firstPos[block]; residue<=lastPos[block]; ++residue) {
188 blockScoreCalculated = false;
189
190 for (prevResidue=firstPos[block - 1]; prevResidue<=lastPos[block - 1]; ++prevResidue) {
191
192 // current block must come after the previous block
193 if (residue < prevResidue + blocks->blockSizes[block - 1])
194 break;
195
196 // cut off at max loop length from previous block, but not if both blocks are frozen
197 if (residue > prevResidue + blocks->blockSizes[block - 1] + blocks->maxLoops[block - 1] &&
198 (blocks->freezeBlocks[block] == DP_UNFROZEN_BLOCK ||
199 blocks->freezeBlocks[block - 1] == DP_UNFROZEN_BLOCK))
200 continue;
201
202 // make sure previous block is at an allowed position
203 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
204 continue;
205
206 // get score at this position
207 if (!blockScoreCalculated) {
208 score = BlockScore(block, residue);
209 if (score == DP_NEGATIVE_INFINITY)
210 break;
211 blockScoreCalculated = true;
212 }
213
214 // find highest sum of scores for allowed pairing of this block with any previous
215 sum = score + matrix[block - 1][prevResidue - queryFrom].score;
216 if (sum > matrix[block][residue - queryFrom].score) {
217 matrix[block][residue - queryFrom].score = sum;
218 matrix[block][residue - queryFrom].tracebackResidue = prevResidue;
219 }
220 }
221 }
222 }
223
224 return STRUCT_DP_OKAY;
225 }
226
227 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateGlobalMatrixGeneric(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo)228 int CalculateGlobalMatrixGeneric(Matrix& matrix,
229 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
230 unsigned int queryFrom, unsigned int queryTo)
231 {
232 unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1;
233 int blockScore = 0, sum;
234 unsigned int loopPenalty;
235
236 // find possible block positions, based purely on block lengths
237 vector < unsigned int > firstPos(blocks->nBlocks), lastPos(blocks->nBlocks);
238 for (block=0; block<=lastBlock; ++block) {
239 if (block == 0) {
240 firstPos[0] = queryFrom;
241 lastPos[lastBlock] = queryTo - blocks->blockSizes[lastBlock] + 1;
242 } else {
243 firstPos[block] = firstPos[block - 1] + blocks->blockSizes[block - 1];
244 lastPos[lastBlock - block] =
245 lastPos[lastBlock - block + 1] - blocks->blockSizes[lastBlock - block];
246 }
247 }
248
249 // further restrict the search if blocks are frozen
250 for (block=0; block<=lastBlock; ++block) {
251 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
252 if (blocks->freezeBlocks[block] < firstPos[block] ||
253 blocks->freezeBlocks[block] > lastPos[block])
254 {
255 ERROR_MESSAGE("CalculateGlobalMatrix() - frozen block "
256 << (block+1) << " does not leave room for unfrozen blocks");
257 return STRUCT_DP_PARAMETER_ERROR;
258 }
259 firstPos[block] = lastPos[block] = blocks->freezeBlocks[block];
260 }
261 }
262
263 // fill in first row with scores of first block at all possible positions
264 for (residue=firstPos[0]; residue<=lastPos[0]; ++residue)
265 matrix[0][residue - queryFrom].score = BlockScore(0, residue);
266
267 // for each successive block, find the best allowed pairing of the block with the previous block
268 bool blockScoreCalculated;
269 for (block=1; block<=lastBlock; ++block) {
270 for (residue=firstPos[block]; residue<=lastPos[block]; ++residue) {
271 blockScoreCalculated = false;
272
273 for (prevResidue=firstPos[block - 1]; prevResidue<=lastPos[block - 1]; ++prevResidue) {
274
275 // current block must come after the previous block
276 if (residue < prevResidue + blocks->blockSizes[block - 1])
277 break;
278
279 // make sure previous block is at an allowed position
280 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
281 continue;
282
283 // get loop score at this position; assume loop score zero if both frozen
284 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK &&
285 blocks->freezeBlocks[block - 1] != DP_UNFROZEN_BLOCK)
286 {
287 loopPenalty = 0;
288 } else {
289 loopPenalty = LoopScore(block - 1, residue - prevResidue - blocks->blockSizes[block - 1]);
290 if (loopPenalty == DP_POSITIVE_INFINITY)
291 continue;
292 }
293
294 // get score at this position
295 if (!blockScoreCalculated) {
296 blockScore = BlockScore(block, residue);
297 if (blockScore == DP_NEGATIVE_INFINITY)
298 break;
299 blockScoreCalculated = true;
300 }
301
302 // find highest sum of scores + loop score for allowed pairing of this block with previous
303 sum = blockScore + matrix[block - 1][prevResidue - queryFrom].score - loopPenalty;
304 if (sum > matrix[block][residue - queryFrom].score) {
305 matrix[block][residue - queryFrom].score = sum;
306 matrix[block][residue - queryFrom].tracebackResidue = prevResidue;
307 }
308 }
309 }
310 }
311
312 return STRUCT_DP_OKAY;
313 }
314
315 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateLocalMatrix(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo)316 int CalculateLocalMatrix(Matrix& matrix,
317 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
318 unsigned int queryFrom, unsigned int queryTo)
319 {
320 unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1, tracebackResidue = 0;
321 int score, sum, bestPrevScore;
322
323 // find last possible block positions, based purely on block lengths
324 vector < unsigned int > lastPos(blocks->nBlocks);
325 for (block=0; block<=lastBlock; ++block) {
326 if (blocks->blockSizes[block] > queryTo - queryFrom + 1) {
327 ERROR_MESSAGE("Block " << (block+1) << " too large for this query range");
328 return STRUCT_DP_PARAMETER_ERROR;
329 }
330 lastPos[block] = queryTo - blocks->blockSizes[block] + 1;
331 }
332
333 // first row: positive scores of first block at all possible positions
334 for (residue=queryFrom; residue<=lastPos[0]; ++residue) {
335 score = BlockScore(0, residue);
336 matrix[0][residue - queryFrom].score = (score > 0) ? score : 0;
337 }
338
339 // first column: positive scores of all blocks at first positions
340 for (block=1; block<=lastBlock; ++block) {
341 score = BlockScore(block, queryFrom);
342 matrix[block][0].score = (score > 0) ? score : 0;
343 }
344
345 // for each successive block, find the best positive scoring with a previous block, if any
346 for (block=1; block<=lastBlock; ++block) {
347 for (residue=queryFrom+1; residue<=lastPos[block]; ++residue) {
348
349 // get score at this position
350 score = BlockScore(block, residue);
351 if (score == DP_NEGATIVE_INFINITY)
352 continue;
353
354 // find max score of any allowed previous block
355 bestPrevScore = DP_NEGATIVE_INFINITY;
356 for (prevResidue=queryFrom; prevResidue<=lastPos[block - 1]; ++prevResidue) {
357
358 // current block must come after the previous block
359 if (residue < prevResidue + blocks->blockSizes[block - 1])
360 break;
361
362 // cut off at max loop length from previous block
363 if (residue > prevResidue + blocks->blockSizes[block - 1] + blocks->maxLoops[block - 1])
364 continue;
365
366 // keep maximum score
367 if (matrix[block - 1][prevResidue - queryFrom].score > bestPrevScore) {
368 bestPrevScore = matrix[block - 1][prevResidue - queryFrom].score;
369 tracebackResidue = prevResidue;
370 }
371 }
372
373 // extend alignment if the sum of this block's + previous block's score is positive
374 if (bestPrevScore > 0 && (sum=bestPrevScore+score) > 0) {
375 matrix[block][residue - queryFrom].score = sum;
376 matrix[block][residue - queryFrom].tracebackResidue = tracebackResidue;
377 }
378
379 // otherwise, start new alignment if score is positive
380 else if (score > 0)
381 matrix[block][residue - queryFrom].score = score;
382 }
383 }
384
385 return STRUCT_DP_OKAY;
386 }
387
388 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateLocalMatrixGeneric(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo)389 int CalculateLocalMatrixGeneric(Matrix& matrix,
390 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
391 unsigned int queryFrom, unsigned int queryTo)
392 {
393 unsigned int block, residue, prevResidue, loopPenalty,
394 lastBlock = blocks->nBlocks - 1, tracebackResidue = 0;
395 int score, sum, bestPrevScore;
396
397 // find last possible block positions, based purely on block lengths
398 vector < unsigned int > lastPos(blocks->nBlocks);
399 for (block=0; block<=lastBlock; ++block) {
400 if (blocks->blockSizes[block] > queryTo - queryFrom + 1) {
401 ERROR_MESSAGE("Block " << (block+1) << " too large for this query range");
402 return STRUCT_DP_PARAMETER_ERROR;
403 }
404 lastPos[block] = queryTo - blocks->blockSizes[block] + 1;
405 }
406
407 // first row: positive scores of first block at all possible positions
408 for (residue=queryFrom; residue<=lastPos[0]; ++residue) {
409 score = BlockScore(0, residue);
410 matrix[0][residue - queryFrom].score = (score > 0) ? score : 0;
411 }
412
413 // first column: positive scores of all blocks at first positions
414 for (block=1; block<=lastBlock; ++block) {
415 score = BlockScore(block, queryFrom);
416 matrix[block][0].score = (score > 0) ? score : 0;
417 }
418
419 // for each successive block, find the best positive scoring with a previous block, if any
420 for (block=1; block<=lastBlock; ++block) {
421 for (residue=queryFrom+1; residue<=lastPos[block]; ++residue) {
422
423 // get score at this position
424 score = BlockScore(block, residue);
425 if (score == DP_NEGATIVE_INFINITY)
426 continue;
427
428 // find max score of any allowed previous block
429 bestPrevScore = DP_NEGATIVE_INFINITY;
430 for (prevResidue=queryFrom; prevResidue<=lastPos[block - 1]; ++prevResidue) {
431
432 // current block must come after the previous block
433 if (residue < prevResidue + blocks->blockSizes[block - 1])
434 break;
435
436 // make sure previous block is at an allowed position
437 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
438 continue;
439
440 // get loop score
441 loopPenalty = LoopScore(block - 1, residue - prevResidue - blocks->blockSizes[block - 1]);
442 if (loopPenalty == DP_POSITIVE_INFINITY)
443 continue;
444
445 // keep maximum score
446 sum = matrix[block - 1][prevResidue - queryFrom].score - loopPenalty;
447 if (sum > bestPrevScore) {
448 bestPrevScore = sum;
449 tracebackResidue = prevResidue;
450 }
451 }
452
453 // extend alignment if the sum of this block's + previous block's score is positive
454 if (bestPrevScore > 0 && (sum=bestPrevScore+score) > 0) {
455 matrix[block][residue - queryFrom].score = sum;
456 matrix[block][residue - queryFrom].tracebackResidue = tracebackResidue;
457 }
458
459 // otherwise, start new alignment if score is positive
460 else if (score > 0)
461 matrix[block][residue - queryFrom].score = score;
462 }
463 }
464
465 return STRUCT_DP_OKAY;
466 }
467
TracebackAlignment(const Matrix & matrix,unsigned int lastBlock,unsigned int lastBlockPos,unsigned int queryFrom,DP_AlignmentResult * alignment)468 int TracebackAlignment(const Matrix& matrix, unsigned int lastBlock, unsigned int lastBlockPos,
469 unsigned int queryFrom, DP_AlignmentResult *alignment)
470 {
471 // trace backwards from last block/pos
472 vector < unsigned int > blockPositions;
473 unsigned int block = lastBlock, pos = lastBlockPos;
474 do {
475 blockPositions.push_back(pos); // list is backwards after this...
476 pos = matrix[block][pos - queryFrom].tracebackResidue;
477 --block;
478 } while (pos != NO_TRACEBACK);
479 unsigned int firstBlock = block + 1; // last block traced to == first block of the alignment
480
481 // allocate and fill in alignment result structure
482 alignment->score = matrix[lastBlock][lastBlockPos - queryFrom].score;
483 alignment->firstBlock = firstBlock;
484 alignment->nBlocks = blockPositions.size();
485 alignment->blockPositions = new unsigned int[blockPositions.size()];
486 for (block=0; block<blockPositions.size(); ++block)
487 alignment->blockPositions[block] = blockPositions[lastBlock - firstBlock - block];
488
489 return STRUCT_DP_FOUND_ALIGNMENT;
490 }
491
TracebackGlobalAlignment(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)492 int TracebackGlobalAlignment(const Matrix& matrix,
493 const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
494 DP_AlignmentResult **alignment)
495 {
496 if (!alignment) {
497 ERROR_MESSAGE("TracebackGlobalAlignment() - NULL alignment handle");
498 return STRUCT_DP_PARAMETER_ERROR;
499 }
500 *alignment = NULL;
501
502 // find max score (e.g., best-scoring position of last block)
503 int score = DP_NEGATIVE_INFINITY;
504 unsigned int residue, lastBlockPos = 0;
505 for (residue=queryFrom; residue<=queryTo; ++residue) {
506 if (matrix[blocks->nBlocks - 1][residue - queryFrom].score > score) {
507 score = matrix[blocks->nBlocks - 1][residue - queryFrom].score;
508 lastBlockPos = residue;
509 }
510 }
511
512 if (score == DP_NEGATIVE_INFINITY) {
513 ERROR_MESSAGE("TracebackGlobalAlignment() - somehow failed to find any allowed global alignment");
514 return STRUCT_DP_ALGORITHM_ERROR;
515 }
516
517 // INFO_MESSAGE("Score of best global alignment: " << score);
518 *alignment = new DP_AlignmentResult;
519 return TracebackAlignment(matrix, blocks->nBlocks - 1, lastBlockPos, queryFrom, *alignment);
520 }
521
TracebackLocalAlignment(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)522 int TracebackLocalAlignment(const Matrix& matrix,
523 const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
524 DP_AlignmentResult **alignment)
525 {
526 if (!alignment) {
527 ERROR_MESSAGE("TracebackLocalAlignment() - NULL alignment handle");
528 return STRUCT_DP_PARAMETER_ERROR;
529 }
530 *alignment = NULL;
531
532 // find max score (e.g., best-scoring position of any block)
533 int score = DP_NEGATIVE_INFINITY;
534 unsigned int block, residue, lastBlock = 0, lastBlockPos = 0;
535 for (block=0; block<blocks->nBlocks; ++block) {
536 for (residue=queryFrom; residue<=queryTo; ++residue) {
537 if (matrix[block][residue - queryFrom].score > score) {
538 score = matrix[block][residue - queryFrom].score;
539 lastBlock = block;
540 lastBlockPos = residue;
541 }
542 }
543 }
544
545 if (score <= 0) {
546 // INFO_MESSAGE("No positive-scoring local alignment found.");
547 return STRUCT_DP_NO_ALIGNMENT;
548 }
549
550 // INFO_MESSAGE("Score of best local alignment: " << score);
551 *alignment = new DP_AlignmentResult;
552 return TracebackAlignment(matrix, lastBlock, lastBlockPos, queryFrom, *alignment);
553 }
554
555 typedef struct {
556 unsigned int block;
557 unsigned int residue;
558 } Traceback;
559
TracebackMultipleLocalAlignments(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)560 int TracebackMultipleLocalAlignments(const Matrix& matrix,
561 const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
562 DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
563 {
564 if (!alignments) {
565 ERROR_MESSAGE("TracebackMultipleLocalAlignments() - NULL alignments handle");
566 return STRUCT_DP_PARAMETER_ERROR;
567 }
568 *alignments = NULL;
569
570 unsigned int block, residue;
571 vector < vector < bool > > usedCells(blocks->nBlocks);
572 for (block=0; block<blocks->nBlocks; ++block)
573 usedCells[block].resize(queryTo - queryFrom + 1, false);
574
575 // find N max scores
576 list < Traceback > tracebacks;
577 while (maxAlignments == 0 || tracebacks.size() < maxAlignments) {
578
579 // find next best scoring cell that's not part of an alignment already reported
580 int score = DP_NEGATIVE_INFINITY;
581 unsigned int lastBlock = 0, lastBlockPos = 0;
582 for (block=0; block<blocks->nBlocks; ++block) {
583 for (residue=queryFrom; residue<=queryTo; ++residue) {
584 if (!usedCells[block][residue - queryFrom] &&
585 matrix[block][residue - queryFrom].score > score)
586 {
587 score = matrix[block][residue - queryFrom].score;
588 lastBlock = block;
589 lastBlockPos = residue;
590 }
591 }
592 }
593 if (score <= 0)
594 break;
595
596 // mark cells of this alignment as used, and see if any part of this alignment
597 // has been reported before
598 block = lastBlock;
599 residue = lastBlockPos;
600 bool repeated = false;
601 do {
602 if (usedCells[block][residue - queryFrom]) {
603 repeated = true;
604 break;
605 } else {
606 usedCells[block][residue - queryFrom] = true;
607 }
608 residue = matrix[block][residue - queryFrom].tracebackResidue;
609 --block;
610 } while (residue != NO_TRACEBACK);
611 if (repeated)
612 continue;
613
614 // add traceback start to list
615 tracebacks.resize(tracebacks.size() + 1);
616 tracebacks.back().block = lastBlock;
617 tracebacks.back().residue = lastBlockPos;
618 }
619
620 if (tracebacks.size() == 0) {
621 // INFO_MESSAGE("No positive-scoring local alignment found.");
622 return STRUCT_DP_NO_ALIGNMENT;
623 }
624
625 // allocate result structure
626 *alignments = new DP_MultipleAlignmentResults;
627 (*alignments)->nAlignments = 0;
628 (*alignments)->alignments = new DP_AlignmentResult[tracebacks.size()];
629 unsigned int a;
630 for (a=0; a<tracebacks.size(); ++a)
631 (*alignments)->alignments[a].blockPositions = NULL;
632
633 // fill in results from tracebacks
634 list < Traceback >::const_iterator t, te = tracebacks.end();
635 for (t=tracebacks.begin(), a=0; t!=te; ++t, ++a) {
636 if (TracebackAlignment(matrix, t->block, t->residue, queryFrom, &((*alignments)->alignments[a]))
637 == STRUCT_DP_FOUND_ALIGNMENT)
638 {
639 ++((*alignments)->nAlignments);
640 // if (a == 0)
641 // INFO_MESSAGE("Score of best local alignment: " << (*alignments)->alignments[a].score);
642 // else
643 // INFO_MESSAGE("Score of next local alignment: " << (*alignments)->alignments[a].score);
644 } else
645 return STRUCT_DP_ALGORITHM_ERROR;
646 }
647
648 return STRUCT_DP_FOUND_ALIGNMENT;
649 }
650
651 END_SCOPE(struct_dp)
652
653
654 // leave the C-called functions outside any namespace, just in case that might
655 // cause any problems when linking to C code...
656 USING_SCOPE(struct_dp);
657
658
DP_GlobalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)659 int DP_GlobalBlockAlign(
660 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
661 unsigned int queryFrom, unsigned int queryTo,
662 DP_AlignmentResult **alignment)
663 {
664 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
665 ERROR_MESSAGE("DP_GlobalBlockAlign() - invalid parameters");
666 return STRUCT_DP_PARAMETER_ERROR;
667 }
668
669 unsigned int i, sumBlockLen = 0;
670 for (i=0; i<blocks->nBlocks; ++i)
671 sumBlockLen += blocks->blockSizes[i];
672 if (sumBlockLen > queryTo - queryFrom + 1) {
673 ERROR_MESSAGE("DP_GlobalBlockAlign() - sum of block lengths longer than query region");
674 return STRUCT_DP_PARAMETER_ERROR;
675 }
676
677 int status = ValidateFrozenBlockPositions(blocks, queryFrom, queryTo, true);
678 if (status != STRUCT_DP_OKAY) {
679 ERROR_MESSAGE("DP_GlobalBlockAlign() - ValidateFrozenBlockPositions() returned error");
680 return status;
681 }
682
683 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
684
685 status = CalculateGlobalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
686 if (status != STRUCT_DP_OKAY) {
687 ERROR_MESSAGE("DP_GlobalBlockAlign() - CalculateGlobalMatrix() failed");
688 return status;
689 }
690
691 return TracebackGlobalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
692 }
693
DP_GlobalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)694 int DP_GlobalBlockAlignGeneric(
695 const DP_BlockInfo *blocks,
696 DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
697 unsigned int queryFrom, unsigned int queryTo,
698 DP_AlignmentResult **alignment)
699 {
700 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || queryTo < queryFrom ||
701 !BlockScore || !LoopScore) {
702 ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - invalid parameters");
703 return STRUCT_DP_PARAMETER_ERROR;
704 }
705
706 unsigned int i, sumBlockLen = 0;
707 for (i=0; i<blocks->nBlocks; ++i)
708 sumBlockLen += blocks->blockSizes[i];
709 if (sumBlockLen > queryTo - queryFrom + 1) {
710 ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - sum of block lengths longer than query region");
711 return STRUCT_DP_PARAMETER_ERROR;
712 }
713
714 int status = ValidateFrozenBlockPositions(blocks, queryFrom, queryTo, false);
715 if (status != STRUCT_DP_OKAY) {
716 ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - ValidateFrozenBlockPositions() returned error");
717 return status;
718 }
719
720 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
721
722 status = CalculateGlobalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
723 if (status != STRUCT_DP_OKAY) {
724 ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - CalculateGlobalMatrixGeneric() failed");
725 return status;
726 }
727
728 return TracebackGlobalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
729 }
730
DP_LocalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)731 int DP_LocalBlockAlign(
732 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
733 unsigned int queryFrom, unsigned int queryTo,
734 DP_AlignmentResult **alignment)
735 {
736 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
737 ERROR_MESSAGE("DP_LocalBlockAlign() - invalid parameters");
738 return STRUCT_DP_PARAMETER_ERROR;
739 }
740 for (unsigned int block=0; block<blocks->nBlocks; ++block) {
741 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
742 WARNING_MESSAGE("DP_LocalBlockAlign() - frozen block specifications are ignored...");
743 break;
744 }
745 }
746
747 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
748
749 int status = CalculateLocalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
750 if (status != STRUCT_DP_OKAY) {
751 ERROR_MESSAGE("DP_LocalBlockAlign() - CalculateLocalMatrix() failed");
752 return status;
753 }
754
755 return TracebackLocalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
756 }
757
DP_LocalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)758 int DP_LocalBlockAlignGeneric(
759 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
760 unsigned int queryFrom, unsigned int queryTo,
761 DP_AlignmentResult **alignment)
762 {
763 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
764 ERROR_MESSAGE("DP_LocalBlockAlignGeneric() - invalid parameters");
765 return STRUCT_DP_PARAMETER_ERROR;
766 }
767 for (unsigned int block=0; block<blocks->nBlocks; ++block) {
768 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
769 WARNING_MESSAGE("DP_LocalBlockAlignGeneric() - frozen block specifications are ignored...");
770 break;
771 }
772 }
773
774 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
775
776 int status = CalculateLocalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
777 if (status != STRUCT_DP_OKAY) {
778 ERROR_MESSAGE("DP_LocalBlockAlignGeneric() - CalculateLocalMatrixGeneric() failed");
779 return status;
780 }
781
782 return TracebackLocalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
783 }
784
DP_MultipleLocalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)785 int DP_MultipleLocalBlockAlign(
786 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
787 unsigned int queryFrom, unsigned int queryTo,
788 DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
789 {
790 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
791 ERROR_MESSAGE("DP_MultipleLocalBlockAlign() - invalid parameters");
792 return STRUCT_DP_PARAMETER_ERROR;
793 }
794 for (unsigned int block=0; block<blocks->nBlocks; ++block) {
795 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
796 WARNING_MESSAGE("DP_MultipleLocalBlockAlign() - frozen block specifications are ignored...");
797 break;
798 }
799 }
800
801 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
802
803 int status = CalculateLocalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
804 if (status != STRUCT_DP_OKAY) {
805 ERROR_MESSAGE("DP_MultipleLocalBlockAlign() - CalculateLocalMatrix() failed");
806 return status;
807 }
808
809 return TracebackMultipleLocalAlignments(matrix, blocks, queryFrom, queryTo, alignments, maxAlignments);
810 }
811
DP_MultipleLocalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)812 int DP_MultipleLocalBlockAlignGeneric(
813 const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
814 unsigned int queryFrom, unsigned int queryTo,
815 DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
816 {
817 if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
818 ERROR_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - invalid parameters");
819 return STRUCT_DP_PARAMETER_ERROR;
820 }
821 for (unsigned int block=0; block<blocks->nBlocks; ++block) {
822 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
823 WARNING_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - frozen block specifications are ignored...");
824 break;
825 }
826 }
827
828 Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
829
830 int status = CalculateLocalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
831 if (status != STRUCT_DP_OKAY) {
832 ERROR_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - CalculateLocalMatrixGeneric() failed");
833 return status;
834 }
835
836 return TracebackMultipleLocalAlignments(matrix, blocks, queryFrom, queryTo, alignments, maxAlignments);
837 }
838
DP_CreateBlockInfo(unsigned int nBlocks)839 DP_BlockInfo * DP_CreateBlockInfo(unsigned int nBlocks)
840 {
841 DP_BlockInfo *blocks = new DP_BlockInfo;
842 blocks->nBlocks = nBlocks;
843 blocks->blockPositions = new unsigned int[nBlocks];
844 blocks->blockSizes = new unsigned int[nBlocks];
845 blocks->maxLoops = new unsigned int[nBlocks - 1];
846 blocks->freezeBlocks = new unsigned int[nBlocks];
847 for (unsigned int i=0; i<nBlocks; ++i)
848 blocks->freezeBlocks[i] = DP_UNFROZEN_BLOCK;
849 return blocks;
850 }
851
DP_DestroyBlockInfo(DP_BlockInfo * blocks)852 void DP_DestroyBlockInfo(DP_BlockInfo *blocks)
853 {
854 if (!blocks)
855 return;
856 delete[] blocks->blockPositions;
857 delete[] blocks->blockSizes;
858 delete[] blocks->maxLoops;
859 delete[] blocks->freezeBlocks;
860 delete blocks;
861 }
862
DP_DestroyAlignmentResult(DP_AlignmentResult * alignment)863 void DP_DestroyAlignmentResult(DP_AlignmentResult *alignment)
864 {
865 if (!alignment)
866 return;
867 delete[] alignment->blockPositions;
868 delete alignment;
869 }
870
DP_DestroyMultipleAlignmentResults(DP_MultipleAlignmentResults * alignments)871 void DP_DestroyMultipleAlignmentResults(DP_MultipleAlignmentResults *alignments)
872 {
873 if (!alignments) return;
874 for (unsigned int i=0; i<alignments->nAlignments; ++i)
875 if (alignments->alignments[i].blockPositions)
876 delete[] alignments->alignments[i].blockPositions;
877 delete[] alignments->alignments;
878 delete alignments;
879 }
880
DP_CalculateMaxLoopLength(unsigned int nLoops,const unsigned int * loops,double percentile,unsigned int extension,unsigned int cutoff)881 unsigned int DP_CalculateMaxLoopLength(
882 unsigned int nLoops, const unsigned int *loops,
883 double percentile, unsigned int extension, unsigned int cutoff)
884 {
885 vector < unsigned int > loopLengths(nLoops);
886 unsigned int index, max;
887 for (index=0; index<nLoops; ++index)
888 loopLengths[index] = loops[index];
889
890 stable_sort(loopLengths.begin(), loopLengths.end());
891
892 if (percentile < 1.0) {
893 index = (unsigned int)(percentile * (nLoops - 1) + 0.5);
894 max = loopLengths[index] + extension;
895 } else { // percentile >= 1.0
896 max = ((unsigned int)(percentile * loopLengths[nLoops - 1] + 0.5)) + extension;
897 }
898
899 if (cutoff > 0 && max > cutoff)
900 max = cutoff;
901
902 return max;
903 }
904