1 /*  $Id: block_align.cpp 103491 2007-05-04 17:18:18Z kazimird $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Dynamic programming-based alignment algorithms: block aligner
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbidiag.hpp>
37 
38 #include <vector>
39 #include <list>
40 #include <algorithm>
41 
42 #include <algo/structure/struct_dp/struct_dp.h>
43 
44 USING_NCBI_SCOPE;
45 
46 
47 BEGIN_SCOPE(struct_dp)
48 
49 #define ERROR_MESSAGE(s) ERR_POST(Error << "block_align: " << s << '!')
50 #define WARNING_MESSAGE(s) ERR_POST(Warning << "block_align: " << s)
51 #define INFO_MESSAGE(s) ERR_POST(Info << "block_align: " << s)
52 
53 #define NO_TRACEBACK kMax_UInt
54 
55 class Cell
56 {
57 public:
58     int score;
59     unsigned int tracebackResidue;
Cell(void)60     Cell(void) : score(DP_NEGATIVE_INFINITY), tracebackResidue(NO_TRACEBACK) { }
61 };
62 
63 class Matrix
64 {
65 public:
66     typedef vector < Cell > ResidueRow;
67     typedef vector < ResidueRow > Grid;
68     Grid grid;
Matrix(unsigned int nBlocks,unsigned int nResidues)69     Matrix(unsigned int nBlocks, unsigned int nResidues) : grid(nBlocks + 1)
70         { for (unsigned int i=0; i<nBlocks; ++i) grid[i].resize(nResidues + 1); }
operator [](unsigned int block)71     ResidueRow& operator [] (unsigned int block) { return grid[block]; }
operator [](unsigned int block) const72     const ResidueRow& operator [] (unsigned int block) const { return grid[block]; }
73 };
74 
75 // checks to make sure frozen block positions are legal (borrowing my own code from blockalign.c)
ValidateFrozenBlockPositions(const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,bool checkGapSum)76 int ValidateFrozenBlockPositions(const DP_BlockInfo *blocks,
77     unsigned int queryFrom, unsigned int queryTo, bool checkGapSum)
78 {
79     static const unsigned int NONE = kMax_UInt;
80     unsigned int
81         block,
82         unfrozenBlocksLength = 0,
83         prevFrozenBlock = NONE,
84         maxGapsLength = 0;
85 
86     for (block=0; block<blocks->nBlocks; ++block) {
87 
88         /* keep track of max gap space between frozen blocks */
89         if (block > 0)
90             maxGapsLength += blocks->maxLoops[block - 1];
91 
92         /* to allow room for unfrozen blocks between frozen ones */
93         if (blocks->freezeBlocks[block] == DP_UNFROZEN_BLOCK) {
94             unfrozenBlocksLength += blocks->blockSizes[block];
95             continue;
96         }
97 
98         /* check absolute block end positions */
99         if (blocks->freezeBlocks[block] < queryFrom) {
100             ERROR_MESSAGE("Frozen block " << (block+1) << " can't start < " << (queryFrom+1));
101             return STRUCT_DP_PARAMETER_ERROR;
102         }
103         if (blocks->freezeBlocks[block] + blocks->blockSizes[block] - 1 > queryTo) {
104             ERROR_MESSAGE("Frozen block " << (block+1) << " can't end > " << (queryTo+1));
105             return STRUCT_DP_PARAMETER_ERROR;
106         }
107 
108         /* checks for legal distances between frozen blocks */
109         if (prevFrozenBlock != NONE) {
110             unsigned int prevFrozenBlockEnd =
111                 blocks->freezeBlocks[prevFrozenBlock] + blocks->blockSizes[prevFrozenBlock] - 1;
112 
113             /* check for adequate room for unfrozen blocks between frozen blocks */
114             if (blocks->freezeBlocks[block] <= (prevFrozenBlockEnd + unfrozenBlocksLength)) {
115                 ERROR_MESSAGE("Frozen block " << (block+1) << " starts before end of prior frozen block, "
116                     "or doesn't leave room for intervening unfrozen block(s)");
117                 return STRUCT_DP_PARAMETER_ERROR;
118             }
119 
120             /* check for too much gap space since last frozen block,
121                but if frozen blocks are adjacent, issue warning only */
122             if (checkGapSum &&
123                 blocks->freezeBlocks[block] > prevFrozenBlockEnd + 1 + unfrozenBlocksLength + maxGapsLength)
124             {
125                 if (prevFrozenBlock == block - 1) {
126                     WARNING_MESSAGE("Frozen block " << (block+1) << " is further than allowed loop length"
127                         " from adjacent frozen block " << (prevFrozenBlock+1));
128                 } else {
129                     ERROR_MESSAGE("Frozen block " << (block+1) << " is too far away from prior frozen block"
130                         " given allowed intervening loop length(s) " << maxGapsLength
131                         << " plus unfrozen block length(s) " << unfrozenBlocksLength);
132                     return STRUCT_DP_PARAMETER_ERROR;
133                 }
134             }
135         }
136 
137         /* reset counters after each frozen block */
138         prevFrozenBlock = block;
139         unfrozenBlocksLength = maxGapsLength = 0;
140     }
141 
142     return STRUCT_DP_OKAY;
143 }
144 
145 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateGlobalMatrix(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo)146 int CalculateGlobalMatrix(Matrix& matrix,
147     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
148     unsigned int queryFrom, unsigned int queryTo)
149 {
150     unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1;
151     int score = 0, sum;
152 
153     // find possible block positions, based purely on block lengths
154     vector < unsigned int > firstPos(blocks->nBlocks), lastPos(blocks->nBlocks);
155     for (block=0; block<=lastBlock; ++block) {
156         if (block == 0) {
157             firstPos[0] = queryFrom;
158             lastPos[lastBlock] = queryTo - blocks->blockSizes[lastBlock] + 1;
159         } else {
160             firstPos[block] = firstPos[block - 1] + blocks->blockSizes[block - 1];
161             lastPos[lastBlock - block] =
162                 lastPos[lastBlock - block + 1] - blocks->blockSizes[lastBlock - block];
163         }
164     }
165 
166     // further restrict the search if blocks are frozen
167     for (block=0; block<=lastBlock; ++block) {
168         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
169             if (blocks->freezeBlocks[block] < firstPos[block] ||
170                 blocks->freezeBlocks[block] > lastPos[block])
171             {
172                 ERROR_MESSAGE("CalculateGlobalMatrix() - frozen block "
173                     << (block+1) << " does not leave room for unfrozen blocks");
174                 return STRUCT_DP_PARAMETER_ERROR;
175             }
176             firstPos[block] = lastPos[block] = blocks->freezeBlocks[block];
177         }
178     }
179 
180     // fill in first row with scores of first block at all possible positions
181     for (residue=firstPos[0]; residue<=lastPos[0]; ++residue)
182         matrix[0][residue - queryFrom].score = BlockScore(0, residue);
183 
184     // for each successive block, find the best allowed pairing of the block with the previous block
185     bool blockScoreCalculated;
186     for (block=1; block<=lastBlock; ++block) {
187         for (residue=firstPos[block]; residue<=lastPos[block]; ++residue) {
188             blockScoreCalculated = false;
189 
190             for (prevResidue=firstPos[block - 1]; prevResidue<=lastPos[block - 1]; ++prevResidue) {
191 
192                 // current block must come after the previous block
193                 if (residue < prevResidue + blocks->blockSizes[block - 1])
194                     break;
195 
196                 // cut off at max loop length from previous block, but not if both blocks are frozen
197                 if (residue > prevResidue + blocks->blockSizes[block - 1] + blocks->maxLoops[block - 1] &&
198                         (blocks->freezeBlocks[block] == DP_UNFROZEN_BLOCK ||
199                          blocks->freezeBlocks[block - 1] == DP_UNFROZEN_BLOCK))
200                     continue;
201 
202                 // make sure previous block is at an allowed position
203                 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
204                     continue;
205 
206                 // get score at this position
207                 if (!blockScoreCalculated) {
208                     score = BlockScore(block, residue);
209                     if (score == DP_NEGATIVE_INFINITY)
210                         break;
211                     blockScoreCalculated = true;
212                 }
213 
214                 // find highest sum of scores for allowed pairing of this block with any previous
215                 sum = score + matrix[block - 1][prevResidue - queryFrom].score;
216                 if (sum > matrix[block][residue - queryFrom].score) {
217                     matrix[block][residue - queryFrom].score = sum;
218                     matrix[block][residue - queryFrom].tracebackResidue = prevResidue;
219                 }
220             }
221         }
222     }
223 
224     return STRUCT_DP_OKAY;
225 }
226 
227 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateGlobalMatrixGeneric(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo)228 int CalculateGlobalMatrixGeneric(Matrix& matrix,
229     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
230     unsigned int queryFrom, unsigned int queryTo)
231 {
232     unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1;
233     int blockScore = 0, sum;
234     unsigned int loopPenalty;
235 
236     // find possible block positions, based purely on block lengths
237     vector < unsigned int > firstPos(blocks->nBlocks), lastPos(blocks->nBlocks);
238     for (block=0; block<=lastBlock; ++block) {
239         if (block == 0) {
240             firstPos[0] = queryFrom;
241             lastPos[lastBlock] = queryTo - blocks->blockSizes[lastBlock] + 1;
242         } else {
243             firstPos[block] = firstPos[block - 1] + blocks->blockSizes[block - 1];
244             lastPos[lastBlock - block] =
245                 lastPos[lastBlock - block + 1] - blocks->blockSizes[lastBlock - block];
246         }
247     }
248 
249     // further restrict the search if blocks are frozen
250     for (block=0; block<=lastBlock; ++block) {
251         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
252             if (blocks->freezeBlocks[block] < firstPos[block] ||
253                 blocks->freezeBlocks[block] > lastPos[block])
254             {
255                 ERROR_MESSAGE("CalculateGlobalMatrix() - frozen block "
256                     << (block+1) << " does not leave room for unfrozen blocks");
257                 return STRUCT_DP_PARAMETER_ERROR;
258             }
259             firstPos[block] = lastPos[block] = blocks->freezeBlocks[block];
260         }
261     }
262 
263     // fill in first row with scores of first block at all possible positions
264     for (residue=firstPos[0]; residue<=lastPos[0]; ++residue)
265         matrix[0][residue - queryFrom].score = BlockScore(0, residue);
266 
267     // for each successive block, find the best allowed pairing of the block with the previous block
268     bool blockScoreCalculated;
269     for (block=1; block<=lastBlock; ++block) {
270         for (residue=firstPos[block]; residue<=lastPos[block]; ++residue) {
271             blockScoreCalculated = false;
272 
273             for (prevResidue=firstPos[block - 1]; prevResidue<=lastPos[block - 1]; ++prevResidue) {
274 
275                 // current block must come after the previous block
276                 if (residue < prevResidue + blocks->blockSizes[block - 1])
277                     break;
278 
279                 // make sure previous block is at an allowed position
280                 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
281                     continue;
282 
283                 // get loop score at this position; assume loop score zero if both frozen
284                 if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK &&
285                     blocks->freezeBlocks[block - 1] != DP_UNFROZEN_BLOCK)
286                 {
287                     loopPenalty = 0;
288                 } else {
289                     loopPenalty = LoopScore(block - 1, residue - prevResidue - blocks->blockSizes[block - 1]);
290                     if (loopPenalty == DP_POSITIVE_INFINITY)
291                         continue;
292                 }
293 
294                 // get score at this position
295                 if (!blockScoreCalculated) {
296                     blockScore = BlockScore(block, residue);
297                     if (blockScore == DP_NEGATIVE_INFINITY)
298                         break;
299                     blockScoreCalculated = true;
300                 }
301 
302                 // find highest sum of scores + loop score for allowed pairing of this block with previous
303                 sum = blockScore + matrix[block - 1][prevResidue - queryFrom].score - loopPenalty;
304                 if (sum > matrix[block][residue - queryFrom].score) {
305                     matrix[block][residue - queryFrom].score = sum;
306                     matrix[block][residue - queryFrom].tracebackResidue = prevResidue;
307                 }
308             }
309         }
310     }
311 
312     return STRUCT_DP_OKAY;
313 }
314 
315 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateLocalMatrix(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo)316 int CalculateLocalMatrix(Matrix& matrix,
317     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
318     unsigned int queryFrom, unsigned int queryTo)
319 {
320     unsigned int block, residue, prevResidue, lastBlock = blocks->nBlocks - 1, tracebackResidue = 0;
321     int score, sum, bestPrevScore;
322 
323     // find last possible block positions, based purely on block lengths
324     vector < unsigned int > lastPos(blocks->nBlocks);
325     for (block=0; block<=lastBlock; ++block) {
326         if (blocks->blockSizes[block] > queryTo - queryFrom + 1) {
327             ERROR_MESSAGE("Block " << (block+1) << " too large for this query range");
328             return STRUCT_DP_PARAMETER_ERROR;
329         }
330         lastPos[block] = queryTo - blocks->blockSizes[block] + 1;
331     }
332 
333     // first row: positive scores of first block at all possible positions
334     for (residue=queryFrom; residue<=lastPos[0]; ++residue) {
335         score = BlockScore(0, residue);
336         matrix[0][residue - queryFrom].score = (score > 0) ? score : 0;
337     }
338 
339     // first column: positive scores of all blocks at first positions
340     for (block=1; block<=lastBlock; ++block) {
341         score = BlockScore(block, queryFrom);
342         matrix[block][0].score = (score > 0) ? score : 0;
343     }
344 
345     // for each successive block, find the best positive scoring with a previous block, if any
346     for (block=1; block<=lastBlock; ++block) {
347         for (residue=queryFrom+1; residue<=lastPos[block]; ++residue) {
348 
349             // get score at this position
350             score = BlockScore(block, residue);
351             if (score == DP_NEGATIVE_INFINITY)
352                 continue;
353 
354             // find max score of any allowed previous block
355             bestPrevScore = DP_NEGATIVE_INFINITY;
356             for (prevResidue=queryFrom; prevResidue<=lastPos[block - 1]; ++prevResidue) {
357 
358                 // current block must come after the previous block
359                 if (residue < prevResidue + blocks->blockSizes[block - 1])
360                     break;
361 
362                 // cut off at max loop length from previous block
363                 if (residue > prevResidue + blocks->blockSizes[block - 1] + blocks->maxLoops[block - 1])
364                     continue;
365 
366                 // keep maximum score
367                 if (matrix[block - 1][prevResidue - queryFrom].score > bestPrevScore) {
368                     bestPrevScore = matrix[block - 1][prevResidue - queryFrom].score;
369                     tracebackResidue = prevResidue;
370                 }
371             }
372 
373             // extend alignment if the sum of this block's + previous block's score is positive
374             if (bestPrevScore > 0 && (sum=bestPrevScore+score) > 0) {
375                 matrix[block][residue - queryFrom].score = sum;
376                 matrix[block][residue - queryFrom].tracebackResidue = tracebackResidue;
377             }
378 
379             // otherwise, start new alignment if score is positive
380             else if (score > 0)
381                 matrix[block][residue - queryFrom].score = score;
382         }
383     }
384 
385     return STRUCT_DP_OKAY;
386 }
387 
388 // fill matrix values; return true on success. Matrix must have only default values when passed in.
CalculateLocalMatrixGeneric(Matrix & matrix,const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo)389 int CalculateLocalMatrixGeneric(Matrix& matrix,
390     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
391     unsigned int queryFrom, unsigned int queryTo)
392 {
393     unsigned int block, residue, prevResidue, loopPenalty,
394         lastBlock = blocks->nBlocks - 1, tracebackResidue = 0;
395     int score, sum, bestPrevScore;
396 
397     // find last possible block positions, based purely on block lengths
398     vector < unsigned int > lastPos(blocks->nBlocks);
399     for (block=0; block<=lastBlock; ++block) {
400         if (blocks->blockSizes[block] > queryTo - queryFrom + 1) {
401             ERROR_MESSAGE("Block " << (block+1) << " too large for this query range");
402             return STRUCT_DP_PARAMETER_ERROR;
403         }
404         lastPos[block] = queryTo - blocks->blockSizes[block] + 1;
405     }
406 
407     // first row: positive scores of first block at all possible positions
408     for (residue=queryFrom; residue<=lastPos[0]; ++residue) {
409         score = BlockScore(0, residue);
410         matrix[0][residue - queryFrom].score = (score > 0) ? score : 0;
411     }
412 
413     // first column: positive scores of all blocks at first positions
414     for (block=1; block<=lastBlock; ++block) {
415         score = BlockScore(block, queryFrom);
416         matrix[block][0].score = (score > 0) ? score : 0;
417     }
418 
419     // for each successive block, find the best positive scoring with a previous block, if any
420     for (block=1; block<=lastBlock; ++block) {
421         for (residue=queryFrom+1; residue<=lastPos[block]; ++residue) {
422 
423             // get score at this position
424             score = BlockScore(block, residue);
425             if (score == DP_NEGATIVE_INFINITY)
426                 continue;
427 
428             // find max score of any allowed previous block
429             bestPrevScore = DP_NEGATIVE_INFINITY;
430             for (prevResidue=queryFrom; prevResidue<=lastPos[block - 1]; ++prevResidue) {
431 
432                 // current block must come after the previous block
433                 if (residue < prevResidue + blocks->blockSizes[block - 1])
434                     break;
435 
436                 // make sure previous block is at an allowed position
437                 if (matrix[block - 1][prevResidue - queryFrom].score == DP_NEGATIVE_INFINITY)
438                     continue;
439 
440                 // get loop score
441                 loopPenalty = LoopScore(block - 1, residue - prevResidue - blocks->blockSizes[block - 1]);
442                 if (loopPenalty == DP_POSITIVE_INFINITY)
443                     continue;
444 
445                 // keep maximum score
446                 sum = matrix[block - 1][prevResidue - queryFrom].score - loopPenalty;
447                 if (sum > bestPrevScore) {
448                     bestPrevScore = sum;
449                     tracebackResidue = prevResidue;
450                 }
451             }
452 
453             // extend alignment if the sum of this block's + previous block's score is positive
454             if (bestPrevScore > 0 && (sum=bestPrevScore+score) > 0) {
455                 matrix[block][residue - queryFrom].score = sum;
456                 matrix[block][residue - queryFrom].tracebackResidue = tracebackResidue;
457             }
458 
459             // otherwise, start new alignment if score is positive
460             else if (score > 0)
461                 matrix[block][residue - queryFrom].score = score;
462         }
463     }
464 
465     return STRUCT_DP_OKAY;
466 }
467 
TracebackAlignment(const Matrix & matrix,unsigned int lastBlock,unsigned int lastBlockPos,unsigned int queryFrom,DP_AlignmentResult * alignment)468 int TracebackAlignment(const Matrix& matrix, unsigned int lastBlock, unsigned int lastBlockPos,
469     unsigned int queryFrom, DP_AlignmentResult *alignment)
470 {
471     // trace backwards from last block/pos
472     vector < unsigned int > blockPositions;
473     unsigned int block = lastBlock, pos = lastBlockPos;
474     do {
475         blockPositions.push_back(pos);  // list is backwards after this...
476         pos = matrix[block][pos - queryFrom].tracebackResidue;
477         --block;
478     } while (pos != NO_TRACEBACK);
479     unsigned int firstBlock = block + 1; // last block traced to == first block of the alignment
480 
481     // allocate and fill in alignment result structure
482     alignment->score = matrix[lastBlock][lastBlockPos - queryFrom].score;
483     alignment->firstBlock = firstBlock;
484     alignment->nBlocks = blockPositions.size();
485     alignment->blockPositions = new unsigned int[blockPositions.size()];
486     for (block=0; block<blockPositions.size(); ++block)
487         alignment->blockPositions[block] = blockPositions[lastBlock - firstBlock - block];
488 
489     return STRUCT_DP_FOUND_ALIGNMENT;
490 }
491 
TracebackGlobalAlignment(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)492 int TracebackGlobalAlignment(const Matrix& matrix,
493     const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
494     DP_AlignmentResult **alignment)
495 {
496     if (!alignment) {
497         ERROR_MESSAGE("TracebackGlobalAlignment() - NULL alignment handle");
498         return STRUCT_DP_PARAMETER_ERROR;
499     }
500     *alignment = NULL;
501 
502     // find max score (e.g., best-scoring position of last block)
503     int score = DP_NEGATIVE_INFINITY;
504     unsigned int residue, lastBlockPos = 0;
505     for (residue=queryFrom; residue<=queryTo; ++residue) {
506         if (matrix[blocks->nBlocks - 1][residue - queryFrom].score > score) {
507             score = matrix[blocks->nBlocks - 1][residue - queryFrom].score;
508             lastBlockPos = residue;
509         }
510     }
511 
512     if (score == DP_NEGATIVE_INFINITY) {
513         ERROR_MESSAGE("TracebackGlobalAlignment() - somehow failed to find any allowed global alignment");
514         return STRUCT_DP_ALGORITHM_ERROR;
515     }
516 
517 //    INFO_MESSAGE("Score of best global alignment: " << score);
518     *alignment = new DP_AlignmentResult;
519     return TracebackAlignment(matrix, blocks->nBlocks - 1, lastBlockPos, queryFrom, *alignment);
520 }
521 
TracebackLocalAlignment(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)522 int TracebackLocalAlignment(const Matrix& matrix,
523     const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
524     DP_AlignmentResult **alignment)
525 {
526     if (!alignment) {
527         ERROR_MESSAGE("TracebackLocalAlignment() - NULL alignment handle");
528         return STRUCT_DP_PARAMETER_ERROR;
529     }
530     *alignment = NULL;
531 
532     // find max score (e.g., best-scoring position of any block)
533     int score = DP_NEGATIVE_INFINITY;
534     unsigned int block, residue, lastBlock = 0, lastBlockPos = 0;
535     for (block=0; block<blocks->nBlocks; ++block) {
536         for (residue=queryFrom; residue<=queryTo; ++residue) {
537             if (matrix[block][residue - queryFrom].score > score) {
538                 score = matrix[block][residue - queryFrom].score;
539                 lastBlock = block;
540                 lastBlockPos = residue;
541             }
542         }
543     }
544 
545     if (score <= 0) {
546 //        INFO_MESSAGE("No positive-scoring local alignment found.");
547         return STRUCT_DP_NO_ALIGNMENT;
548     }
549 
550 //    INFO_MESSAGE("Score of best local alignment: " << score);
551     *alignment = new DP_AlignmentResult;
552     return TracebackAlignment(matrix, lastBlock, lastBlockPos, queryFrom, *alignment);
553 }
554 
555 typedef struct {
556     unsigned int block;
557     unsigned int residue;
558 } Traceback;
559 
TracebackMultipleLocalAlignments(const Matrix & matrix,const DP_BlockInfo * blocks,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)560 int TracebackMultipleLocalAlignments(const Matrix& matrix,
561     const DP_BlockInfo *blocks, unsigned int queryFrom, unsigned int queryTo,
562     DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
563 {
564     if (!alignments) {
565         ERROR_MESSAGE("TracebackMultipleLocalAlignments() - NULL alignments handle");
566         return STRUCT_DP_PARAMETER_ERROR;
567     }
568     *alignments = NULL;
569 
570     unsigned int block, residue;
571     vector < vector < bool > > usedCells(blocks->nBlocks);
572     for (block=0; block<blocks->nBlocks; ++block)
573         usedCells[block].resize(queryTo - queryFrom + 1, false);
574 
575     // find N max scores
576     list < Traceback > tracebacks;
577     while (maxAlignments == 0 || tracebacks.size() < maxAlignments) {
578 
579         // find next best scoring cell that's not part of an alignment already reported
580         int score = DP_NEGATIVE_INFINITY;
581         unsigned int lastBlock = 0, lastBlockPos = 0;
582         for (block=0; block<blocks->nBlocks; ++block) {
583             for (residue=queryFrom; residue<=queryTo; ++residue) {
584                 if (!usedCells[block][residue - queryFrom] &&
585                     matrix[block][residue - queryFrom].score > score)
586                 {
587                     score = matrix[block][residue - queryFrom].score;
588                     lastBlock = block;
589                     lastBlockPos = residue;
590                 }
591             }
592         }
593         if (score <= 0)
594             break;
595 
596         // mark cells of this alignment as used, and see if any part of this alignment
597         // has been reported before
598         block = lastBlock;
599         residue = lastBlockPos;
600         bool repeated = false;
601         do {
602             if (usedCells[block][residue - queryFrom]) {
603                 repeated = true;
604                 break;
605             } else {
606                 usedCells[block][residue - queryFrom] = true;
607             }
608             residue = matrix[block][residue - queryFrom].tracebackResidue;
609             --block;
610         } while (residue != NO_TRACEBACK);
611         if (repeated)
612             continue;
613 
614         // add traceback start to list
615         tracebacks.resize(tracebacks.size() + 1);
616         tracebacks.back().block = lastBlock;
617         tracebacks.back().residue = lastBlockPos;
618     }
619 
620     if (tracebacks.size() == 0) {
621 //        INFO_MESSAGE("No positive-scoring local alignment found.");
622         return STRUCT_DP_NO_ALIGNMENT;
623     }
624 
625     // allocate result structure
626     *alignments = new DP_MultipleAlignmentResults;
627     (*alignments)->nAlignments = 0;
628     (*alignments)->alignments = new DP_AlignmentResult[tracebacks.size()];
629     unsigned int a;
630     for (a=0; a<tracebacks.size(); ++a)
631         (*alignments)->alignments[a].blockPositions = NULL;
632 
633     // fill in results from tracebacks
634     list < Traceback >::const_iterator t, te = tracebacks.end();
635     for (t=tracebacks.begin(), a=0; t!=te; ++t, ++a) {
636         if (TracebackAlignment(matrix, t->block, t->residue, queryFrom, &((*alignments)->alignments[a]))
637                 == STRUCT_DP_FOUND_ALIGNMENT)
638         {
639             ++((*alignments)->nAlignments);
640 //            if (a == 0)
641 //                INFO_MESSAGE("Score of best local alignment: " << (*alignments)->alignments[a].score);
642 //            else
643 //                INFO_MESSAGE("Score of next local alignment: " << (*alignments)->alignments[a].score);
644         } else
645             return STRUCT_DP_ALGORITHM_ERROR;
646     }
647 
648     return STRUCT_DP_FOUND_ALIGNMENT;
649 }
650 
651 END_SCOPE(struct_dp)
652 
653 
654 // leave the C-called functions outside any namespace, just in case that might
655 // cause any problems when linking to C code...
656 USING_SCOPE(struct_dp);
657 
658 
DP_GlobalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)659 int DP_GlobalBlockAlign(
660     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
661     unsigned int queryFrom, unsigned int queryTo,
662     DP_AlignmentResult **alignment)
663 {
664     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
665         ERROR_MESSAGE("DP_GlobalBlockAlign() - invalid parameters");
666         return STRUCT_DP_PARAMETER_ERROR;
667     }
668 
669     unsigned int i, sumBlockLen = 0;
670     for (i=0; i<blocks->nBlocks; ++i)
671         sumBlockLen += blocks->blockSizes[i];
672     if (sumBlockLen > queryTo - queryFrom + 1) {
673         ERROR_MESSAGE("DP_GlobalBlockAlign() - sum of block lengths longer than query region");
674         return STRUCT_DP_PARAMETER_ERROR;
675     }
676 
677     int status = ValidateFrozenBlockPositions(blocks, queryFrom, queryTo, true);
678     if (status != STRUCT_DP_OKAY) {
679         ERROR_MESSAGE("DP_GlobalBlockAlign() - ValidateFrozenBlockPositions() returned error");
680         return status;
681     }
682 
683     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
684 
685     status = CalculateGlobalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
686     if (status != STRUCT_DP_OKAY) {
687         ERROR_MESSAGE("DP_GlobalBlockAlign() - CalculateGlobalMatrix() failed");
688         return status;
689     }
690 
691     return TracebackGlobalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
692 }
693 
DP_GlobalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)694 int DP_GlobalBlockAlignGeneric(
695     const DP_BlockInfo *blocks,
696     DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
697     unsigned int queryFrom, unsigned int queryTo,
698     DP_AlignmentResult **alignment)
699 {
700     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || queryTo < queryFrom ||
701             !BlockScore || !LoopScore) {
702         ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - invalid parameters");
703         return STRUCT_DP_PARAMETER_ERROR;
704     }
705 
706     unsigned int i, sumBlockLen = 0;
707     for (i=0; i<blocks->nBlocks; ++i)
708         sumBlockLen += blocks->blockSizes[i];
709     if (sumBlockLen > queryTo - queryFrom + 1) {
710         ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - sum of block lengths longer than query region");
711         return STRUCT_DP_PARAMETER_ERROR;
712     }
713 
714     int status = ValidateFrozenBlockPositions(blocks, queryFrom, queryTo, false);
715     if (status != STRUCT_DP_OKAY) {
716         ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - ValidateFrozenBlockPositions() returned error");
717         return status;
718     }
719 
720     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
721 
722     status = CalculateGlobalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
723     if (status != STRUCT_DP_OKAY) {
724         ERROR_MESSAGE("DP_GlobalBlockAlignGeneric() - CalculateGlobalMatrixGeneric() failed");
725         return status;
726     }
727 
728     return TracebackGlobalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
729 }
730 
DP_LocalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)731 int DP_LocalBlockAlign(
732     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
733     unsigned int queryFrom, unsigned int queryTo,
734     DP_AlignmentResult **alignment)
735 {
736     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
737         ERROR_MESSAGE("DP_LocalBlockAlign() - invalid parameters");
738         return STRUCT_DP_PARAMETER_ERROR;
739     }
740     for (unsigned int block=0; block<blocks->nBlocks; ++block) {
741         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
742             WARNING_MESSAGE("DP_LocalBlockAlign() - frozen block specifications are ignored...");
743             break;
744         }
745     }
746 
747     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
748 
749     int status = CalculateLocalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
750     if (status != STRUCT_DP_OKAY) {
751         ERROR_MESSAGE("DP_LocalBlockAlign() - CalculateLocalMatrix() failed");
752         return status;
753     }
754 
755     return TracebackLocalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
756 }
757 
DP_LocalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_AlignmentResult ** alignment)758 int DP_LocalBlockAlignGeneric(
759     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
760     unsigned int queryFrom, unsigned int queryTo,
761     DP_AlignmentResult **alignment)
762 {
763     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
764         ERROR_MESSAGE("DP_LocalBlockAlignGeneric() - invalid parameters");
765         return STRUCT_DP_PARAMETER_ERROR;
766     }
767     for (unsigned int block=0; block<blocks->nBlocks; ++block) {
768         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
769             WARNING_MESSAGE("DP_LocalBlockAlignGeneric() - frozen block specifications are ignored...");
770             break;
771         }
772     }
773 
774     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
775 
776     int status = CalculateLocalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
777     if (status != STRUCT_DP_OKAY) {
778         ERROR_MESSAGE("DP_LocalBlockAlignGeneric() - CalculateLocalMatrixGeneric() failed");
779         return status;
780     }
781 
782     return TracebackLocalAlignment(matrix, blocks, queryFrom, queryTo, alignment);
783 }
784 
DP_MultipleLocalBlockAlign(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)785 int DP_MultipleLocalBlockAlign(
786     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore,
787     unsigned int queryFrom, unsigned int queryTo,
788     DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
789 {
790     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
791         ERROR_MESSAGE("DP_MultipleLocalBlockAlign() - invalid parameters");
792         return STRUCT_DP_PARAMETER_ERROR;
793     }
794     for (unsigned int block=0; block<blocks->nBlocks; ++block) {
795         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
796             WARNING_MESSAGE("DP_MultipleLocalBlockAlign() - frozen block specifications are ignored...");
797             break;
798         }
799     }
800 
801     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
802 
803     int status = CalculateLocalMatrix(matrix, blocks, BlockScore, queryFrom, queryTo);
804     if (status != STRUCT_DP_OKAY) {
805         ERROR_MESSAGE("DP_MultipleLocalBlockAlign() - CalculateLocalMatrix() failed");
806         return status;
807     }
808 
809     return TracebackMultipleLocalAlignments(matrix, blocks, queryFrom, queryTo, alignments, maxAlignments);
810 }
811 
DP_MultipleLocalBlockAlignGeneric(const DP_BlockInfo * blocks,DP_BlockScoreFunction BlockScore,DP_LoopPenaltyFunction LoopScore,unsigned int queryFrom,unsigned int queryTo,DP_MultipleAlignmentResults ** alignments,unsigned int maxAlignments)812 int DP_MultipleLocalBlockAlignGeneric(
813     const DP_BlockInfo *blocks, DP_BlockScoreFunction BlockScore, DP_LoopPenaltyFunction LoopScore,
814     unsigned int queryFrom, unsigned int queryTo,
815     DP_MultipleAlignmentResults **alignments, unsigned int maxAlignments)
816 {
817     if (!blocks || blocks->nBlocks < 1 || !blocks->blockSizes || !BlockScore || queryTo < queryFrom) {
818         ERROR_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - invalid parameters");
819         return STRUCT_DP_PARAMETER_ERROR;
820     }
821     for (unsigned int block=0; block<blocks->nBlocks; ++block) {
822         if (blocks->freezeBlocks[block] != DP_UNFROZEN_BLOCK) {
823             WARNING_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - frozen block specifications are ignored...");
824             break;
825         }
826     }
827 
828     Matrix matrix(blocks->nBlocks, queryTo - queryFrom + 1);
829 
830     int status = CalculateLocalMatrixGeneric(matrix, blocks, BlockScore, LoopScore, queryFrom, queryTo);
831     if (status != STRUCT_DP_OKAY) {
832         ERROR_MESSAGE("DP_MultipleLocalBlockAlignGeneric() - CalculateLocalMatrixGeneric() failed");
833         return status;
834     }
835 
836     return TracebackMultipleLocalAlignments(matrix, blocks, queryFrom, queryTo, alignments, maxAlignments);
837 }
838 
DP_CreateBlockInfo(unsigned int nBlocks)839 DP_BlockInfo * DP_CreateBlockInfo(unsigned int nBlocks)
840 {
841     DP_BlockInfo *blocks = new DP_BlockInfo;
842     blocks->nBlocks = nBlocks;
843     blocks->blockPositions = new unsigned int[nBlocks];
844     blocks->blockSizes = new unsigned int[nBlocks];
845     blocks->maxLoops = new unsigned int[nBlocks - 1];
846     blocks->freezeBlocks = new unsigned int[nBlocks];
847     for (unsigned int i=0; i<nBlocks; ++i)
848         blocks->freezeBlocks[i] = DP_UNFROZEN_BLOCK;
849     return blocks;
850 }
851 
DP_DestroyBlockInfo(DP_BlockInfo * blocks)852 void DP_DestroyBlockInfo(DP_BlockInfo *blocks)
853 {
854     if (!blocks)
855         return;
856     delete[] blocks->blockPositions;
857     delete[] blocks->blockSizes;
858     delete[] blocks->maxLoops;
859     delete[] blocks->freezeBlocks;
860     delete blocks;
861 }
862 
DP_DestroyAlignmentResult(DP_AlignmentResult * alignment)863 void DP_DestroyAlignmentResult(DP_AlignmentResult *alignment)
864 {
865     if (!alignment)
866         return;
867     delete[] alignment->blockPositions;
868     delete alignment;
869 }
870 
DP_DestroyMultipleAlignmentResults(DP_MultipleAlignmentResults * alignments)871 void DP_DestroyMultipleAlignmentResults(DP_MultipleAlignmentResults *alignments)
872 {
873     if (!alignments) return;
874     for (unsigned int i=0; i<alignments->nAlignments; ++i)
875         if (alignments->alignments[i].blockPositions)
876             delete[] alignments->alignments[i].blockPositions;
877     delete[] alignments->alignments;
878     delete alignments;
879 }
880 
DP_CalculateMaxLoopLength(unsigned int nLoops,const unsigned int * loops,double percentile,unsigned int extension,unsigned int cutoff)881 unsigned int DP_CalculateMaxLoopLength(
882         unsigned int nLoops, const unsigned int *loops,
883         double percentile, unsigned int extension, unsigned int cutoff)
884 {
885     vector < unsigned int > loopLengths(nLoops);
886     unsigned int index, max;
887     for (index=0; index<nLoops; ++index)
888         loopLengths[index] = loops[index];
889 
890     stable_sort(loopLengths.begin(), loopLengths.end());
891 
892     if (percentile < 1.0) {
893         index = (unsigned int)(percentile * (nLoops - 1) + 0.5);
894         max = loopLengths[index] + extension;
895     } else {  // percentile >= 1.0
896         max = ((unsigned int)(percentile * loopLengths[nLoops - 1] + 0.5)) + extension;
897     }
898 
899     if (cutoff > 0 && max > cutoff)
900         max = cutoff;
901 
902     return max;
903 }
904