1 /*  $Id: su_block_multiple_alignment.cpp 121421 2008-03-06 13:58:33Z thiessen $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Classes to hold alignment data
30 *
31 * ===========================================================================
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbi_limits.hpp>
37 #include <util/tables/raw_scoremat.h>
38 
39 #include <objects/seqalign/Dense_diag.hpp>
40 
41 #include <algo/structure/struct_util/su_block_multiple_alignment.hpp>
42 #include <algo/structure/struct_util/su_sequence_set.hpp>
43 #include <algo/structure/struct_util/su_pssm.hpp>
44 #include "su_private.hpp"
45 
46 USING_NCBI_SCOPE;
47 USING_SCOPE(objects);
48 
49 
BEGIN_SCOPE(struct_util)50 BEGIN_SCOPE(struct_util)
51 
52 BlockMultipleAlignment::BlockMultipleAlignment(const SequenceList& sequenceList)
53 {
54     m_sequences = sequenceList;
55     m_pssm = NULL;
56 
57     InitCache();
58     m_rowDoubles.resize(m_sequences.size(), 0.0);
59     m_rowStrings.resize(m_sequences.size());
60 }
61 
InitCache(void)62 void BlockMultipleAlignment::InitCache(void)
63 {
64     m_cachePrevRow = eUndefined;
65     m_cachePrevBlock = NULL;
66     m_cacheBlockIterator = m_blocks.begin();
67 }
68 
~BlockMultipleAlignment(void)69 BlockMultipleAlignment::~BlockMultipleAlignment(void)
70 {
71     RemovePSSM();
72 }
73 
Clone(void) const74 BlockMultipleAlignment * BlockMultipleAlignment::Clone(void) const
75 {
76     BlockMultipleAlignment *copy = new BlockMultipleAlignment(m_sequences);
77     BlockList::const_iterator b, be = m_blocks.end();
78     for (b=m_blocks.begin(); b!=be; ++b)
79         copy->m_blocks.push_back(CRef<Block>((*b)->Clone(copy)));
80     copy->UpdateBlockMap();
81     copy->m_rowDoubles = m_rowDoubles;
82     copy->m_rowStrings = m_rowStrings;
83     return copy;
84 }
85 
86 /*
87 static inline unsigned int ScreenResidueCharacter(char original)
88 {
89     char ch = toupper((unsigned char) original);
90     switch (ch) {
91         case 'A': case 'R': case 'N': case 'D': case 'C':
92         case 'Q': case 'E': case 'G': case 'H': case 'I':
93         case 'L': case 'K': case 'M': case 'F': case 'P':
94         case 'S': case 'T': case 'W': case 'Y': case 'V':
95         case 'B': case 'Z':
96             break;
97         default:
98             ch = 'X'; // make all but natural aa's just 'X'
99     }
100     return ch;
101 }
102 
103 static int GetBLOSUM62Score(char a, char b)
104 {
105     static SNCBIFullScoreMatrix Blosum62Matrix;
106     static bool unpacked = false;
107 
108     if (!unpacked) {
109         NCBISM_Unpack(&NCBISM_Blosum62, &Blosum62Matrix);
110         unpacked = true;
111     }
112 
113     return Blosum62Matrix.s[ScreenResidueCharacter(a)][ScreenResidueCharacter(b)];
114 }
115 */
116 
GetPSSM(void) const117 const BLAST_Matrix * BlockMultipleAlignment::GetPSSM(void) const
118 {
119     if (m_pssm) return m_pssm;
120     m_pssm = CreateBlastMatrix(this);
121     return m_pssm;
122 }
123 
RemovePSSM(void) const124 void BlockMultipleAlignment::RemovePSSM(void) const
125 {
126     if (m_pssm) {
127         delete m_pssm;
128         m_pssm = NULL;
129     }
130 }
131 
GetBlockList(ConstBlockList & cbl) const132 void BlockMultipleAlignment::GetBlockList(ConstBlockList& cbl) const
133 {
134     cbl.clear();
135     cbl.reserve(m_blocks.size());
136     BlockList::const_iterator b, be = m_blocks.end();
137     for (b=m_blocks.begin(); b!=be; ++b)
138         cbl.push_back(CConstRef<Block>(b->GetPointer()));
139 }
140 
CheckAlignedBlock(const Block * block) const141 bool BlockMultipleAlignment::CheckAlignedBlock(const Block *block) const
142 {
143     if (!block || !block->IsAligned()) {
144         ERROR_MESSAGE("CheckAlignedBlock() - checks aligned blocks only");
145         return false;
146     }
147     if (block->NSequences() != m_sequences.size()) {
148         ERROR_MESSAGE("CheckAlignedBlock() - block size mismatch");
149         return false;
150     }
151 
152     // make sure ranges are reasonable for each sequence
153     unsigned int row;
154     const Block
155         *prevBlock = GetBlockBefore(block),
156         *nextBlock = GetBlockAfter(block);
157     const Block::Range *range, *prevRange = NULL, *nextRange = NULL;
158     SequenceList::const_iterator sequence = m_sequences.begin();
159     for (row=0; row<block->NSequences(); ++row, ++sequence) {
160         range = block->GetRangeOfRow(row);
161         if (prevBlock) prevRange = prevBlock->GetRangeOfRow(row);
162         if (nextBlock) nextRange = nextBlock->GetRangeOfRow(row);
163         if (range->to - range->from + 1 != (int)block->m_width ||   // check block m_width
164             (prevRange && range->from <= prevRange->to) ||          // check for range overlap
165             (nextRange && range->to >= nextRange->from) ||          // check for range overlap
166             range->from > range->to ||                              // check range values
167             range->to >= (int)(*sequence)->Length())                // check bounds of end
168         {
169             ERROR_MESSAGE("CheckAlignedBlock() - range error");
170             return false;
171         }
172     }
173 
174     return true;
175 }
176 
AddAlignedBlockAtEnd(UngappedAlignedBlock * newBlock)177 bool BlockMultipleAlignment::AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock)
178 {
179     m_blocks.push_back(CRef<Block>(newBlock));
180     return CheckAlignedBlock(newBlock);
181 }
182 
183 UnalignedBlock * BlockMultipleAlignment::
CreateNewUnalignedBlockBetween(const Block * leftBlock,const Block * rightBlock)184     CreateNewUnalignedBlockBetween(const Block *leftBlock, const Block *rightBlock)
185 {
186     if ((leftBlock && !leftBlock->IsAligned()) ||
187         (rightBlock && !rightBlock->IsAligned())) {
188         ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - passed an unaligned block");
189         return NULL;
190     }
191 
192     unsigned int row, from, to, length;
193     SequenceList::const_iterator s, se = m_sequences.end();
194 
195     UnalignedBlock *newBlock = new UnalignedBlock(this);
196     newBlock->m_width = 0;
197 
198     for (row=0, s=m_sequences.begin(); s!=se; ++row, ++s) {
199 
200         if (leftBlock)
201             from = leftBlock->GetRangeOfRow(row)->to + 1;
202         else
203             from = 0;
204 
205         if (rightBlock)
206             to = rightBlock->GetRangeOfRow(row)->from - 1;
207         else
208             to = (*s)->Length() - 1;
209 
210         newBlock->SetRangeOfRow(row, from, to);
211 
212         length = to - from + 1;
213         if ((((int)to) - ((int)from) + 1) < 0) { // just to make sure...
214             ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - unaligned length < 0");
215             return NULL;
216         }
217         if (length > newBlock->m_width)
218             newBlock->m_width = length;
219     }
220 
221     if (newBlock->m_width == 0) {
222         delete newBlock;
223         return NULL;
224     } else
225         return newBlock;
226 }
227 
AddUnalignedBlocks(void)228 bool BlockMultipleAlignment::AddUnalignedBlocks(void)
229 {
230     BlockList::iterator a, ae = m_blocks.end();
231     const Block *alignedBlock = NULL, *prevAlignedBlock = NULL;
232     Block *newUnalignedBlock;
233 
234     // unaligned m_blocks to the left of each aligned block
235     for (a=m_blocks.begin(); a!=ae; ++a) {
236         alignedBlock = *a;
237         newUnalignedBlock = CreateNewUnalignedBlockBetween(prevAlignedBlock, alignedBlock);
238         if (newUnalignedBlock)
239             m_blocks.insert(a, CRef<Block>(newUnalignedBlock));
240         prevAlignedBlock = alignedBlock;
241     }
242 
243     // right tail
244     newUnalignedBlock = CreateNewUnalignedBlockBetween(alignedBlock, NULL);
245     if (newUnalignedBlock) {
246         m_blocks.insert(a, CRef<Block>(newUnalignedBlock));
247     }
248 
249     return true;
250 }
251 
UpdateBlockMap(bool clearRowInfo)252 bool BlockMultipleAlignment::UpdateBlockMap(bool clearRowInfo)
253 {
254     unsigned int i = 0, j, n = 0;
255     BlockList::iterator b, be = m_blocks.end();
256 
257     // reset old stuff, recalculate m_width
258     m_totalWidth = 0;
259     for (b=m_blocks.begin(); b!=be; ++b)
260         m_totalWidth += (*b)->m_width;
261 
262     // fill out the block map
263     m_blockMap.resize(m_totalWidth);
264     UngappedAlignedBlock *aBlock;
265     for (b=m_blocks.begin(); b!=be; ++b) {
266         aBlock = dynamic_cast<UngappedAlignedBlock*>(b->GetPointer());
267         if (aBlock)
268             ++n;
269         for (j=0; j<(*b)->m_width; ++j, ++i) {
270             m_blockMap[i].block = *b;
271             m_blockMap[i].blockColumn = j;
272             m_blockMap[i].alignedBlockNum = aBlock ? n : eUndefined;
273         }
274     }
275 
276     // if alignment changes, any pssm/scores/status become invalid
277     RemovePSSM();
278     if (clearRowInfo) ClearRowInfo();
279 
280     return true;
281 }
282 
GetCharacterAt(unsigned int alignmentColumn,unsigned int row,eUnalignedJustification justification,char * character) const283 bool BlockMultipleAlignment::GetCharacterAt(
284     unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification,
285     char *character) const
286 {
287     const Sequence *sequence;
288     unsigned int seqIndex;
289     bool isAligned;
290 
291     if (!GetSequenceAndIndexAt(alignmentColumn, row, justification, &sequence, &seqIndex, &isAligned))
292         return false;
293 
294     *character = (seqIndex != eUndefined) ? sequence->m_sequenceString[seqIndex] : '~';
295     if (isAligned)
296         *character = toupper((unsigned char)(*character));
297     else
298         *character = tolower((unsigned char)(*character));
299 
300     return true;
301 }
302 
GetSequenceAndIndexAt(unsigned int alignmentColumn,unsigned int row,eUnalignedJustification requestedJustification,const Sequence ** sequence,unsigned int * index,bool * isAligned) const303 bool BlockMultipleAlignment::GetSequenceAndIndexAt(
304     unsigned int alignmentColumn, unsigned int row, eUnalignedJustification requestedJustification,
305     const Sequence **sequence, unsigned int *index, bool *isAligned) const
306 {
307     if (sequence)
308         *sequence = m_sequences[row];
309 
310     const BlockInfo& blockInfo = m_blockMap[alignmentColumn];
311 
312     if (!blockInfo.block->IsAligned()) {
313         if (isAligned)
314             *isAligned = false;
315         // override requested justification for end m_blocks
316         if (blockInfo.block == m_blocks.back().GetPointer()) // also true if there's a single aligned block
317             requestedJustification = eLeft;
318         else if (blockInfo.block == m_blocks.front().GetPointer())
319             requestedJustification = eRight;
320     } else
321         if (isAligned)
322             *isAligned = true;
323 
324     if (index)
325         *index = blockInfo.block->GetIndexAt(blockInfo.blockColumn, row, requestedJustification);
326 
327     return true;
328 }
329 
IsAligned(unsigned int row,unsigned int seqIndex) const330 bool BlockMultipleAlignment::IsAligned(unsigned int row, unsigned int seqIndex) const
331 {
332     const Block *block = GetBlock(row, seqIndex);
333     return (block && block->IsAligned());
334 }
335 
GetBlock(unsigned int row,unsigned int seqIndex) const336 const Block * BlockMultipleAlignment::GetBlock(unsigned int row, unsigned int seqIndex) const
337 {
338     // make sure we're in range for this sequence
339     if (row >= NRows() || seqIndex >= m_sequences[row]->Length()) {
340         ERROR_MESSAGE("BlockMultipleAlignment::GetBlock() - coordinate out of range");
341         return NULL;
342     }
343 
344     const Block::Range *range;
345 
346     // first check to see if it's in the same block as last time.
347     if (m_cachePrevBlock) {
348         range = m_cachePrevBlock->GetRangeOfRow(row);
349         if ((int)seqIndex >= range->from && (int)seqIndex <= range->to)
350             return m_cachePrevBlock;
351         ++m_cacheBlockIterator; // start search at next block
352     } else {
353         m_cacheBlockIterator = m_blocks.begin();
354     }
355 
356     // otherwise, perform block search. This search is most efficient when queries
357     // happen in order from left to right along a given row.
358     do {
359         if (m_cacheBlockIterator == m_blocks.end())
360             m_cacheBlockIterator = m_blocks.begin();
361         range = (*m_cacheBlockIterator)->GetRangeOfRow(row);
362         if ((int)seqIndex >= range->from && (int)seqIndex <= range->to) {
363             m_cachePrevBlock = *m_cacheBlockIterator; // cache this block
364             return m_cachePrevBlock;
365         }
366         ++m_cacheBlockIterator;
367     } while (1);
368 }
369 
GetFirstAlignedBlockPosition(void) const370 unsigned int BlockMultipleAlignment::GetFirstAlignedBlockPosition(void) const
371 {
372     BlockList::const_iterator b = m_blocks.begin();
373     if (m_blocks.size() > 0 && (*b)->IsAligned()) // first block is aligned
374         return 0;
375     else if (m_blocks.size() >= 2 && (*(++b))->IsAligned()) // second block is aligned
376         return m_blocks.front()->m_width;
377     else
378         return eUndefined;
379 }
380 
GetAlignedSlaveIndex(unsigned int masterSeqIndex,unsigned int slaveRow) const381 unsigned int BlockMultipleAlignment::GetAlignedSlaveIndex(unsigned int masterSeqIndex, unsigned int slaveRow) const
382 {
383     const UngappedAlignedBlock
384         *aBlock = dynamic_cast<const UngappedAlignedBlock*>(GetBlock(0, masterSeqIndex));
385     if (!aBlock)
386         return eUndefined;
387 
388     const Block::Range
389         *masterRange = aBlock->GetRangeOfRow(0),
390         *slaveRange = aBlock->GetRangeOfRow(slaveRow);
391     return (slaveRange->from + masterSeqIndex - masterRange->from);
392 }
393 
GetAlignedBlockPosition(unsigned int alignmentIndex,unsigned int * blockColumn,unsigned int * blockWidth) const394 void BlockMultipleAlignment::GetAlignedBlockPosition(unsigned int alignmentIndex,
395     unsigned int *blockColumn, unsigned int *blockWidth) const
396 {
397     *blockColumn = *blockWidth = eUndefined;
398     const BlockInfo& info = m_blockMap[alignmentIndex];
399     if (info.block->IsAligned()) {
400         *blockColumn = info.blockColumn;
401         *blockWidth = info.block->m_width;
402     }
403 }
404 
GetBlockBefore(const Block * block)405 Block * BlockMultipleAlignment::GetBlockBefore(const Block *block)
406 {
407     Block *prevBlock = NULL;
408     BlockList::iterator b, be = m_blocks.end();
409     for (b=m_blocks.begin(); b!=be; ++b) {
410         if (*b == block) break;
411         prevBlock = b->GetPointer();
412     }
413     return prevBlock;
414 }
415 
GetBlockBefore(const Block * block) const416 const Block * BlockMultipleAlignment::GetBlockBefore(const Block *block) const
417 {
418     const Block *prevBlock = NULL;
419     BlockList::const_iterator b, be = m_blocks.end();
420     for (b=m_blocks.begin(); b!=be; ++b) {
421         if (*b == block) break;
422         prevBlock = b->GetPointer();
423     }
424     return prevBlock;
425 }
426 
GetBlockAfter(const Block * block)427 Block * BlockMultipleAlignment::GetBlockAfter(const Block *block)
428 {
429     BlockList::iterator b, be = m_blocks.end();
430     for (b=m_blocks.begin(); b!=be; ++b) {
431         if (*b == block) {
432             ++b;
433             if (b == be) break;
434             return *b;
435         }
436     }
437     return NULL;
438 }
439 
GetBlockAfter(const Block * block) const440 const Block * BlockMultipleAlignment::GetBlockAfter(const Block *block) const
441 {
442     BlockList::const_iterator b, be = m_blocks.end();
443     for (b=m_blocks.begin(); b!=be; ++b) {
444         if (*b == block) {
445             ++b;
446             if (b == be) break;
447             return *b;
448         }
449     }
450     return NULL;
451 }
452 
GetUnalignedBlockBefore(const UngappedAlignedBlock * aBlock) const453 const UnalignedBlock * BlockMultipleAlignment::GetUnalignedBlockBefore(
454     const UngappedAlignedBlock *aBlock) const
455 {
456     const Block *prevBlock;
457     if (aBlock)
458         prevBlock = GetBlockBefore(aBlock);
459     else
460         prevBlock = m_blocks.back().GetPointer();
461     return dynamic_cast<const UnalignedBlock*>(prevBlock);
462 }
463 
InsertBlockBefore(Block * newBlock,const Block * insertAt)464 void BlockMultipleAlignment::InsertBlockBefore(Block *newBlock, const Block *insertAt)
465 {
466     BlockList::iterator b, be = m_blocks.end();
467     for (b=m_blocks.begin(); b!=be; ++b) {
468         if (*b == insertAt) {
469             m_blocks.insert(b, CRef<Block>(newBlock));
470             return;
471         }
472     }
473     WARNING_MESSAGE("BlockMultipleAlignment::InsertBlockBefore() - couldn't find insertAt block");
474 }
475 
InsertBlockAfter(const Block * insertAt,Block * newBlock)476 void BlockMultipleAlignment::InsertBlockAfter(const Block *insertAt, Block *newBlock)
477 {
478     BlockList::iterator b, be = m_blocks.end();
479     for (b=m_blocks.begin(); b!=be; ++b) {
480         if (*b == insertAt) {
481             ++b;
482             m_blocks.insert(b, CRef<Block>(newBlock));
483             return;
484         }
485     }
486     WARNING_MESSAGE("BlockMultipleAlignment::InsertBlockBefore() - couldn't find insertAt block");
487 }
488 
RemoveBlock(Block * block)489 void BlockMultipleAlignment::RemoveBlock(Block *block)
490 {
491     BlockList::iterator b, be = m_blocks.end();
492     for (b=m_blocks.begin(); b!=be; ++b) {
493         if (*b == block) {
494             m_blocks.erase(b);
495             InitCache();
496             return;
497         }
498     }
499     WARNING_MESSAGE("BlockMultipleAlignment::RemoveBlock() - couldn't find block");
500 }
501 
MoveBlockBoundary(unsigned int columnFrom,unsigned int columnTo)502 bool BlockMultipleAlignment::MoveBlockBoundary(unsigned int columnFrom, unsigned int columnTo)
503 {
504     unsigned int blockColumn, blockWidth;
505     GetAlignedBlockPosition(columnFrom, &blockColumn, &blockWidth);
506     if (blockColumn == eUndefined || blockWidth == 0 || blockWidth == eUndefined) return false;
507 
508     TRACE_MESSAGE("trying to move block boundary from " << columnFrom << " to " << columnTo);
509 
510     const BlockInfo& info = m_blockMap[columnFrom];
511     unsigned int row;
512     int requestedShift = columnTo - columnFrom, actualShift = 0;
513     const Block::Range *range;
514 
515     // shrink block from left
516     if (blockColumn == 0 && requestedShift > 0 && requestedShift < (int) info.block->m_width) {
517         actualShift = requestedShift;
518         TRACE_MESSAGE("shrinking block from left");
519         for (row=0; row<NRows(); ++row) {
520             range = info.block->GetRangeOfRow(row);
521             info.block->SetRangeOfRow(row, range->from + requestedShift, range->to);
522         }
523         info.block->m_width -= requestedShift;
524         Block *prevBlock = GetBlockBefore(info.block);
525         if (prevBlock && !prevBlock->IsAligned()) {
526             for (row=0; row<NRows(); ++row) {
527                 range = prevBlock->GetRangeOfRow(row);
528                 prevBlock->SetRangeOfRow(row, range->from, range->to + requestedShift);
529             }
530             prevBlock->m_width += requestedShift;
531         } else {
532             Block *newUnalignedBlock = CreateNewUnalignedBlockBetween(prevBlock, info.block);
533             if (newUnalignedBlock)
534                 InsertBlockBefore(newUnalignedBlock, info.block);
535             TRACE_MESSAGE("added new unaligned block");
536         }
537     }
538 
539     // shrink block from right (requestedShift < 0)
540     else if (blockColumn == info.block->m_width - 1 &&
541                 requestedShift < 0 && ((unsigned int) -requestedShift) < info.block->m_width) {
542         actualShift = requestedShift;
543         TRACE_MESSAGE("shrinking block from right");
544         for (row=0; row<NRows(); ++row) {
545             range = info.block->GetRangeOfRow(row);
546             info.block->SetRangeOfRow(row, range->from, range->to + requestedShift);
547         }
548         info.block->m_width += requestedShift;
549         Block *nextBlock = GetBlockAfter(info.block);
550         if (nextBlock && !nextBlock->IsAligned()) {
551             for (row=0; row<NRows(); ++row) {
552                 range = nextBlock->GetRangeOfRow(row);
553                 nextBlock->SetRangeOfRow(row, range->from + requestedShift, range->to);
554             }
555             nextBlock->m_width -= requestedShift;
556         } else {
557             Block *newUnalignedBlock = CreateNewUnalignedBlockBetween(info.block, nextBlock);
558             if (newUnalignedBlock)
559                 InsertBlockAfter(info.block, newUnalignedBlock);
560             TRACE_MESSAGE("added new unaligned block");
561         }
562     }
563 
564     // grow block to right
565     else if (blockColumn == info.block->m_width - 1 && requestedShift > 0) {
566         Block *nextBlock = GetBlockAfter(info.block);
567         if (nextBlock && !nextBlock->IsAligned()) {
568             int nRes;
569             actualShift = requestedShift;
570             for (row=0; row<NRows(); ++row) {
571                 range = nextBlock->GetRangeOfRow(row);
572                 nRes = range->to - range->from + 1;
573                 if (nRes < actualShift)
574                     actualShift = nRes;
575             }
576             if (actualShift) {
577                 TRACE_MESSAGE("growing block to right");
578                 for (row=0; row<NRows(); ++row) {
579                     range = info.block->GetRangeOfRow(row);
580                     info.block->SetRangeOfRow(row, range->from, range->to + actualShift);
581                     range = nextBlock->GetRangeOfRow(row);
582                     nextBlock->SetRangeOfRow(row, range->from + actualShift, range->to);
583                 }
584                 info.block->m_width += actualShift;
585                 nextBlock->m_width -= actualShift;
586                 if (nextBlock->m_width == 0) {
587                     RemoveBlock(nextBlock);
588                     TRACE_MESSAGE("removed empty block");
589                 }
590             }
591         }
592     }
593 
594     // grow block to left (requestedShift < 0)
595     else if (blockColumn == 0 && requestedShift < 0) {
596         Block *prevBlock = GetBlockBefore(info.block);
597         if (prevBlock && !prevBlock->IsAligned()) {
598             int nRes;
599             actualShift = requestedShift;
600             for (row=0; row<NRows(); ++row) {
601                 range = prevBlock->GetRangeOfRow(row);
602                 nRes = range->to - range->from + 1;
603                 if (nRes < -actualShift) actualShift = -nRes;
604             }
605             if (actualShift) {
606                 TRACE_MESSAGE("growing block to left");
607                 for (row=0; row<NRows(); ++row) {
608                     range = info.block->GetRangeOfRow(row);
609                     info.block->SetRangeOfRow(row, range->from + actualShift, range->to);
610                     range = prevBlock->GetRangeOfRow(row);
611                     prevBlock->SetRangeOfRow(row, range->from, range->to + actualShift);
612                 }
613                 info.block->m_width -= actualShift;
614                 prevBlock->m_width += actualShift;
615                 if (prevBlock->m_width == 0) {
616                     RemoveBlock(prevBlock);
617                     TRACE_MESSAGE("removed empty block");
618                 }
619             }
620         }
621     }
622 
623     if (actualShift != 0) {
624         UpdateBlockMap();
625         return true;
626     } else
627         return false;
628 }
629 
ShiftRow(unsigned int row,unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex,eUnalignedJustification justification)630 bool BlockMultipleAlignment::ShiftRow(unsigned int row, unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
631     eUnalignedJustification justification)
632 {
633     if (fromAlignmentIndex == toAlignmentIndex)
634         return false;
635 
636     Block
637         *blockFrom = m_blockMap[fromAlignmentIndex].block,
638         *blockTo = m_blockMap[toAlignmentIndex].block;
639 
640     // at least one end of the drag must be in an aligned block
641     UngappedAlignedBlock *ABlock =
642         dynamic_cast<UngappedAlignedBlock*>(blockFrom);
643     if (ABlock) {
644         if (blockTo != blockFrom && blockTo->IsAligned())
645             return false;
646     } else {
647         ABlock = dynamic_cast<UngappedAlignedBlock*>(blockTo);
648         if (!ABlock)
649             return false;
650     }
651 
652     // and the other must be in the same aligned block or an adjacent unaligned block
653     UnalignedBlock
654         *prevUABlock = dynamic_cast<UnalignedBlock*>(GetBlockBefore(ABlock)),
655         *nextUABlock = dynamic_cast<UnalignedBlock*>(GetBlockAfter(ABlock));
656     if (blockFrom != blockTo &&
657         ((ABlock == blockFrom && prevUABlock != blockTo && nextUABlock != blockTo) ||
658          (ABlock == blockTo && prevUABlock != blockFrom && nextUABlock != blockFrom)))
659         return false;
660 
661     int requestedShift, actualShift = 0, width = 0;
662 
663     // slightly different behaviour when dragging from unaligned to aligned...
664     if (!blockFrom->IsAligned()) {
665         unsigned int fromSeqIndex, toSeqIndex;
666         GetSequenceAndIndexAt(fromAlignmentIndex, row, justification, NULL, &fromSeqIndex, NULL);
667         GetSequenceAndIndexAt(toAlignmentIndex, row, justification, NULL, &toSeqIndex, NULL);
668         if (fromSeqIndex == eUndefined || toSeqIndex == eUndefined)
669             return false;
670         requestedShift = toSeqIndex - fromSeqIndex;
671     }
672 
673     // vs. dragging from aligned
674     else {
675         requestedShift = toAlignmentIndex - fromAlignmentIndex;
676     }
677 
678     const Block::Range *prevRange = NULL, *nextRange = NULL, *range = ABlock->GetRangeOfRow(row);
679     if (prevUABlock) prevRange = prevUABlock->GetRangeOfRow(row);
680     if (nextUABlock) nextRange = nextUABlock->GetRangeOfRow(row);
681     if (requestedShift > 0) {
682         if (prevUABlock)
683             width = prevRange->to - prevRange->from + 1;
684         actualShift = (width > requestedShift) ? requestedShift : width;
685     } else {
686         if (nextUABlock)
687             width = nextRange->to - nextRange->from + 1;
688         actualShift = (width > -requestedShift) ? requestedShift : -width;
689     }
690     if (actualShift == 0) return false;
691 
692     TRACE_MESSAGE("shifting row " << row << " by " << actualShift);
693 
694     ABlock->SetRangeOfRow(row, range->from - actualShift, range->to - actualShift);
695 
696     if (prevUABlock) {
697         prevUABlock->SetRangeOfRow(row, prevRange->from, prevRange->to - actualShift);
698         prevUABlock->m_width = 0;
699         for (unsigned int i=0; i<NRows(); ++i) {
700             prevRange = prevUABlock->GetRangeOfRow(i);
701             width = prevRange->to - prevRange->from + 1;
702             if (width < 0)
703                 ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - negative width on left");
704             if ((unsigned int) width > prevUABlock->m_width)
705                 prevUABlock->m_width = width;
706         }
707         if (prevUABlock->m_width == 0) {
708             TRACE_MESSAGE("removing zero-m_width unaligned block on left");
709             RemoveBlock(prevUABlock);
710         }
711     } else {
712         TRACE_MESSAGE("creating unaligned block on left");
713         prevUABlock = CreateNewUnalignedBlockBetween(GetBlockBefore(ABlock), ABlock);
714         InsertBlockBefore(prevUABlock, ABlock);
715     }
716 
717     if (nextUABlock) {
718         nextUABlock->SetRangeOfRow(row, nextRange->from - actualShift, nextRange->to);
719         nextUABlock->m_width = 0;
720         for (unsigned int i=0; i<NRows(); ++i) {
721             nextRange = nextUABlock->GetRangeOfRow(i);
722             width = nextRange->to - nextRange->from + 1;
723             if (width < 0)
724                 ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - negative width on right");
725             if ((unsigned int) width > nextUABlock->m_width)
726                 nextUABlock->m_width = width;
727         }
728         if (nextUABlock->m_width == 0) {
729             TRACE_MESSAGE("removing zero-m_width unaligned block on right");
730             RemoveBlock(nextUABlock);
731         }
732     } else {
733         TRACE_MESSAGE("creating unaligned block on right");
734         nextUABlock = CreateNewUnalignedBlockBetween(ABlock, GetBlockAfter(ABlock));
735         InsertBlockAfter(ABlock, nextUABlock);
736     }
737 
738     if (!CheckAlignedBlock(ABlock))
739         ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - shift failed to create valid aligned block");
740 
741     UpdateBlockMap();
742     return true;
743 }
744 
SplitBlock(unsigned int alignmentIndex)745 bool BlockMultipleAlignment::SplitBlock(unsigned int alignmentIndex)
746 {
747     const BlockInfo& info = m_blockMap[alignmentIndex];
748     if (!info.block->IsAligned() || info.block->m_width < 2 || info.blockColumn == 0)
749         return false;
750 
751     TRACE_MESSAGE("splitting block");
752     UngappedAlignedBlock *newAlignedBlock = new UngappedAlignedBlock(this);
753     newAlignedBlock->m_width = info.block->m_width - info.blockColumn;
754     info.block->m_width = info.blockColumn;
755 
756     const Block::Range *range;
757     unsigned int oldTo;
758     for (unsigned int row=0; row<NRows(); ++row) {
759         range = info.block->GetRangeOfRow(row);
760         oldTo = range->to;
761         info.block->SetRangeOfRow(row, range->from, range->from + info.block->m_width - 1);
762         newAlignedBlock->SetRangeOfRow(row, oldTo - newAlignedBlock->m_width + 1, oldTo);
763     }
764 
765     InsertBlockAfter(info.block, newAlignedBlock);
766     if (!CheckAlignedBlock(info.block) || !CheckAlignedBlock(newAlignedBlock))
767         ERROR_MESSAGE("BlockMultipleAlignment::SplitBlock() - split failed to create valid m_blocks");
768 
769     UpdateBlockMap();
770     return true;
771 }
772 
MergeBlocks(unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex)773 bool BlockMultipleAlignment::MergeBlocks(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex)
774 {
775     Block
776         *expandedBlock = m_blockMap[fromAlignmentIndex].block,
777         *lastBlock = m_blockMap[toAlignmentIndex].block;
778     if (expandedBlock == lastBlock)
779         return false;
780     unsigned int i;
781     for (i=fromAlignmentIndex; i<=toAlignmentIndex; ++i)
782         if (!m_blockMap[i].block->IsAligned())
783             return false;
784 
785     TRACE_MESSAGE("merging block(s)");
786     for (i=0; i<NRows(); ++i)
787         expandedBlock->SetRangeOfRow(i, expandedBlock->GetRangeOfRow(i)->from, lastBlock->GetRangeOfRow(i)->to);
788     expandedBlock->m_width = lastBlock->GetRangeOfRow(0)->to - expandedBlock->GetRangeOfRow(0)->from + 1;
789 
790     Block *deletedBlock = NULL, *blockToDelete;
791     for (i=fromAlignmentIndex; i<=toAlignmentIndex; ++i) {
792         blockToDelete = m_blockMap[i].block;
793         if (blockToDelete == expandedBlock)
794             continue;
795         if (blockToDelete != deletedBlock) {
796             deletedBlock = blockToDelete;
797             RemoveBlock(blockToDelete);
798         }
799     }
800 
801     if (!CheckAlignedBlock(expandedBlock))
802         ERROR_MESSAGE("BlockMultipleAlignment::MergeBlocks() - merge failed to create valid block");
803 
804     UpdateBlockMap();
805     return true;
806 }
807 
CreateBlock(unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex,eUnalignedJustification justification)808 bool BlockMultipleAlignment::CreateBlock(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
809     eUnalignedJustification justification)
810 {
811     const BlockInfo& info = m_blockMap[fromAlignmentIndex];
812     UnalignedBlock *prevUABlock = dynamic_cast<UnalignedBlock*>(info.block);
813     if (!prevUABlock || info.block != m_blockMap[toAlignmentIndex].block)
814         return false;
815     unsigned int row, seqIndexFrom, seqIndexTo,
816         newBlockWidth = toAlignmentIndex - fromAlignmentIndex + 1,
817         origWidth = prevUABlock->m_width;
818     vector < unsigned int > seqIndexesFrom(NRows()), seqIndexesTo(NRows());
819     const Sequence *seq;
820 	bool ignored;
821     for (row=0; row<NRows(); ++row) {
822         if (!GetSequenceAndIndexAt(fromAlignmentIndex, row, justification, &seq, &seqIndexFrom, &ignored) ||
823                 !GetSequenceAndIndexAt(toAlignmentIndex, row, justification, &seq, &seqIndexTo, &ignored) ||
824                 seqIndexFrom == eUndefined || seqIndexTo == eUndefined ||
825                 seqIndexTo - seqIndexFrom + 1 != newBlockWidth)
826             return false;
827         seqIndexesFrom[row] = seqIndexFrom;
828         seqIndexesTo[row] = seqIndexTo;
829     }
830 
831     TRACE_MESSAGE("creating new aligned and unaligned blocks");
832 
833     UnalignedBlock *nextUABlock = new UnalignedBlock(this);
834     UngappedAlignedBlock *ABlock = new UngappedAlignedBlock(this);
835     prevUABlock->m_width = nextUABlock->m_width = 0;
836 
837     bool deletePrevUABlock = true, deleteNextUABlock = true;
838     const Block::Range *prevRange;
839     int rangeWidth;
840     for (row=0; row<NRows(); ++row) {
841         prevRange = prevUABlock->GetRangeOfRow(row);
842 
843         nextUABlock->SetRangeOfRow(row, seqIndexesTo[row] + 1, prevRange->to);
844         rangeWidth = prevRange->to - seqIndexesTo[row];
845         if (rangeWidth < 0)
846             ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - negative nextRange width");
847         else if (rangeWidth > 0) {
848             if ((unsigned int) rangeWidth > nextUABlock->m_width)
849                 nextUABlock->m_width = rangeWidth;
850             deleteNextUABlock = false;
851         }
852 
853         prevUABlock->SetRangeOfRow(row, prevRange->from, seqIndexesFrom[row] - 1);
854         rangeWidth = seqIndexesFrom[row] - prevRange->from;
855         if (rangeWidth < 0)
856             ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - negative prevRange width");
857         else if (rangeWidth > 0) {
858             if ((unsigned int) rangeWidth > prevUABlock->m_width)
859                 prevUABlock->m_width = rangeWidth;
860             deletePrevUABlock = false;
861         }
862 
863         ABlock->SetRangeOfRow(row, seqIndexesFrom[row], seqIndexesTo[row]);
864     }
865 
866     ABlock->m_width = newBlockWidth;
867     if (prevUABlock->m_width + ABlock->m_width + nextUABlock->m_width != origWidth)
868         ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - bad block m_widths sum");
869 
870     InsertBlockAfter(prevUABlock, ABlock);
871     InsertBlockAfter(ABlock, nextUABlock);
872     if (deletePrevUABlock) {
873         TRACE_MESSAGE("deleting zero-width unaligned block on left");
874         RemoveBlock(prevUABlock);
875     }
876     if (deleteNextUABlock) {
877         TRACE_MESSAGE("deleting zero-width unaligned block on right");
878         RemoveBlock(nextUABlock);
879     }
880 
881     if (!CheckAlignedBlock(ABlock))
882         ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - failed to create valid block");
883 
884     UpdateBlockMap();
885     return true;
886 }
887 
DeleteBlock(unsigned int alignmentIndex)888 bool BlockMultipleAlignment::DeleteBlock(unsigned int alignmentIndex)
889 {
890     Block *block = m_blockMap[alignmentIndex].block;
891     if (!block || !block->IsAligned())
892         return false;
893 
894     TRACE_MESSAGE("deleting block");
895     Block
896         *prevBlock = GetBlockBefore(block),
897         *nextBlock = GetBlockAfter(block);
898 
899     // unaligned m_blocks on both sides - note that total alignment m_width can change!
900     if (prevBlock && !prevBlock->IsAligned() && nextBlock && !nextBlock->IsAligned()) {
901         const Block::Range *prevRange, *nextRange;
902         unsigned int maxWidth = 0, width;
903         for (unsigned int row=0; row<NRows(); ++row) {
904             prevRange = prevBlock->GetRangeOfRow(row);
905             nextRange = nextBlock->GetRangeOfRow(row);
906             width = nextRange->to - prevRange->from + 1;
907             prevBlock->SetRangeOfRow(row, prevRange->from, nextRange->to);
908             if (width > maxWidth)
909                 maxWidth = width;
910         }
911         prevBlock->m_width = maxWidth;
912         TRACE_MESSAGE("removing extra unaligned block");
913         RemoveBlock(nextBlock);
914     }
915 
916     // unaligned block on left only
917     else if (prevBlock && !prevBlock->IsAligned()) {
918         const Block::Range *prevRange, *range;
919         for (unsigned int row=0; row<NRows(); ++row) {
920             prevRange = prevBlock->GetRangeOfRow(row);
921             range = block->GetRangeOfRow(row);
922             prevBlock->SetRangeOfRow(row, prevRange->from, range->to);
923         }
924         prevBlock->m_width += block->m_width;
925     }
926 
927     // unaligned block on right only
928     else if (nextBlock && !nextBlock->IsAligned()) {
929         const Block::Range *range, *nextRange;
930         for (unsigned int row=0; row<NRows(); ++row) {
931             range = block->GetRangeOfRow(row);
932             nextRange = nextBlock->GetRangeOfRow(row);
933             nextBlock->SetRangeOfRow(row, range->from, nextRange->to);
934         }
935         nextBlock->m_width += block->m_width;
936     }
937 
938     // no adjacent unaligned m_blocks
939     else {
940         TRACE_MESSAGE("creating new unaligned block");
941         Block *newBlock = CreateNewUnalignedBlockBetween(prevBlock, nextBlock);
942         if (prevBlock)
943             InsertBlockAfter(prevBlock, newBlock);
944         else if (nextBlock)
945             InsertBlockBefore(newBlock, nextBlock);
946         else
947             m_blocks.push_back(CRef<Block>(newBlock));
948     }
949 
950     RemoveBlock(block);
951     UpdateBlockMap();
952     return true;
953 }
954 
DeleteAllBlocks(void)955 bool BlockMultipleAlignment::DeleteAllBlocks(void)
956 {
957     if (m_blocks.size() == 0)
958         return false;
959 
960     m_blocks.clear();
961     InitCache();
962     AddUnalignedBlocks();   // one single unaligned block for whole alignment
963     UpdateBlockMap();
964     return true;
965 }
966 
DeleteRow(unsigned int row)967 bool BlockMultipleAlignment::DeleteRow(unsigned int row)
968 {
969     if (row >= NRows()) {
970         ERROR_MESSAGE("BlockMultipleAlignment::DeleteRow() - row out of range");
971         return false;
972     }
973 
974     // remove sequence from list
975     vector < bool > removeRows(NRows(), false);
976     removeRows[row] = true;
977     VectorRemoveElements(m_sequences, removeRows, 1);
978     VectorRemoveElements(m_rowDoubles, removeRows, 1);
979     VectorRemoveElements(m_rowStrings, removeRows, 1);
980 
981     // delete row from all m_blocks, removing any zero-m_width m_blocks
982     BlockList::iterator b = m_blocks.begin(), br, be = m_blocks.end();
983     while (b != be) {
984         (*b)->DeleteRow(row);
985         if ((*b)->m_width == 0) {
986             br = b;
987             ++b;
988             TRACE_MESSAGE("deleting block resized to zero width");
989             RemoveBlock(*br);
990         } else
991             ++b;
992     }
993 
994     // update total alignment m_width
995     UpdateBlockMap();
996     InitCache();
997 
998     return true;
999 }
1000 
GetUngappedAlignedBlocks(UngappedAlignedBlockList * uabs) const1001 void BlockMultipleAlignment::GetUngappedAlignedBlocks(UngappedAlignedBlockList *uabs) const
1002 {
1003     uabs->clear();
1004     uabs->reserve(m_blocks.size());
1005     BlockList::const_iterator b, be = m_blocks.end();
1006     for (b=m_blocks.begin(); b!=be; ++b) {
1007         const UngappedAlignedBlock *uab = dynamic_cast<const UngappedAlignedBlock*>(b->GetPointer());
1008         if (uab)
1009             uabs->push_back(uab);
1010     }
1011     uabs->resize(uabs->size());
1012 }
1013 
GetModifiableUngappedAlignedBlocks(ModifiableUngappedAlignedBlockList * uabs)1014 void BlockMultipleAlignment::GetModifiableUngappedAlignedBlocks(ModifiableUngappedAlignedBlockList *uabs)
1015 {
1016     uabs->clear();
1017     uabs->reserve(m_blocks.size());
1018     BlockList::iterator b, be = m_blocks.end();
1019     for (b=m_blocks.begin(); b!=be; ++b) {
1020         UngappedAlignedBlock *uab = dynamic_cast<UngappedAlignedBlock*>(b->GetPointer());
1021         if (uab)
1022             uabs->push_back(uab);
1023     }
1024     uabs->resize(uabs->size());
1025 }
1026 
ExtractRows(const vector<unsigned int> & slavesToRemove,AlignmentList * pairwiseAlignments)1027 bool BlockMultipleAlignment::ExtractRows(
1028     const vector < unsigned int >& slavesToRemove, AlignmentList *pairwiseAlignments)
1029 {
1030     if (slavesToRemove.size() == 0) return false;
1031 
1032     // make a bool list of rows to remove, also checking to make sure slave list items are in range
1033     unsigned int i;
1034     vector < bool > removeRows(NRows(), false);
1035     for (i=0; i<slavesToRemove.size(); ++i) {
1036         if (slavesToRemove[i] > 0 && slavesToRemove[i] < NRows()) {
1037             removeRows[slavesToRemove[i]] = true;
1038         } else {
1039             ERROR_MESSAGE("BlockMultipleAlignment::ExtractRows() - can't extract row "
1040                 << slavesToRemove[i]);
1041             return false;
1042         }
1043     }
1044 
1045     if (pairwiseAlignments) {
1046         TRACE_MESSAGE("creating new pairwise alignments");
1047         ncbi::EDiagSev oldLevel = SetDiagPostLevel(eDiag_Warning);    // otherwise, info messages take a long time if lots of rows
1048 
1049         UngappedAlignedBlockList uaBlocks;
1050         GetUngappedAlignedBlocks(&uaBlocks);
1051         UngappedAlignedBlockList::const_iterator u, ue = uaBlocks.end();
1052 
1053         for (i=0; i<slavesToRemove.size(); ++i) {
1054 
1055             // create new pairwise alignment from each removed row
1056             SequenceList newSeqs(2);
1057             newSeqs[0] = m_sequences[0];
1058             newSeqs[1] = m_sequences[slavesToRemove[i]];
1059             BlockMultipleAlignment *newAlignment = new BlockMultipleAlignment(newSeqs);
1060             for (u=uaBlocks.begin(); u!=ue; ++u) {
1061                 UngappedAlignedBlock *newABlock = new UngappedAlignedBlock(newAlignment);
1062                 const Block::Range *range = (*u)->GetRangeOfRow(0);
1063                 newABlock->SetRangeOfRow(0, range->from, range->to);
1064                 range = (*u)->GetRangeOfRow(slavesToRemove[i]);
1065                 newABlock->SetRangeOfRow(1, range->from, range->to);
1066                 newABlock->m_width = range->to - range->from + 1;
1067                 newAlignment->AddAlignedBlockAtEnd(newABlock);
1068             }
1069             if (!newAlignment->AddUnalignedBlocks() ||
1070                 !newAlignment->UpdateBlockMap()) {
1071                 ERROR_MESSAGE("BlockMultipleAlignment::ExtractRows() - error creating new alignment");
1072                 return false;
1073             }
1074 
1075             pairwiseAlignments->push_back(newAlignment);
1076         }
1077         SetDiagPostLevel(oldLevel);
1078     }
1079 
1080     // remove sequences
1081     TRACE_MESSAGE("deleting sequences");
1082     VectorRemoveElements(m_sequences, removeRows, slavesToRemove.size());
1083     VectorRemoveElements(m_rowDoubles, removeRows, slavesToRemove.size());
1084     VectorRemoveElements(m_rowStrings, removeRows, slavesToRemove.size());
1085 
1086     // delete row from all blocks, removing any zero-width blocks
1087     TRACE_MESSAGE("deleting alignment rows from blocks");
1088     BlockList::iterator b = m_blocks.begin(), br, be = m_blocks.end();
1089     while (b != be) {
1090         (*b)->DeleteRows(removeRows, slavesToRemove.size());
1091         if ((*b)->m_width == 0) {
1092             br = b;
1093             ++b;
1094             TRACE_MESSAGE("deleting block resized to zero width");
1095             RemoveBlock(*br);
1096         } else
1097             ++b;
1098     }
1099 
1100     // update total alignment m_width
1101     UpdateBlockMap();
1102     InitCache();
1103     return true;
1104 }
1105 
MergeAlignment(const BlockMultipleAlignment * newAlignment)1106 bool BlockMultipleAlignment::MergeAlignment(const BlockMultipleAlignment *newAlignment)
1107 {
1108     // check to see if the alignment is compatible - must have same master sequence
1109     // and blocks of new alignment must contain m_blocks of this alignment; at same time,
1110     // build up map of aligned blocks in new alignment that correspond to aligned blocks
1111     // of this object, for convenient lookup later
1112     if (newAlignment->GetMaster() != GetMaster())
1113         return false;
1114 
1115     const Block::Range *newRange, *thisRange;
1116     BlockList::const_iterator nb, nbe = newAlignment->m_blocks.end();
1117     BlockList::iterator b, be = m_blocks.end();
1118     typedef map < UngappedAlignedBlock *, const UngappedAlignedBlock * > AlignedBlockMap;
1119     AlignedBlockMap correspondingNewBlocks;
1120 
1121     for (b=m_blocks.begin(); b!=be; ++b) {
1122         if (!(*b)->IsAligned())
1123             continue;
1124         for (nb=newAlignment->m_blocks.begin(); nb!=nbe; ++nb) {
1125             if (!(*nb)->IsAligned())
1126                 continue;
1127 
1128             newRange = (*nb)->GetRangeOfRow(0);
1129             thisRange = (*b)->GetRangeOfRow(0);
1130             if (newRange->from <= thisRange->from && newRange->to >= thisRange->to) {
1131                 correspondingNewBlocks[dynamic_cast<UngappedAlignedBlock*>(b->GetPointer())] =
1132                     dynamic_cast<const UngappedAlignedBlock*>(nb->GetPointer());
1133                 break;
1134             }
1135         }
1136         if (nb == nbe) return false;    // no corresponding block found
1137     }
1138 
1139     // add slave sequences from new alignment; also copy scores/status
1140     unsigned int i, nNewRows = newAlignment->m_sequences.size() - 1;
1141     m_sequences.resize(m_sequences.size() + nNewRows);
1142     m_rowDoubles.resize(m_rowDoubles.size() + nNewRows);
1143     m_rowStrings.resize(m_rowStrings.size() + nNewRows);
1144     for (i=0; i<nNewRows; ++i) {
1145         m_sequences[m_sequences.size() + i - nNewRows] = newAlignment->m_sequences[i + 1];
1146         SetRowDouble(NRows() + i - nNewRows, newAlignment->GetRowDouble(i + 1));
1147         SetRowStatusLine(NRows() + i - nNewRows, newAlignment->GetRowStatusLine(i + 1));
1148     }
1149 
1150     // now that we know blocks are compatible, add new rows at end of this alignment, containing
1151     // all rows (except master) from new alignment; only that part of the aligned blocks from
1152     // the new alignment that intersect with the aligned blocks from this object are added, so
1153     // that this object's block structure is unchanged
1154 
1155     // resize all aligned blocks
1156     for (b=m_blocks.begin(); b!=be; ++b)
1157         (*b)->AddRows(nNewRows);
1158 
1159     // set ranges of aligned blocks, and add them to the list
1160     AlignedBlockMap::const_iterator ab, abe = correspondingNewBlocks.end();
1161     const Block::Range *thisMaster, *newMaster;
1162     for (ab=correspondingNewBlocks.begin(); ab!=abe; ++ab) {
1163         thisMaster = ab->first->GetRangeOfRow(0);
1164         newMaster = ab->second->GetRangeOfRow(0);
1165         for (i=0; i<nNewRows; ++i) {
1166             newRange = ab->second->GetRangeOfRow(i + 1);
1167             ab->first->SetRangeOfRow(NRows() + i - nNewRows,
1168                 newRange->from + thisMaster->from - newMaster->from,
1169                 newRange->to + thisMaster->to - newMaster->to);
1170         }
1171     }
1172 
1173     // delete then recreate the unaligned blocks, in case a merge requires the
1174     // creation of a new unaligned block
1175     for (b=m_blocks.begin(); b!=be; ) {
1176         if (!(*b)->IsAligned()) {
1177             BlockList::iterator bb(b);
1178             ++bb;
1179             m_blocks.erase(b);
1180             b = bb;
1181         } else
1182             ++b;
1183     }
1184     InitCache();
1185 
1186     // update this alignment, but leave row scores/status alone
1187     if (!AddUnalignedBlocks() || !UpdateBlockMap(false)) {
1188         ERROR_MESSAGE("BlockMultipleAlignment::MergeAlignment() - internal update after merge failed");
1189         return false;
1190     }
1191     return true;
1192 }
1193 
1194 template < class T >
ReorderVector(T & v,const std::vector<unsigned int> & newOrder)1195 bool ReorderVector(T& v, const std::vector < unsigned int >& newOrder)
1196 {
1197     // check validity of new ordering
1198     if (newOrder.size() != v.size()) {
1199         ERROR_MESSAGE("ReorderVector() - wrong size newOrder");
1200         return false;
1201     }
1202     vector < bool > isPresent(v.size(), false);
1203     unsigned int r;
1204     for (r=0; r<v.size(); r++) {
1205         if (isPresent[newOrder[r]]) {
1206             ERROR_MESSAGE("ReorderVector() - invalid newOrder: repeated/missing row");
1207             return false;
1208         }
1209         isPresent[newOrder[r]] = true;
1210     }
1211 
1212     // not terribly efficient - makes a whole new copy with the new order, then re-copies back
1213     T reordered(v.size());
1214     for (r=0; r<v.size(); r++)
1215         reordered[r] = v[newOrder[r]];
1216     v = reordered;
1217 
1218     return true;
1219 }
1220 
ReorderRows(const std::vector<unsigned int> & newOrder)1221 bool BlockMultipleAlignment::ReorderRows(const std::vector < unsigned int >& newOrder)
1222 {
1223     // can't reorder master
1224     if (newOrder[0] != 0) {
1225         ERROR_MESSAGE("ReorderRows() - can't move master row");
1226         return false;
1227     }
1228     bool okay =
1229         (ReorderVector(m_sequences, newOrder) &&
1230          ReorderVector(m_rowDoubles, newOrder) &&
1231          ReorderVector(m_rowStrings, newOrder));
1232     if (!okay) {
1233         ERROR_MESSAGE("reordering of sequences and status info failed");
1234         return false;
1235     }
1236     BlockList::iterator b, be = m_blocks.end();
1237     for (b=m_blocks.begin(); b!=be; ++b)
1238         okay = (okay && (*b)->ReorderRows(newOrder));
1239     if (!okay)
1240         ERROR_MESSAGE("reordering of block ranges failed");
1241     return okay;
1242 }
1243 
CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment * multiple,const BlockMultipleAlignment::UngappedAlignedBlockList & m_blocks,unsigned int slaveRow)1244 CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple,
1245     const BlockMultipleAlignment::UngappedAlignedBlockList& m_blocks, unsigned int slaveRow)
1246 {
1247     if (!multiple || slaveRow >= multiple->NRows()) {
1248         ERROR_MESSAGE("CreatePairwiseSeqAlignFromMultipleRow() - bad parameters");
1249         return NULL;
1250     }
1251 
1252     CSeq_align *seqAlign = new CSeq_align();
1253     seqAlign->SetType(CSeq_align::eType_partial);
1254     seqAlign->SetDim(2);
1255 
1256     CSeq_align::C_Segs::TDendiag& denDiags = seqAlign->SetSegs().SetDendiag();
1257     denDiags.resize((m_blocks.size() > 0) ? m_blocks.size() : 1);
1258 
1259     CSeq_align::C_Segs::TDendiag::iterator d, de = denDiags.end();
1260     BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b = m_blocks.begin();
1261     const Block::Range *range;
1262     for (d=denDiags.begin(); d!=de; ++d, ++b) {
1263 
1264         CDense_diag *denDiag = new CDense_diag();
1265         d->Reset(denDiag);
1266         denDiag->SetDim(2);
1267         denDiag->SetIds().resize(2);
1268 
1269         // master row
1270         CRef < CSeq_id > id(new CSeq_id());
1271         id->Assign(multiple->GetSequenceOfRow(0)->GetPreferredIdentifier());
1272         denDiag->SetIds().front() = id;
1273         if (m_blocks.size() > 0) {
1274             range = (*b)->GetRangeOfRow(0);
1275             denDiag->SetStarts().push_back(range->from);
1276         } else
1277             denDiag->SetStarts().push_back(0);
1278 
1279         // slave row
1280         id.Reset(new CSeq_id());
1281         id->Assign(multiple->GetSequenceOfRow(slaveRow)->GetPreferredIdentifier());
1282         denDiag->SetIds().back() = id;
1283         if (m_blocks.size() > 0) {
1284             range = (*b)->GetRangeOfRow(slaveRow);
1285             denDiag->SetStarts().push_back(range->from);
1286         } else
1287             denDiag->SetStarts().push_back(0);
1288 
1289         // block m_width
1290         denDiag->SetLen((m_blocks.size() > 0) ? (*b)->m_width : 0);
1291     }
1292 
1293     return seqAlign;
1294 }
1295 
NAlignedBlocks(void) const1296 unsigned int BlockMultipleAlignment::NAlignedBlocks(void) const
1297 {
1298     unsigned int n = 0;
1299     BlockList::const_iterator b, be = m_blocks.end();
1300     for (b=m_blocks.begin(); b!=be; ++b)
1301         if ((*b)->IsAligned())
1302             ++n;
1303     return n;
1304 }
1305 
GetAlignmentIndex(unsigned int row,unsigned int seqIndex,eUnalignedJustification justification)1306 unsigned int BlockMultipleAlignment::GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification)
1307 {
1308     if (row >= NRows() || seqIndex >= GetSequenceOfRow(row)->Length()) {
1309         ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - coordinate out of range");
1310         return eUndefined;
1311     }
1312 
1313     unsigned int alignmentIndex, blockColumn;
1314     const Block *block = NULL;
1315     const Block::Range *range;
1316 
1317     for (alignmentIndex=0; alignmentIndex<m_totalWidth; ++alignmentIndex) {
1318 
1319         // check each block to see if index is in range
1320         if (block != m_blockMap[alignmentIndex].block) {
1321             block = m_blockMap[alignmentIndex].block;
1322 
1323             range = block->GetRangeOfRow(row);
1324             if ((int)seqIndex >= range->from && (int)seqIndex <= range->to) {
1325 
1326                 // override requested justification for end blocks
1327                 if (block == m_blocks.back()) // also true if there's a single aligned block
1328                     justification = eLeft;
1329                 else if (block == m_blocks.front())
1330                     justification = eRight;
1331 
1332                 // search block columns to find index (inefficient, but avoids having to write
1333                 // inverse functions of Block::GetIndexAt()
1334                 for (blockColumn=0; blockColumn<block->m_width; ++blockColumn) {
1335                     if (seqIndex == block->GetIndexAt(blockColumn, row, justification))
1336                         return alignmentIndex + blockColumn;
1337                 }
1338                 ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - can't find index in block");
1339                 return eUndefined;
1340             }
1341         }
1342     }
1343 
1344     // should never get here...
1345     ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - confused");
1346     return eUndefined;
1347 }
1348 
1349 
ReorderRows(const std::vector<unsigned int> & newOrder)1350 bool Block::ReorderRows(const std::vector < unsigned int >& newOrder)
1351 {
1352     return ReorderVector(m_ranges, newOrder);
1353 }
1354 
1355 
1356 ///// UngappedAlignedBlock methods /////
1357 
GetCharacterAt(unsigned int blockColumn,unsigned int row) const1358 char UngappedAlignedBlock::GetCharacterAt(unsigned int blockColumn, unsigned int row) const
1359 {
1360     return m_parentAlignment->GetSequenceOfRow(row)->m_sequenceString[GetIndexAt(blockColumn, row)];
1361 }
1362 
Clone(const BlockMultipleAlignment * newMultiple) const1363 Block * UngappedAlignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const
1364 {
1365     UngappedAlignedBlock *copy = new UngappedAlignedBlock(newMultiple);
1366     const Block::Range *range;
1367     for (unsigned int row=0; row<NSequences(); ++row) {
1368         range = GetRangeOfRow(row);
1369         copy->SetRangeOfRow(row, range->from, range->to);
1370     }
1371     copy->m_width = m_width;
1372     return copy;
1373 }
1374 
DeleteRow(unsigned int row)1375 void UngappedAlignedBlock::DeleteRow(unsigned int row)
1376 {
1377     RangeList::iterator r = m_ranges.begin();
1378     for (unsigned int i=0; i<row; ++i)
1379         ++r;
1380     m_ranges.erase(r);
1381 }
1382 
DeleteRows(vector<bool> & removeRows,unsigned int nToRemove)1383 void UngappedAlignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove)
1384 {
1385     VectorRemoveElements(m_ranges, removeRows, nToRemove);
1386 }
1387 
1388 
1389 ///// UnalignedBlock methods /////
1390 
GetIndexAt(unsigned int blockColumn,unsigned int row,BlockMultipleAlignment::eUnalignedJustification justification) const1391 unsigned int UnalignedBlock::GetIndexAt(unsigned int blockColumn, unsigned int row,
1392         BlockMultipleAlignment::eUnalignedJustification justification) const
1393 {
1394     const Block::Range *range = GetRangeOfRow(row);
1395     unsigned int seqIndex = BlockMultipleAlignment::eUndefined, rangeWidth, rangeMiddle, extraSpace;
1396 
1397     switch (justification) {
1398         case BlockMultipleAlignment::eLeft:
1399             seqIndex = range->from + blockColumn;
1400             break;
1401         case BlockMultipleAlignment::eRight:
1402             seqIndex = range->to - m_width + blockColumn + 1;
1403             break;
1404         case BlockMultipleAlignment::eCenter:
1405             rangeWidth = (range->to - range->from + 1);
1406             extraSpace = (m_width - rangeWidth) / 2;
1407             if (blockColumn < extraSpace || blockColumn >= extraSpace + rangeWidth)
1408                 seqIndex = BlockMultipleAlignment::eUndefined;
1409             else
1410                 seqIndex = range->from + blockColumn - extraSpace;
1411             break;
1412         case BlockMultipleAlignment::eSplit:
1413             rangeWidth = (range->to - range->from + 1);
1414             rangeMiddle = (rangeWidth / 2) + (rangeWidth % 2);
1415             extraSpace = m_width - rangeWidth;
1416             if (blockColumn < rangeMiddle)
1417                 seqIndex = range->from + blockColumn;
1418             else if (blockColumn >= extraSpace + rangeMiddle)
1419                 seqIndex = range->to - m_width + blockColumn + 1;
1420             else
1421                 seqIndex = BlockMultipleAlignment::eUndefined;
1422             break;
1423     }
1424     if ((int)seqIndex < range->from || (int)seqIndex > range->to)
1425         seqIndex = BlockMultipleAlignment::eUndefined;
1426 
1427     return seqIndex;
1428 }
1429 
Resize(void)1430 void UnalignedBlock::Resize(void)
1431 {
1432     m_width = 0;
1433     for (unsigned int i=0; i<NSequences(); ++i) {
1434         unsigned int blockWidth = m_ranges[i].to - m_ranges[i].from + 1;
1435         if (blockWidth > m_width)
1436             m_width = blockWidth;
1437     }
1438 }
1439 
DeleteRow(unsigned int row)1440 void UnalignedBlock::DeleteRow(unsigned int row)
1441 {
1442     RangeList::iterator r = m_ranges.begin();
1443     for (unsigned int i=0; i<row; ++i)
1444         ++r;
1445     m_ranges.erase(r);
1446     Resize();
1447 }
1448 
DeleteRows(vector<bool> & removeRows,unsigned int nToRemove)1449 void UnalignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove)
1450 {
1451     VectorRemoveElements(m_ranges, removeRows, nToRemove);
1452     Resize();
1453 }
1454 
Clone(const BlockMultipleAlignment * newMultiple) const1455 Block * UnalignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const
1456 {
1457     UnalignedBlock *copy = new UnalignedBlock(newMultiple);
1458     const Block::Range *range;
1459     for (unsigned int row=0; row<NSequences(); ++row) {
1460         range = GetRangeOfRow(row);
1461         copy->SetRangeOfRow(row, range->from, range->to);
1462     }
1463     copy->m_width = m_width;
1464     return copy;
1465 }
1466 
1467 END_SCOPE(struct_util)
1468