1 /* $Id: su_block_multiple_alignment.cpp 121421 2008-03-06 13:58:33Z thiessen $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Paul Thiessen
27 *
28 * File Description:
29 * Classes to hold alignment data
30 *
31 * ===========================================================================
32 */
33
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbi_limits.hpp>
37 #include <util/tables/raw_scoremat.h>
38
39 #include <objects/seqalign/Dense_diag.hpp>
40
41 #include <algo/structure/struct_util/su_block_multiple_alignment.hpp>
42 #include <algo/structure/struct_util/su_sequence_set.hpp>
43 #include <algo/structure/struct_util/su_pssm.hpp>
44 #include "su_private.hpp"
45
46 USING_NCBI_SCOPE;
47 USING_SCOPE(objects);
48
49
BEGIN_SCOPE(struct_util)50 BEGIN_SCOPE(struct_util)
51
52 BlockMultipleAlignment::BlockMultipleAlignment(const SequenceList& sequenceList)
53 {
54 m_sequences = sequenceList;
55 m_pssm = NULL;
56
57 InitCache();
58 m_rowDoubles.resize(m_sequences.size(), 0.0);
59 m_rowStrings.resize(m_sequences.size());
60 }
61
InitCache(void)62 void BlockMultipleAlignment::InitCache(void)
63 {
64 m_cachePrevRow = eUndefined;
65 m_cachePrevBlock = NULL;
66 m_cacheBlockIterator = m_blocks.begin();
67 }
68
~BlockMultipleAlignment(void)69 BlockMultipleAlignment::~BlockMultipleAlignment(void)
70 {
71 RemovePSSM();
72 }
73
Clone(void) const74 BlockMultipleAlignment * BlockMultipleAlignment::Clone(void) const
75 {
76 BlockMultipleAlignment *copy = new BlockMultipleAlignment(m_sequences);
77 BlockList::const_iterator b, be = m_blocks.end();
78 for (b=m_blocks.begin(); b!=be; ++b)
79 copy->m_blocks.push_back(CRef<Block>((*b)->Clone(copy)));
80 copy->UpdateBlockMap();
81 copy->m_rowDoubles = m_rowDoubles;
82 copy->m_rowStrings = m_rowStrings;
83 return copy;
84 }
85
86 /*
87 static inline unsigned int ScreenResidueCharacter(char original)
88 {
89 char ch = toupper((unsigned char) original);
90 switch (ch) {
91 case 'A': case 'R': case 'N': case 'D': case 'C':
92 case 'Q': case 'E': case 'G': case 'H': case 'I':
93 case 'L': case 'K': case 'M': case 'F': case 'P':
94 case 'S': case 'T': case 'W': case 'Y': case 'V':
95 case 'B': case 'Z':
96 break;
97 default:
98 ch = 'X'; // make all but natural aa's just 'X'
99 }
100 return ch;
101 }
102
103 static int GetBLOSUM62Score(char a, char b)
104 {
105 static SNCBIFullScoreMatrix Blosum62Matrix;
106 static bool unpacked = false;
107
108 if (!unpacked) {
109 NCBISM_Unpack(&NCBISM_Blosum62, &Blosum62Matrix);
110 unpacked = true;
111 }
112
113 return Blosum62Matrix.s[ScreenResidueCharacter(a)][ScreenResidueCharacter(b)];
114 }
115 */
116
GetPSSM(void) const117 const BLAST_Matrix * BlockMultipleAlignment::GetPSSM(void) const
118 {
119 if (m_pssm) return m_pssm;
120 m_pssm = CreateBlastMatrix(this);
121 return m_pssm;
122 }
123
RemovePSSM(void) const124 void BlockMultipleAlignment::RemovePSSM(void) const
125 {
126 if (m_pssm) {
127 delete m_pssm;
128 m_pssm = NULL;
129 }
130 }
131
GetBlockList(ConstBlockList & cbl) const132 void BlockMultipleAlignment::GetBlockList(ConstBlockList& cbl) const
133 {
134 cbl.clear();
135 cbl.reserve(m_blocks.size());
136 BlockList::const_iterator b, be = m_blocks.end();
137 for (b=m_blocks.begin(); b!=be; ++b)
138 cbl.push_back(CConstRef<Block>(b->GetPointer()));
139 }
140
CheckAlignedBlock(const Block * block) const141 bool BlockMultipleAlignment::CheckAlignedBlock(const Block *block) const
142 {
143 if (!block || !block->IsAligned()) {
144 ERROR_MESSAGE("CheckAlignedBlock() - checks aligned blocks only");
145 return false;
146 }
147 if (block->NSequences() != m_sequences.size()) {
148 ERROR_MESSAGE("CheckAlignedBlock() - block size mismatch");
149 return false;
150 }
151
152 // make sure ranges are reasonable for each sequence
153 unsigned int row;
154 const Block
155 *prevBlock = GetBlockBefore(block),
156 *nextBlock = GetBlockAfter(block);
157 const Block::Range *range, *prevRange = NULL, *nextRange = NULL;
158 SequenceList::const_iterator sequence = m_sequences.begin();
159 for (row=0; row<block->NSequences(); ++row, ++sequence) {
160 range = block->GetRangeOfRow(row);
161 if (prevBlock) prevRange = prevBlock->GetRangeOfRow(row);
162 if (nextBlock) nextRange = nextBlock->GetRangeOfRow(row);
163 if (range->to - range->from + 1 != (int)block->m_width || // check block m_width
164 (prevRange && range->from <= prevRange->to) || // check for range overlap
165 (nextRange && range->to >= nextRange->from) || // check for range overlap
166 range->from > range->to || // check range values
167 range->to >= (int)(*sequence)->Length()) // check bounds of end
168 {
169 ERROR_MESSAGE("CheckAlignedBlock() - range error");
170 return false;
171 }
172 }
173
174 return true;
175 }
176
AddAlignedBlockAtEnd(UngappedAlignedBlock * newBlock)177 bool BlockMultipleAlignment::AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock)
178 {
179 m_blocks.push_back(CRef<Block>(newBlock));
180 return CheckAlignedBlock(newBlock);
181 }
182
183 UnalignedBlock * BlockMultipleAlignment::
CreateNewUnalignedBlockBetween(const Block * leftBlock,const Block * rightBlock)184 CreateNewUnalignedBlockBetween(const Block *leftBlock, const Block *rightBlock)
185 {
186 if ((leftBlock && !leftBlock->IsAligned()) ||
187 (rightBlock && !rightBlock->IsAligned())) {
188 ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - passed an unaligned block");
189 return NULL;
190 }
191
192 unsigned int row, from, to, length;
193 SequenceList::const_iterator s, se = m_sequences.end();
194
195 UnalignedBlock *newBlock = new UnalignedBlock(this);
196 newBlock->m_width = 0;
197
198 for (row=0, s=m_sequences.begin(); s!=se; ++row, ++s) {
199
200 if (leftBlock)
201 from = leftBlock->GetRangeOfRow(row)->to + 1;
202 else
203 from = 0;
204
205 if (rightBlock)
206 to = rightBlock->GetRangeOfRow(row)->from - 1;
207 else
208 to = (*s)->Length() - 1;
209
210 newBlock->SetRangeOfRow(row, from, to);
211
212 length = to - from + 1;
213 if ((((int)to) - ((int)from) + 1) < 0) { // just to make sure...
214 ERROR_MESSAGE("CreateNewUnalignedBlockBetween() - unaligned length < 0");
215 return NULL;
216 }
217 if (length > newBlock->m_width)
218 newBlock->m_width = length;
219 }
220
221 if (newBlock->m_width == 0) {
222 delete newBlock;
223 return NULL;
224 } else
225 return newBlock;
226 }
227
AddUnalignedBlocks(void)228 bool BlockMultipleAlignment::AddUnalignedBlocks(void)
229 {
230 BlockList::iterator a, ae = m_blocks.end();
231 const Block *alignedBlock = NULL, *prevAlignedBlock = NULL;
232 Block *newUnalignedBlock;
233
234 // unaligned m_blocks to the left of each aligned block
235 for (a=m_blocks.begin(); a!=ae; ++a) {
236 alignedBlock = *a;
237 newUnalignedBlock = CreateNewUnalignedBlockBetween(prevAlignedBlock, alignedBlock);
238 if (newUnalignedBlock)
239 m_blocks.insert(a, CRef<Block>(newUnalignedBlock));
240 prevAlignedBlock = alignedBlock;
241 }
242
243 // right tail
244 newUnalignedBlock = CreateNewUnalignedBlockBetween(alignedBlock, NULL);
245 if (newUnalignedBlock) {
246 m_blocks.insert(a, CRef<Block>(newUnalignedBlock));
247 }
248
249 return true;
250 }
251
UpdateBlockMap(bool clearRowInfo)252 bool BlockMultipleAlignment::UpdateBlockMap(bool clearRowInfo)
253 {
254 unsigned int i = 0, j, n = 0;
255 BlockList::iterator b, be = m_blocks.end();
256
257 // reset old stuff, recalculate m_width
258 m_totalWidth = 0;
259 for (b=m_blocks.begin(); b!=be; ++b)
260 m_totalWidth += (*b)->m_width;
261
262 // fill out the block map
263 m_blockMap.resize(m_totalWidth);
264 UngappedAlignedBlock *aBlock;
265 for (b=m_blocks.begin(); b!=be; ++b) {
266 aBlock = dynamic_cast<UngappedAlignedBlock*>(b->GetPointer());
267 if (aBlock)
268 ++n;
269 for (j=0; j<(*b)->m_width; ++j, ++i) {
270 m_blockMap[i].block = *b;
271 m_blockMap[i].blockColumn = j;
272 m_blockMap[i].alignedBlockNum = aBlock ? n : eUndefined;
273 }
274 }
275
276 // if alignment changes, any pssm/scores/status become invalid
277 RemovePSSM();
278 if (clearRowInfo) ClearRowInfo();
279
280 return true;
281 }
282
GetCharacterAt(unsigned int alignmentColumn,unsigned int row,eUnalignedJustification justification,char * character) const283 bool BlockMultipleAlignment::GetCharacterAt(
284 unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification,
285 char *character) const
286 {
287 const Sequence *sequence;
288 unsigned int seqIndex;
289 bool isAligned;
290
291 if (!GetSequenceAndIndexAt(alignmentColumn, row, justification, &sequence, &seqIndex, &isAligned))
292 return false;
293
294 *character = (seqIndex != eUndefined) ? sequence->m_sequenceString[seqIndex] : '~';
295 if (isAligned)
296 *character = toupper((unsigned char)(*character));
297 else
298 *character = tolower((unsigned char)(*character));
299
300 return true;
301 }
302
GetSequenceAndIndexAt(unsigned int alignmentColumn,unsigned int row,eUnalignedJustification requestedJustification,const Sequence ** sequence,unsigned int * index,bool * isAligned) const303 bool BlockMultipleAlignment::GetSequenceAndIndexAt(
304 unsigned int alignmentColumn, unsigned int row, eUnalignedJustification requestedJustification,
305 const Sequence **sequence, unsigned int *index, bool *isAligned) const
306 {
307 if (sequence)
308 *sequence = m_sequences[row];
309
310 const BlockInfo& blockInfo = m_blockMap[alignmentColumn];
311
312 if (!blockInfo.block->IsAligned()) {
313 if (isAligned)
314 *isAligned = false;
315 // override requested justification for end m_blocks
316 if (blockInfo.block == m_blocks.back().GetPointer()) // also true if there's a single aligned block
317 requestedJustification = eLeft;
318 else if (blockInfo.block == m_blocks.front().GetPointer())
319 requestedJustification = eRight;
320 } else
321 if (isAligned)
322 *isAligned = true;
323
324 if (index)
325 *index = blockInfo.block->GetIndexAt(blockInfo.blockColumn, row, requestedJustification);
326
327 return true;
328 }
329
IsAligned(unsigned int row,unsigned int seqIndex) const330 bool BlockMultipleAlignment::IsAligned(unsigned int row, unsigned int seqIndex) const
331 {
332 const Block *block = GetBlock(row, seqIndex);
333 return (block && block->IsAligned());
334 }
335
GetBlock(unsigned int row,unsigned int seqIndex) const336 const Block * BlockMultipleAlignment::GetBlock(unsigned int row, unsigned int seqIndex) const
337 {
338 // make sure we're in range for this sequence
339 if (row >= NRows() || seqIndex >= m_sequences[row]->Length()) {
340 ERROR_MESSAGE("BlockMultipleAlignment::GetBlock() - coordinate out of range");
341 return NULL;
342 }
343
344 const Block::Range *range;
345
346 // first check to see if it's in the same block as last time.
347 if (m_cachePrevBlock) {
348 range = m_cachePrevBlock->GetRangeOfRow(row);
349 if ((int)seqIndex >= range->from && (int)seqIndex <= range->to)
350 return m_cachePrevBlock;
351 ++m_cacheBlockIterator; // start search at next block
352 } else {
353 m_cacheBlockIterator = m_blocks.begin();
354 }
355
356 // otherwise, perform block search. This search is most efficient when queries
357 // happen in order from left to right along a given row.
358 do {
359 if (m_cacheBlockIterator == m_blocks.end())
360 m_cacheBlockIterator = m_blocks.begin();
361 range = (*m_cacheBlockIterator)->GetRangeOfRow(row);
362 if ((int)seqIndex >= range->from && (int)seqIndex <= range->to) {
363 m_cachePrevBlock = *m_cacheBlockIterator; // cache this block
364 return m_cachePrevBlock;
365 }
366 ++m_cacheBlockIterator;
367 } while (1);
368 }
369
GetFirstAlignedBlockPosition(void) const370 unsigned int BlockMultipleAlignment::GetFirstAlignedBlockPosition(void) const
371 {
372 BlockList::const_iterator b = m_blocks.begin();
373 if (m_blocks.size() > 0 && (*b)->IsAligned()) // first block is aligned
374 return 0;
375 else if (m_blocks.size() >= 2 && (*(++b))->IsAligned()) // second block is aligned
376 return m_blocks.front()->m_width;
377 else
378 return eUndefined;
379 }
380
GetAlignedSlaveIndex(unsigned int masterSeqIndex,unsigned int slaveRow) const381 unsigned int BlockMultipleAlignment::GetAlignedSlaveIndex(unsigned int masterSeqIndex, unsigned int slaveRow) const
382 {
383 const UngappedAlignedBlock
384 *aBlock = dynamic_cast<const UngappedAlignedBlock*>(GetBlock(0, masterSeqIndex));
385 if (!aBlock)
386 return eUndefined;
387
388 const Block::Range
389 *masterRange = aBlock->GetRangeOfRow(0),
390 *slaveRange = aBlock->GetRangeOfRow(slaveRow);
391 return (slaveRange->from + masterSeqIndex - masterRange->from);
392 }
393
GetAlignedBlockPosition(unsigned int alignmentIndex,unsigned int * blockColumn,unsigned int * blockWidth) const394 void BlockMultipleAlignment::GetAlignedBlockPosition(unsigned int alignmentIndex,
395 unsigned int *blockColumn, unsigned int *blockWidth) const
396 {
397 *blockColumn = *blockWidth = eUndefined;
398 const BlockInfo& info = m_blockMap[alignmentIndex];
399 if (info.block->IsAligned()) {
400 *blockColumn = info.blockColumn;
401 *blockWidth = info.block->m_width;
402 }
403 }
404
GetBlockBefore(const Block * block)405 Block * BlockMultipleAlignment::GetBlockBefore(const Block *block)
406 {
407 Block *prevBlock = NULL;
408 BlockList::iterator b, be = m_blocks.end();
409 for (b=m_blocks.begin(); b!=be; ++b) {
410 if (*b == block) break;
411 prevBlock = b->GetPointer();
412 }
413 return prevBlock;
414 }
415
GetBlockBefore(const Block * block) const416 const Block * BlockMultipleAlignment::GetBlockBefore(const Block *block) const
417 {
418 const Block *prevBlock = NULL;
419 BlockList::const_iterator b, be = m_blocks.end();
420 for (b=m_blocks.begin(); b!=be; ++b) {
421 if (*b == block) break;
422 prevBlock = b->GetPointer();
423 }
424 return prevBlock;
425 }
426
GetBlockAfter(const Block * block)427 Block * BlockMultipleAlignment::GetBlockAfter(const Block *block)
428 {
429 BlockList::iterator b, be = m_blocks.end();
430 for (b=m_blocks.begin(); b!=be; ++b) {
431 if (*b == block) {
432 ++b;
433 if (b == be) break;
434 return *b;
435 }
436 }
437 return NULL;
438 }
439
GetBlockAfter(const Block * block) const440 const Block * BlockMultipleAlignment::GetBlockAfter(const Block *block) const
441 {
442 BlockList::const_iterator b, be = m_blocks.end();
443 for (b=m_blocks.begin(); b!=be; ++b) {
444 if (*b == block) {
445 ++b;
446 if (b == be) break;
447 return *b;
448 }
449 }
450 return NULL;
451 }
452
GetUnalignedBlockBefore(const UngappedAlignedBlock * aBlock) const453 const UnalignedBlock * BlockMultipleAlignment::GetUnalignedBlockBefore(
454 const UngappedAlignedBlock *aBlock) const
455 {
456 const Block *prevBlock;
457 if (aBlock)
458 prevBlock = GetBlockBefore(aBlock);
459 else
460 prevBlock = m_blocks.back().GetPointer();
461 return dynamic_cast<const UnalignedBlock*>(prevBlock);
462 }
463
InsertBlockBefore(Block * newBlock,const Block * insertAt)464 void BlockMultipleAlignment::InsertBlockBefore(Block *newBlock, const Block *insertAt)
465 {
466 BlockList::iterator b, be = m_blocks.end();
467 for (b=m_blocks.begin(); b!=be; ++b) {
468 if (*b == insertAt) {
469 m_blocks.insert(b, CRef<Block>(newBlock));
470 return;
471 }
472 }
473 WARNING_MESSAGE("BlockMultipleAlignment::InsertBlockBefore() - couldn't find insertAt block");
474 }
475
InsertBlockAfter(const Block * insertAt,Block * newBlock)476 void BlockMultipleAlignment::InsertBlockAfter(const Block *insertAt, Block *newBlock)
477 {
478 BlockList::iterator b, be = m_blocks.end();
479 for (b=m_blocks.begin(); b!=be; ++b) {
480 if (*b == insertAt) {
481 ++b;
482 m_blocks.insert(b, CRef<Block>(newBlock));
483 return;
484 }
485 }
486 WARNING_MESSAGE("BlockMultipleAlignment::InsertBlockBefore() - couldn't find insertAt block");
487 }
488
RemoveBlock(Block * block)489 void BlockMultipleAlignment::RemoveBlock(Block *block)
490 {
491 BlockList::iterator b, be = m_blocks.end();
492 for (b=m_blocks.begin(); b!=be; ++b) {
493 if (*b == block) {
494 m_blocks.erase(b);
495 InitCache();
496 return;
497 }
498 }
499 WARNING_MESSAGE("BlockMultipleAlignment::RemoveBlock() - couldn't find block");
500 }
501
MoveBlockBoundary(unsigned int columnFrom,unsigned int columnTo)502 bool BlockMultipleAlignment::MoveBlockBoundary(unsigned int columnFrom, unsigned int columnTo)
503 {
504 unsigned int blockColumn, blockWidth;
505 GetAlignedBlockPosition(columnFrom, &blockColumn, &blockWidth);
506 if (blockColumn == eUndefined || blockWidth == 0 || blockWidth == eUndefined) return false;
507
508 TRACE_MESSAGE("trying to move block boundary from " << columnFrom << " to " << columnTo);
509
510 const BlockInfo& info = m_blockMap[columnFrom];
511 unsigned int row;
512 int requestedShift = columnTo - columnFrom, actualShift = 0;
513 const Block::Range *range;
514
515 // shrink block from left
516 if (blockColumn == 0 && requestedShift > 0 && requestedShift < (int) info.block->m_width) {
517 actualShift = requestedShift;
518 TRACE_MESSAGE("shrinking block from left");
519 for (row=0; row<NRows(); ++row) {
520 range = info.block->GetRangeOfRow(row);
521 info.block->SetRangeOfRow(row, range->from + requestedShift, range->to);
522 }
523 info.block->m_width -= requestedShift;
524 Block *prevBlock = GetBlockBefore(info.block);
525 if (prevBlock && !prevBlock->IsAligned()) {
526 for (row=0; row<NRows(); ++row) {
527 range = prevBlock->GetRangeOfRow(row);
528 prevBlock->SetRangeOfRow(row, range->from, range->to + requestedShift);
529 }
530 prevBlock->m_width += requestedShift;
531 } else {
532 Block *newUnalignedBlock = CreateNewUnalignedBlockBetween(prevBlock, info.block);
533 if (newUnalignedBlock)
534 InsertBlockBefore(newUnalignedBlock, info.block);
535 TRACE_MESSAGE("added new unaligned block");
536 }
537 }
538
539 // shrink block from right (requestedShift < 0)
540 else if (blockColumn == info.block->m_width - 1 &&
541 requestedShift < 0 && ((unsigned int) -requestedShift) < info.block->m_width) {
542 actualShift = requestedShift;
543 TRACE_MESSAGE("shrinking block from right");
544 for (row=0; row<NRows(); ++row) {
545 range = info.block->GetRangeOfRow(row);
546 info.block->SetRangeOfRow(row, range->from, range->to + requestedShift);
547 }
548 info.block->m_width += requestedShift;
549 Block *nextBlock = GetBlockAfter(info.block);
550 if (nextBlock && !nextBlock->IsAligned()) {
551 for (row=0; row<NRows(); ++row) {
552 range = nextBlock->GetRangeOfRow(row);
553 nextBlock->SetRangeOfRow(row, range->from + requestedShift, range->to);
554 }
555 nextBlock->m_width -= requestedShift;
556 } else {
557 Block *newUnalignedBlock = CreateNewUnalignedBlockBetween(info.block, nextBlock);
558 if (newUnalignedBlock)
559 InsertBlockAfter(info.block, newUnalignedBlock);
560 TRACE_MESSAGE("added new unaligned block");
561 }
562 }
563
564 // grow block to right
565 else if (blockColumn == info.block->m_width - 1 && requestedShift > 0) {
566 Block *nextBlock = GetBlockAfter(info.block);
567 if (nextBlock && !nextBlock->IsAligned()) {
568 int nRes;
569 actualShift = requestedShift;
570 for (row=0; row<NRows(); ++row) {
571 range = nextBlock->GetRangeOfRow(row);
572 nRes = range->to - range->from + 1;
573 if (nRes < actualShift)
574 actualShift = nRes;
575 }
576 if (actualShift) {
577 TRACE_MESSAGE("growing block to right");
578 for (row=0; row<NRows(); ++row) {
579 range = info.block->GetRangeOfRow(row);
580 info.block->SetRangeOfRow(row, range->from, range->to + actualShift);
581 range = nextBlock->GetRangeOfRow(row);
582 nextBlock->SetRangeOfRow(row, range->from + actualShift, range->to);
583 }
584 info.block->m_width += actualShift;
585 nextBlock->m_width -= actualShift;
586 if (nextBlock->m_width == 0) {
587 RemoveBlock(nextBlock);
588 TRACE_MESSAGE("removed empty block");
589 }
590 }
591 }
592 }
593
594 // grow block to left (requestedShift < 0)
595 else if (blockColumn == 0 && requestedShift < 0) {
596 Block *prevBlock = GetBlockBefore(info.block);
597 if (prevBlock && !prevBlock->IsAligned()) {
598 int nRes;
599 actualShift = requestedShift;
600 for (row=0; row<NRows(); ++row) {
601 range = prevBlock->GetRangeOfRow(row);
602 nRes = range->to - range->from + 1;
603 if (nRes < -actualShift) actualShift = -nRes;
604 }
605 if (actualShift) {
606 TRACE_MESSAGE("growing block to left");
607 for (row=0; row<NRows(); ++row) {
608 range = info.block->GetRangeOfRow(row);
609 info.block->SetRangeOfRow(row, range->from + actualShift, range->to);
610 range = prevBlock->GetRangeOfRow(row);
611 prevBlock->SetRangeOfRow(row, range->from, range->to + actualShift);
612 }
613 info.block->m_width -= actualShift;
614 prevBlock->m_width += actualShift;
615 if (prevBlock->m_width == 0) {
616 RemoveBlock(prevBlock);
617 TRACE_MESSAGE("removed empty block");
618 }
619 }
620 }
621 }
622
623 if (actualShift != 0) {
624 UpdateBlockMap();
625 return true;
626 } else
627 return false;
628 }
629
ShiftRow(unsigned int row,unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex,eUnalignedJustification justification)630 bool BlockMultipleAlignment::ShiftRow(unsigned int row, unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
631 eUnalignedJustification justification)
632 {
633 if (fromAlignmentIndex == toAlignmentIndex)
634 return false;
635
636 Block
637 *blockFrom = m_blockMap[fromAlignmentIndex].block,
638 *blockTo = m_blockMap[toAlignmentIndex].block;
639
640 // at least one end of the drag must be in an aligned block
641 UngappedAlignedBlock *ABlock =
642 dynamic_cast<UngappedAlignedBlock*>(blockFrom);
643 if (ABlock) {
644 if (blockTo != blockFrom && blockTo->IsAligned())
645 return false;
646 } else {
647 ABlock = dynamic_cast<UngappedAlignedBlock*>(blockTo);
648 if (!ABlock)
649 return false;
650 }
651
652 // and the other must be in the same aligned block or an adjacent unaligned block
653 UnalignedBlock
654 *prevUABlock = dynamic_cast<UnalignedBlock*>(GetBlockBefore(ABlock)),
655 *nextUABlock = dynamic_cast<UnalignedBlock*>(GetBlockAfter(ABlock));
656 if (blockFrom != blockTo &&
657 ((ABlock == blockFrom && prevUABlock != blockTo && nextUABlock != blockTo) ||
658 (ABlock == blockTo && prevUABlock != blockFrom && nextUABlock != blockFrom)))
659 return false;
660
661 int requestedShift, actualShift = 0, width = 0;
662
663 // slightly different behaviour when dragging from unaligned to aligned...
664 if (!blockFrom->IsAligned()) {
665 unsigned int fromSeqIndex, toSeqIndex;
666 GetSequenceAndIndexAt(fromAlignmentIndex, row, justification, NULL, &fromSeqIndex, NULL);
667 GetSequenceAndIndexAt(toAlignmentIndex, row, justification, NULL, &toSeqIndex, NULL);
668 if (fromSeqIndex == eUndefined || toSeqIndex == eUndefined)
669 return false;
670 requestedShift = toSeqIndex - fromSeqIndex;
671 }
672
673 // vs. dragging from aligned
674 else {
675 requestedShift = toAlignmentIndex - fromAlignmentIndex;
676 }
677
678 const Block::Range *prevRange = NULL, *nextRange = NULL, *range = ABlock->GetRangeOfRow(row);
679 if (prevUABlock) prevRange = prevUABlock->GetRangeOfRow(row);
680 if (nextUABlock) nextRange = nextUABlock->GetRangeOfRow(row);
681 if (requestedShift > 0) {
682 if (prevUABlock)
683 width = prevRange->to - prevRange->from + 1;
684 actualShift = (width > requestedShift) ? requestedShift : width;
685 } else {
686 if (nextUABlock)
687 width = nextRange->to - nextRange->from + 1;
688 actualShift = (width > -requestedShift) ? requestedShift : -width;
689 }
690 if (actualShift == 0) return false;
691
692 TRACE_MESSAGE("shifting row " << row << " by " << actualShift);
693
694 ABlock->SetRangeOfRow(row, range->from - actualShift, range->to - actualShift);
695
696 if (prevUABlock) {
697 prevUABlock->SetRangeOfRow(row, prevRange->from, prevRange->to - actualShift);
698 prevUABlock->m_width = 0;
699 for (unsigned int i=0; i<NRows(); ++i) {
700 prevRange = prevUABlock->GetRangeOfRow(i);
701 width = prevRange->to - prevRange->from + 1;
702 if (width < 0)
703 ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - negative width on left");
704 if ((unsigned int) width > prevUABlock->m_width)
705 prevUABlock->m_width = width;
706 }
707 if (prevUABlock->m_width == 0) {
708 TRACE_MESSAGE("removing zero-m_width unaligned block on left");
709 RemoveBlock(prevUABlock);
710 }
711 } else {
712 TRACE_MESSAGE("creating unaligned block on left");
713 prevUABlock = CreateNewUnalignedBlockBetween(GetBlockBefore(ABlock), ABlock);
714 InsertBlockBefore(prevUABlock, ABlock);
715 }
716
717 if (nextUABlock) {
718 nextUABlock->SetRangeOfRow(row, nextRange->from - actualShift, nextRange->to);
719 nextUABlock->m_width = 0;
720 for (unsigned int i=0; i<NRows(); ++i) {
721 nextRange = nextUABlock->GetRangeOfRow(i);
722 width = nextRange->to - nextRange->from + 1;
723 if (width < 0)
724 ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - negative width on right");
725 if ((unsigned int) width > nextUABlock->m_width)
726 nextUABlock->m_width = width;
727 }
728 if (nextUABlock->m_width == 0) {
729 TRACE_MESSAGE("removing zero-m_width unaligned block on right");
730 RemoveBlock(nextUABlock);
731 }
732 } else {
733 TRACE_MESSAGE("creating unaligned block on right");
734 nextUABlock = CreateNewUnalignedBlockBetween(ABlock, GetBlockAfter(ABlock));
735 InsertBlockAfter(ABlock, nextUABlock);
736 }
737
738 if (!CheckAlignedBlock(ABlock))
739 ERROR_MESSAGE("BlockMultipleAlignment::ShiftRow() - shift failed to create valid aligned block");
740
741 UpdateBlockMap();
742 return true;
743 }
744
SplitBlock(unsigned int alignmentIndex)745 bool BlockMultipleAlignment::SplitBlock(unsigned int alignmentIndex)
746 {
747 const BlockInfo& info = m_blockMap[alignmentIndex];
748 if (!info.block->IsAligned() || info.block->m_width < 2 || info.blockColumn == 0)
749 return false;
750
751 TRACE_MESSAGE("splitting block");
752 UngappedAlignedBlock *newAlignedBlock = new UngappedAlignedBlock(this);
753 newAlignedBlock->m_width = info.block->m_width - info.blockColumn;
754 info.block->m_width = info.blockColumn;
755
756 const Block::Range *range;
757 unsigned int oldTo;
758 for (unsigned int row=0; row<NRows(); ++row) {
759 range = info.block->GetRangeOfRow(row);
760 oldTo = range->to;
761 info.block->SetRangeOfRow(row, range->from, range->from + info.block->m_width - 1);
762 newAlignedBlock->SetRangeOfRow(row, oldTo - newAlignedBlock->m_width + 1, oldTo);
763 }
764
765 InsertBlockAfter(info.block, newAlignedBlock);
766 if (!CheckAlignedBlock(info.block) || !CheckAlignedBlock(newAlignedBlock))
767 ERROR_MESSAGE("BlockMultipleAlignment::SplitBlock() - split failed to create valid m_blocks");
768
769 UpdateBlockMap();
770 return true;
771 }
772
MergeBlocks(unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex)773 bool BlockMultipleAlignment::MergeBlocks(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex)
774 {
775 Block
776 *expandedBlock = m_blockMap[fromAlignmentIndex].block,
777 *lastBlock = m_blockMap[toAlignmentIndex].block;
778 if (expandedBlock == lastBlock)
779 return false;
780 unsigned int i;
781 for (i=fromAlignmentIndex; i<=toAlignmentIndex; ++i)
782 if (!m_blockMap[i].block->IsAligned())
783 return false;
784
785 TRACE_MESSAGE("merging block(s)");
786 for (i=0; i<NRows(); ++i)
787 expandedBlock->SetRangeOfRow(i, expandedBlock->GetRangeOfRow(i)->from, lastBlock->GetRangeOfRow(i)->to);
788 expandedBlock->m_width = lastBlock->GetRangeOfRow(0)->to - expandedBlock->GetRangeOfRow(0)->from + 1;
789
790 Block *deletedBlock = NULL, *blockToDelete;
791 for (i=fromAlignmentIndex; i<=toAlignmentIndex; ++i) {
792 blockToDelete = m_blockMap[i].block;
793 if (blockToDelete == expandedBlock)
794 continue;
795 if (blockToDelete != deletedBlock) {
796 deletedBlock = blockToDelete;
797 RemoveBlock(blockToDelete);
798 }
799 }
800
801 if (!CheckAlignedBlock(expandedBlock))
802 ERROR_MESSAGE("BlockMultipleAlignment::MergeBlocks() - merge failed to create valid block");
803
804 UpdateBlockMap();
805 return true;
806 }
807
CreateBlock(unsigned int fromAlignmentIndex,unsigned int toAlignmentIndex,eUnalignedJustification justification)808 bool BlockMultipleAlignment::CreateBlock(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex,
809 eUnalignedJustification justification)
810 {
811 const BlockInfo& info = m_blockMap[fromAlignmentIndex];
812 UnalignedBlock *prevUABlock = dynamic_cast<UnalignedBlock*>(info.block);
813 if (!prevUABlock || info.block != m_blockMap[toAlignmentIndex].block)
814 return false;
815 unsigned int row, seqIndexFrom, seqIndexTo,
816 newBlockWidth = toAlignmentIndex - fromAlignmentIndex + 1,
817 origWidth = prevUABlock->m_width;
818 vector < unsigned int > seqIndexesFrom(NRows()), seqIndexesTo(NRows());
819 const Sequence *seq;
820 bool ignored;
821 for (row=0; row<NRows(); ++row) {
822 if (!GetSequenceAndIndexAt(fromAlignmentIndex, row, justification, &seq, &seqIndexFrom, &ignored) ||
823 !GetSequenceAndIndexAt(toAlignmentIndex, row, justification, &seq, &seqIndexTo, &ignored) ||
824 seqIndexFrom == eUndefined || seqIndexTo == eUndefined ||
825 seqIndexTo - seqIndexFrom + 1 != newBlockWidth)
826 return false;
827 seqIndexesFrom[row] = seqIndexFrom;
828 seqIndexesTo[row] = seqIndexTo;
829 }
830
831 TRACE_MESSAGE("creating new aligned and unaligned blocks");
832
833 UnalignedBlock *nextUABlock = new UnalignedBlock(this);
834 UngappedAlignedBlock *ABlock = new UngappedAlignedBlock(this);
835 prevUABlock->m_width = nextUABlock->m_width = 0;
836
837 bool deletePrevUABlock = true, deleteNextUABlock = true;
838 const Block::Range *prevRange;
839 int rangeWidth;
840 for (row=0; row<NRows(); ++row) {
841 prevRange = prevUABlock->GetRangeOfRow(row);
842
843 nextUABlock->SetRangeOfRow(row, seqIndexesTo[row] + 1, prevRange->to);
844 rangeWidth = prevRange->to - seqIndexesTo[row];
845 if (rangeWidth < 0)
846 ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - negative nextRange width");
847 else if (rangeWidth > 0) {
848 if ((unsigned int) rangeWidth > nextUABlock->m_width)
849 nextUABlock->m_width = rangeWidth;
850 deleteNextUABlock = false;
851 }
852
853 prevUABlock->SetRangeOfRow(row, prevRange->from, seqIndexesFrom[row] - 1);
854 rangeWidth = seqIndexesFrom[row] - prevRange->from;
855 if (rangeWidth < 0)
856 ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - negative prevRange width");
857 else if (rangeWidth > 0) {
858 if ((unsigned int) rangeWidth > prevUABlock->m_width)
859 prevUABlock->m_width = rangeWidth;
860 deletePrevUABlock = false;
861 }
862
863 ABlock->SetRangeOfRow(row, seqIndexesFrom[row], seqIndexesTo[row]);
864 }
865
866 ABlock->m_width = newBlockWidth;
867 if (prevUABlock->m_width + ABlock->m_width + nextUABlock->m_width != origWidth)
868 ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - bad block m_widths sum");
869
870 InsertBlockAfter(prevUABlock, ABlock);
871 InsertBlockAfter(ABlock, nextUABlock);
872 if (deletePrevUABlock) {
873 TRACE_MESSAGE("deleting zero-width unaligned block on left");
874 RemoveBlock(prevUABlock);
875 }
876 if (deleteNextUABlock) {
877 TRACE_MESSAGE("deleting zero-width unaligned block on right");
878 RemoveBlock(nextUABlock);
879 }
880
881 if (!CheckAlignedBlock(ABlock))
882 ERROR_MESSAGE("BlockMultipleAlignment::CreateBlock() - failed to create valid block");
883
884 UpdateBlockMap();
885 return true;
886 }
887
DeleteBlock(unsigned int alignmentIndex)888 bool BlockMultipleAlignment::DeleteBlock(unsigned int alignmentIndex)
889 {
890 Block *block = m_blockMap[alignmentIndex].block;
891 if (!block || !block->IsAligned())
892 return false;
893
894 TRACE_MESSAGE("deleting block");
895 Block
896 *prevBlock = GetBlockBefore(block),
897 *nextBlock = GetBlockAfter(block);
898
899 // unaligned m_blocks on both sides - note that total alignment m_width can change!
900 if (prevBlock && !prevBlock->IsAligned() && nextBlock && !nextBlock->IsAligned()) {
901 const Block::Range *prevRange, *nextRange;
902 unsigned int maxWidth = 0, width;
903 for (unsigned int row=0; row<NRows(); ++row) {
904 prevRange = prevBlock->GetRangeOfRow(row);
905 nextRange = nextBlock->GetRangeOfRow(row);
906 width = nextRange->to - prevRange->from + 1;
907 prevBlock->SetRangeOfRow(row, prevRange->from, nextRange->to);
908 if (width > maxWidth)
909 maxWidth = width;
910 }
911 prevBlock->m_width = maxWidth;
912 TRACE_MESSAGE("removing extra unaligned block");
913 RemoveBlock(nextBlock);
914 }
915
916 // unaligned block on left only
917 else if (prevBlock && !prevBlock->IsAligned()) {
918 const Block::Range *prevRange, *range;
919 for (unsigned int row=0; row<NRows(); ++row) {
920 prevRange = prevBlock->GetRangeOfRow(row);
921 range = block->GetRangeOfRow(row);
922 prevBlock->SetRangeOfRow(row, prevRange->from, range->to);
923 }
924 prevBlock->m_width += block->m_width;
925 }
926
927 // unaligned block on right only
928 else if (nextBlock && !nextBlock->IsAligned()) {
929 const Block::Range *range, *nextRange;
930 for (unsigned int row=0; row<NRows(); ++row) {
931 range = block->GetRangeOfRow(row);
932 nextRange = nextBlock->GetRangeOfRow(row);
933 nextBlock->SetRangeOfRow(row, range->from, nextRange->to);
934 }
935 nextBlock->m_width += block->m_width;
936 }
937
938 // no adjacent unaligned m_blocks
939 else {
940 TRACE_MESSAGE("creating new unaligned block");
941 Block *newBlock = CreateNewUnalignedBlockBetween(prevBlock, nextBlock);
942 if (prevBlock)
943 InsertBlockAfter(prevBlock, newBlock);
944 else if (nextBlock)
945 InsertBlockBefore(newBlock, nextBlock);
946 else
947 m_blocks.push_back(CRef<Block>(newBlock));
948 }
949
950 RemoveBlock(block);
951 UpdateBlockMap();
952 return true;
953 }
954
DeleteAllBlocks(void)955 bool BlockMultipleAlignment::DeleteAllBlocks(void)
956 {
957 if (m_blocks.size() == 0)
958 return false;
959
960 m_blocks.clear();
961 InitCache();
962 AddUnalignedBlocks(); // one single unaligned block for whole alignment
963 UpdateBlockMap();
964 return true;
965 }
966
DeleteRow(unsigned int row)967 bool BlockMultipleAlignment::DeleteRow(unsigned int row)
968 {
969 if (row >= NRows()) {
970 ERROR_MESSAGE("BlockMultipleAlignment::DeleteRow() - row out of range");
971 return false;
972 }
973
974 // remove sequence from list
975 vector < bool > removeRows(NRows(), false);
976 removeRows[row] = true;
977 VectorRemoveElements(m_sequences, removeRows, 1);
978 VectorRemoveElements(m_rowDoubles, removeRows, 1);
979 VectorRemoveElements(m_rowStrings, removeRows, 1);
980
981 // delete row from all m_blocks, removing any zero-m_width m_blocks
982 BlockList::iterator b = m_blocks.begin(), br, be = m_blocks.end();
983 while (b != be) {
984 (*b)->DeleteRow(row);
985 if ((*b)->m_width == 0) {
986 br = b;
987 ++b;
988 TRACE_MESSAGE("deleting block resized to zero width");
989 RemoveBlock(*br);
990 } else
991 ++b;
992 }
993
994 // update total alignment m_width
995 UpdateBlockMap();
996 InitCache();
997
998 return true;
999 }
1000
GetUngappedAlignedBlocks(UngappedAlignedBlockList * uabs) const1001 void BlockMultipleAlignment::GetUngappedAlignedBlocks(UngappedAlignedBlockList *uabs) const
1002 {
1003 uabs->clear();
1004 uabs->reserve(m_blocks.size());
1005 BlockList::const_iterator b, be = m_blocks.end();
1006 for (b=m_blocks.begin(); b!=be; ++b) {
1007 const UngappedAlignedBlock *uab = dynamic_cast<const UngappedAlignedBlock*>(b->GetPointer());
1008 if (uab)
1009 uabs->push_back(uab);
1010 }
1011 uabs->resize(uabs->size());
1012 }
1013
GetModifiableUngappedAlignedBlocks(ModifiableUngappedAlignedBlockList * uabs)1014 void BlockMultipleAlignment::GetModifiableUngappedAlignedBlocks(ModifiableUngappedAlignedBlockList *uabs)
1015 {
1016 uabs->clear();
1017 uabs->reserve(m_blocks.size());
1018 BlockList::iterator b, be = m_blocks.end();
1019 for (b=m_blocks.begin(); b!=be; ++b) {
1020 UngappedAlignedBlock *uab = dynamic_cast<UngappedAlignedBlock*>(b->GetPointer());
1021 if (uab)
1022 uabs->push_back(uab);
1023 }
1024 uabs->resize(uabs->size());
1025 }
1026
ExtractRows(const vector<unsigned int> & slavesToRemove,AlignmentList * pairwiseAlignments)1027 bool BlockMultipleAlignment::ExtractRows(
1028 const vector < unsigned int >& slavesToRemove, AlignmentList *pairwiseAlignments)
1029 {
1030 if (slavesToRemove.size() == 0) return false;
1031
1032 // make a bool list of rows to remove, also checking to make sure slave list items are in range
1033 unsigned int i;
1034 vector < bool > removeRows(NRows(), false);
1035 for (i=0; i<slavesToRemove.size(); ++i) {
1036 if (slavesToRemove[i] > 0 && slavesToRemove[i] < NRows()) {
1037 removeRows[slavesToRemove[i]] = true;
1038 } else {
1039 ERROR_MESSAGE("BlockMultipleAlignment::ExtractRows() - can't extract row "
1040 << slavesToRemove[i]);
1041 return false;
1042 }
1043 }
1044
1045 if (pairwiseAlignments) {
1046 TRACE_MESSAGE("creating new pairwise alignments");
1047 ncbi::EDiagSev oldLevel = SetDiagPostLevel(eDiag_Warning); // otherwise, info messages take a long time if lots of rows
1048
1049 UngappedAlignedBlockList uaBlocks;
1050 GetUngappedAlignedBlocks(&uaBlocks);
1051 UngappedAlignedBlockList::const_iterator u, ue = uaBlocks.end();
1052
1053 for (i=0; i<slavesToRemove.size(); ++i) {
1054
1055 // create new pairwise alignment from each removed row
1056 SequenceList newSeqs(2);
1057 newSeqs[0] = m_sequences[0];
1058 newSeqs[1] = m_sequences[slavesToRemove[i]];
1059 BlockMultipleAlignment *newAlignment = new BlockMultipleAlignment(newSeqs);
1060 for (u=uaBlocks.begin(); u!=ue; ++u) {
1061 UngappedAlignedBlock *newABlock = new UngappedAlignedBlock(newAlignment);
1062 const Block::Range *range = (*u)->GetRangeOfRow(0);
1063 newABlock->SetRangeOfRow(0, range->from, range->to);
1064 range = (*u)->GetRangeOfRow(slavesToRemove[i]);
1065 newABlock->SetRangeOfRow(1, range->from, range->to);
1066 newABlock->m_width = range->to - range->from + 1;
1067 newAlignment->AddAlignedBlockAtEnd(newABlock);
1068 }
1069 if (!newAlignment->AddUnalignedBlocks() ||
1070 !newAlignment->UpdateBlockMap()) {
1071 ERROR_MESSAGE("BlockMultipleAlignment::ExtractRows() - error creating new alignment");
1072 return false;
1073 }
1074
1075 pairwiseAlignments->push_back(newAlignment);
1076 }
1077 SetDiagPostLevel(oldLevel);
1078 }
1079
1080 // remove sequences
1081 TRACE_MESSAGE("deleting sequences");
1082 VectorRemoveElements(m_sequences, removeRows, slavesToRemove.size());
1083 VectorRemoveElements(m_rowDoubles, removeRows, slavesToRemove.size());
1084 VectorRemoveElements(m_rowStrings, removeRows, slavesToRemove.size());
1085
1086 // delete row from all blocks, removing any zero-width blocks
1087 TRACE_MESSAGE("deleting alignment rows from blocks");
1088 BlockList::iterator b = m_blocks.begin(), br, be = m_blocks.end();
1089 while (b != be) {
1090 (*b)->DeleteRows(removeRows, slavesToRemove.size());
1091 if ((*b)->m_width == 0) {
1092 br = b;
1093 ++b;
1094 TRACE_MESSAGE("deleting block resized to zero width");
1095 RemoveBlock(*br);
1096 } else
1097 ++b;
1098 }
1099
1100 // update total alignment m_width
1101 UpdateBlockMap();
1102 InitCache();
1103 return true;
1104 }
1105
MergeAlignment(const BlockMultipleAlignment * newAlignment)1106 bool BlockMultipleAlignment::MergeAlignment(const BlockMultipleAlignment *newAlignment)
1107 {
1108 // check to see if the alignment is compatible - must have same master sequence
1109 // and blocks of new alignment must contain m_blocks of this alignment; at same time,
1110 // build up map of aligned blocks in new alignment that correspond to aligned blocks
1111 // of this object, for convenient lookup later
1112 if (newAlignment->GetMaster() != GetMaster())
1113 return false;
1114
1115 const Block::Range *newRange, *thisRange;
1116 BlockList::const_iterator nb, nbe = newAlignment->m_blocks.end();
1117 BlockList::iterator b, be = m_blocks.end();
1118 typedef map < UngappedAlignedBlock *, const UngappedAlignedBlock * > AlignedBlockMap;
1119 AlignedBlockMap correspondingNewBlocks;
1120
1121 for (b=m_blocks.begin(); b!=be; ++b) {
1122 if (!(*b)->IsAligned())
1123 continue;
1124 for (nb=newAlignment->m_blocks.begin(); nb!=nbe; ++nb) {
1125 if (!(*nb)->IsAligned())
1126 continue;
1127
1128 newRange = (*nb)->GetRangeOfRow(0);
1129 thisRange = (*b)->GetRangeOfRow(0);
1130 if (newRange->from <= thisRange->from && newRange->to >= thisRange->to) {
1131 correspondingNewBlocks[dynamic_cast<UngappedAlignedBlock*>(b->GetPointer())] =
1132 dynamic_cast<const UngappedAlignedBlock*>(nb->GetPointer());
1133 break;
1134 }
1135 }
1136 if (nb == nbe) return false; // no corresponding block found
1137 }
1138
1139 // add slave sequences from new alignment; also copy scores/status
1140 unsigned int i, nNewRows = newAlignment->m_sequences.size() - 1;
1141 m_sequences.resize(m_sequences.size() + nNewRows);
1142 m_rowDoubles.resize(m_rowDoubles.size() + nNewRows);
1143 m_rowStrings.resize(m_rowStrings.size() + nNewRows);
1144 for (i=0; i<nNewRows; ++i) {
1145 m_sequences[m_sequences.size() + i - nNewRows] = newAlignment->m_sequences[i + 1];
1146 SetRowDouble(NRows() + i - nNewRows, newAlignment->GetRowDouble(i + 1));
1147 SetRowStatusLine(NRows() + i - nNewRows, newAlignment->GetRowStatusLine(i + 1));
1148 }
1149
1150 // now that we know blocks are compatible, add new rows at end of this alignment, containing
1151 // all rows (except master) from new alignment; only that part of the aligned blocks from
1152 // the new alignment that intersect with the aligned blocks from this object are added, so
1153 // that this object's block structure is unchanged
1154
1155 // resize all aligned blocks
1156 for (b=m_blocks.begin(); b!=be; ++b)
1157 (*b)->AddRows(nNewRows);
1158
1159 // set ranges of aligned blocks, and add them to the list
1160 AlignedBlockMap::const_iterator ab, abe = correspondingNewBlocks.end();
1161 const Block::Range *thisMaster, *newMaster;
1162 for (ab=correspondingNewBlocks.begin(); ab!=abe; ++ab) {
1163 thisMaster = ab->first->GetRangeOfRow(0);
1164 newMaster = ab->second->GetRangeOfRow(0);
1165 for (i=0; i<nNewRows; ++i) {
1166 newRange = ab->second->GetRangeOfRow(i + 1);
1167 ab->first->SetRangeOfRow(NRows() + i - nNewRows,
1168 newRange->from + thisMaster->from - newMaster->from,
1169 newRange->to + thisMaster->to - newMaster->to);
1170 }
1171 }
1172
1173 // delete then recreate the unaligned blocks, in case a merge requires the
1174 // creation of a new unaligned block
1175 for (b=m_blocks.begin(); b!=be; ) {
1176 if (!(*b)->IsAligned()) {
1177 BlockList::iterator bb(b);
1178 ++bb;
1179 m_blocks.erase(b);
1180 b = bb;
1181 } else
1182 ++b;
1183 }
1184 InitCache();
1185
1186 // update this alignment, but leave row scores/status alone
1187 if (!AddUnalignedBlocks() || !UpdateBlockMap(false)) {
1188 ERROR_MESSAGE("BlockMultipleAlignment::MergeAlignment() - internal update after merge failed");
1189 return false;
1190 }
1191 return true;
1192 }
1193
1194 template < class T >
ReorderVector(T & v,const std::vector<unsigned int> & newOrder)1195 bool ReorderVector(T& v, const std::vector < unsigned int >& newOrder)
1196 {
1197 // check validity of new ordering
1198 if (newOrder.size() != v.size()) {
1199 ERROR_MESSAGE("ReorderVector() - wrong size newOrder");
1200 return false;
1201 }
1202 vector < bool > isPresent(v.size(), false);
1203 unsigned int r;
1204 for (r=0; r<v.size(); r++) {
1205 if (isPresent[newOrder[r]]) {
1206 ERROR_MESSAGE("ReorderVector() - invalid newOrder: repeated/missing row");
1207 return false;
1208 }
1209 isPresent[newOrder[r]] = true;
1210 }
1211
1212 // not terribly efficient - makes a whole new copy with the new order, then re-copies back
1213 T reordered(v.size());
1214 for (r=0; r<v.size(); r++)
1215 reordered[r] = v[newOrder[r]];
1216 v = reordered;
1217
1218 return true;
1219 }
1220
ReorderRows(const std::vector<unsigned int> & newOrder)1221 bool BlockMultipleAlignment::ReorderRows(const std::vector < unsigned int >& newOrder)
1222 {
1223 // can't reorder master
1224 if (newOrder[0] != 0) {
1225 ERROR_MESSAGE("ReorderRows() - can't move master row");
1226 return false;
1227 }
1228 bool okay =
1229 (ReorderVector(m_sequences, newOrder) &&
1230 ReorderVector(m_rowDoubles, newOrder) &&
1231 ReorderVector(m_rowStrings, newOrder));
1232 if (!okay) {
1233 ERROR_MESSAGE("reordering of sequences and status info failed");
1234 return false;
1235 }
1236 BlockList::iterator b, be = m_blocks.end();
1237 for (b=m_blocks.begin(); b!=be; ++b)
1238 okay = (okay && (*b)->ReorderRows(newOrder));
1239 if (!okay)
1240 ERROR_MESSAGE("reordering of block ranges failed");
1241 return okay;
1242 }
1243
CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment * multiple,const BlockMultipleAlignment::UngappedAlignedBlockList & m_blocks,unsigned int slaveRow)1244 CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple,
1245 const BlockMultipleAlignment::UngappedAlignedBlockList& m_blocks, unsigned int slaveRow)
1246 {
1247 if (!multiple || slaveRow >= multiple->NRows()) {
1248 ERROR_MESSAGE("CreatePairwiseSeqAlignFromMultipleRow() - bad parameters");
1249 return NULL;
1250 }
1251
1252 CSeq_align *seqAlign = new CSeq_align();
1253 seqAlign->SetType(CSeq_align::eType_partial);
1254 seqAlign->SetDim(2);
1255
1256 CSeq_align::C_Segs::TDendiag& denDiags = seqAlign->SetSegs().SetDendiag();
1257 denDiags.resize((m_blocks.size() > 0) ? m_blocks.size() : 1);
1258
1259 CSeq_align::C_Segs::TDendiag::iterator d, de = denDiags.end();
1260 BlockMultipleAlignment::UngappedAlignedBlockList::const_iterator b = m_blocks.begin();
1261 const Block::Range *range;
1262 for (d=denDiags.begin(); d!=de; ++d, ++b) {
1263
1264 CDense_diag *denDiag = new CDense_diag();
1265 d->Reset(denDiag);
1266 denDiag->SetDim(2);
1267 denDiag->SetIds().resize(2);
1268
1269 // master row
1270 CRef < CSeq_id > id(new CSeq_id());
1271 id->Assign(multiple->GetSequenceOfRow(0)->GetPreferredIdentifier());
1272 denDiag->SetIds().front() = id;
1273 if (m_blocks.size() > 0) {
1274 range = (*b)->GetRangeOfRow(0);
1275 denDiag->SetStarts().push_back(range->from);
1276 } else
1277 denDiag->SetStarts().push_back(0);
1278
1279 // slave row
1280 id.Reset(new CSeq_id());
1281 id->Assign(multiple->GetSequenceOfRow(slaveRow)->GetPreferredIdentifier());
1282 denDiag->SetIds().back() = id;
1283 if (m_blocks.size() > 0) {
1284 range = (*b)->GetRangeOfRow(slaveRow);
1285 denDiag->SetStarts().push_back(range->from);
1286 } else
1287 denDiag->SetStarts().push_back(0);
1288
1289 // block m_width
1290 denDiag->SetLen((m_blocks.size() > 0) ? (*b)->m_width : 0);
1291 }
1292
1293 return seqAlign;
1294 }
1295
NAlignedBlocks(void) const1296 unsigned int BlockMultipleAlignment::NAlignedBlocks(void) const
1297 {
1298 unsigned int n = 0;
1299 BlockList::const_iterator b, be = m_blocks.end();
1300 for (b=m_blocks.begin(); b!=be; ++b)
1301 if ((*b)->IsAligned())
1302 ++n;
1303 return n;
1304 }
1305
GetAlignmentIndex(unsigned int row,unsigned int seqIndex,eUnalignedJustification justification)1306 unsigned int BlockMultipleAlignment::GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification)
1307 {
1308 if (row >= NRows() || seqIndex >= GetSequenceOfRow(row)->Length()) {
1309 ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - coordinate out of range");
1310 return eUndefined;
1311 }
1312
1313 unsigned int alignmentIndex, blockColumn;
1314 const Block *block = NULL;
1315 const Block::Range *range;
1316
1317 for (alignmentIndex=0; alignmentIndex<m_totalWidth; ++alignmentIndex) {
1318
1319 // check each block to see if index is in range
1320 if (block != m_blockMap[alignmentIndex].block) {
1321 block = m_blockMap[alignmentIndex].block;
1322
1323 range = block->GetRangeOfRow(row);
1324 if ((int)seqIndex >= range->from && (int)seqIndex <= range->to) {
1325
1326 // override requested justification for end blocks
1327 if (block == m_blocks.back()) // also true if there's a single aligned block
1328 justification = eLeft;
1329 else if (block == m_blocks.front())
1330 justification = eRight;
1331
1332 // search block columns to find index (inefficient, but avoids having to write
1333 // inverse functions of Block::GetIndexAt()
1334 for (blockColumn=0; blockColumn<block->m_width; ++blockColumn) {
1335 if (seqIndex == block->GetIndexAt(blockColumn, row, justification))
1336 return alignmentIndex + blockColumn;
1337 }
1338 ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - can't find index in block");
1339 return eUndefined;
1340 }
1341 }
1342 }
1343
1344 // should never get here...
1345 ERROR_MESSAGE("BlockMultipleAlignment::GetAlignmentIndex() - confused");
1346 return eUndefined;
1347 }
1348
1349
ReorderRows(const std::vector<unsigned int> & newOrder)1350 bool Block::ReorderRows(const std::vector < unsigned int >& newOrder)
1351 {
1352 return ReorderVector(m_ranges, newOrder);
1353 }
1354
1355
1356 ///// UngappedAlignedBlock methods /////
1357
GetCharacterAt(unsigned int blockColumn,unsigned int row) const1358 char UngappedAlignedBlock::GetCharacterAt(unsigned int blockColumn, unsigned int row) const
1359 {
1360 return m_parentAlignment->GetSequenceOfRow(row)->m_sequenceString[GetIndexAt(blockColumn, row)];
1361 }
1362
Clone(const BlockMultipleAlignment * newMultiple) const1363 Block * UngappedAlignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const
1364 {
1365 UngappedAlignedBlock *copy = new UngappedAlignedBlock(newMultiple);
1366 const Block::Range *range;
1367 for (unsigned int row=0; row<NSequences(); ++row) {
1368 range = GetRangeOfRow(row);
1369 copy->SetRangeOfRow(row, range->from, range->to);
1370 }
1371 copy->m_width = m_width;
1372 return copy;
1373 }
1374
DeleteRow(unsigned int row)1375 void UngappedAlignedBlock::DeleteRow(unsigned int row)
1376 {
1377 RangeList::iterator r = m_ranges.begin();
1378 for (unsigned int i=0; i<row; ++i)
1379 ++r;
1380 m_ranges.erase(r);
1381 }
1382
DeleteRows(vector<bool> & removeRows,unsigned int nToRemove)1383 void UngappedAlignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove)
1384 {
1385 VectorRemoveElements(m_ranges, removeRows, nToRemove);
1386 }
1387
1388
1389 ///// UnalignedBlock methods /////
1390
GetIndexAt(unsigned int blockColumn,unsigned int row,BlockMultipleAlignment::eUnalignedJustification justification) const1391 unsigned int UnalignedBlock::GetIndexAt(unsigned int blockColumn, unsigned int row,
1392 BlockMultipleAlignment::eUnalignedJustification justification) const
1393 {
1394 const Block::Range *range = GetRangeOfRow(row);
1395 unsigned int seqIndex = BlockMultipleAlignment::eUndefined, rangeWidth, rangeMiddle, extraSpace;
1396
1397 switch (justification) {
1398 case BlockMultipleAlignment::eLeft:
1399 seqIndex = range->from + blockColumn;
1400 break;
1401 case BlockMultipleAlignment::eRight:
1402 seqIndex = range->to - m_width + blockColumn + 1;
1403 break;
1404 case BlockMultipleAlignment::eCenter:
1405 rangeWidth = (range->to - range->from + 1);
1406 extraSpace = (m_width - rangeWidth) / 2;
1407 if (blockColumn < extraSpace || blockColumn >= extraSpace + rangeWidth)
1408 seqIndex = BlockMultipleAlignment::eUndefined;
1409 else
1410 seqIndex = range->from + blockColumn - extraSpace;
1411 break;
1412 case BlockMultipleAlignment::eSplit:
1413 rangeWidth = (range->to - range->from + 1);
1414 rangeMiddle = (rangeWidth / 2) + (rangeWidth % 2);
1415 extraSpace = m_width - rangeWidth;
1416 if (blockColumn < rangeMiddle)
1417 seqIndex = range->from + blockColumn;
1418 else if (blockColumn >= extraSpace + rangeMiddle)
1419 seqIndex = range->to - m_width + blockColumn + 1;
1420 else
1421 seqIndex = BlockMultipleAlignment::eUndefined;
1422 break;
1423 }
1424 if ((int)seqIndex < range->from || (int)seqIndex > range->to)
1425 seqIndex = BlockMultipleAlignment::eUndefined;
1426
1427 return seqIndex;
1428 }
1429
Resize(void)1430 void UnalignedBlock::Resize(void)
1431 {
1432 m_width = 0;
1433 for (unsigned int i=0; i<NSequences(); ++i) {
1434 unsigned int blockWidth = m_ranges[i].to - m_ranges[i].from + 1;
1435 if (blockWidth > m_width)
1436 m_width = blockWidth;
1437 }
1438 }
1439
DeleteRow(unsigned int row)1440 void UnalignedBlock::DeleteRow(unsigned int row)
1441 {
1442 RangeList::iterator r = m_ranges.begin();
1443 for (unsigned int i=0; i<row; ++i)
1444 ++r;
1445 m_ranges.erase(r);
1446 Resize();
1447 }
1448
DeleteRows(vector<bool> & removeRows,unsigned int nToRemove)1449 void UnalignedBlock::DeleteRows(vector < bool >& removeRows, unsigned int nToRemove)
1450 {
1451 VectorRemoveElements(m_ranges, removeRows, nToRemove);
1452 Resize();
1453 }
1454
Clone(const BlockMultipleAlignment * newMultiple) const1455 Block * UnalignedBlock::Clone(const BlockMultipleAlignment *newMultiple) const
1456 {
1457 UnalignedBlock *copy = new UnalignedBlock(newMultiple);
1458 const Block::Range *range;
1459 for (unsigned int row=0; row<NSequences(); ++row) {
1460 range = GetRangeOfRow(row);
1461 copy->SetRangeOfRow(row, range->from, range->to);
1462 }
1463 copy->m_width = m_width;
1464 return copy;
1465 }
1466
1467 END_SCOPE(struct_util)
1468