1 /* $Id: block_multiple_alignment.hpp 103491 2007-05-04 17:18:18Z kazimird $ 2 * =========================================================================== 3 * 4 * PUBLIC DOMAIN NOTICE 5 * National Center for Biotechnology Information 6 * 7 * This software/database is a "United States Government Work" under the 8 * terms of the United States Copyright Act. It was written as part of 9 * the author's official duties as a United States Government employee and 10 * thus cannot be copyrighted. This software/database is freely available 11 * to the public for use. The National Library of Medicine and the U.S. 12 * Government have not placed any restriction on its use or reproduction. 13 * 14 * Although all reasonable efforts have been taken to ensure the accuracy 15 * and reliability of the software and data, the NLM and the U.S. 16 * Government do not and cannot warrant the performance or results that 17 * may be obtained by using this software or data. The NLM and the U.S. 18 * Government disclaim all warranties, express or implied, including 19 * warranties of performance, merchantability or fitness for any particular 20 * purpose. 21 * 22 * Please cite the author in any work or product based on this material. 23 * 24 * =========================================================================== 25 * 26 * Authors: Paul Thiessen 27 * 28 * File Description: 29 * Classes to hold alignment data 30 * 31 * =========================================================================== 32 */ 33 34 #ifndef CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP 35 #define CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP 36 37 #include <corelib/ncbistl.hpp> 38 39 #include <objects/cdd/Update_align.hpp> 40 #include <objects/seqalign/Seq_align.hpp> 41 42 #include <list> 43 #include <vector> 44 #include <map> 45 46 #include "vector_math.hpp" 47 #include "style_manager.hpp" 48 49 50 BEGIN_SCOPE(Cn3D) 51 52 class Sequence; 53 class ConservationColorer; 54 class AlignmentManager; 55 class Block; 56 class UngappedAlignedBlock; 57 class UnalignedBlock; 58 class Messenger; 59 class Threader; 60 class PSSMWrapper; 61 62 class BlockMultipleAlignment 63 { 64 public: 65 typedef std::vector < const Sequence * > SequenceList; 66 // list will be owned/freed by this object 67 BlockMultipleAlignment(SequenceList *sequenceList, AlignmentManager *alnMgr); 68 69 ~BlockMultipleAlignment(void); 70 GetSequences(void) const71 const SequenceList * GetSequences(void) const { return sequences; } 72 73 // to track the origin of this alignment if it came from an update 74 ncbi::CRef < ncbi::objects::CUpdate_align > updateOrigin; 75 76 // add a new aligned block - will be "owned" and deallocated by BlockMultipleAlignment 77 bool AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock); 78 79 // these two should be called after all aligned blocks have been added; fills out 80 // unaligned blocks inbetween aligned blocks (and at ends). Also sets length. 81 bool AddUnalignedBlocks(void); 82 83 // Fills out the BlockMap for mapping alignment column -> block+column, special colors, 84 // and sets up conservation colors (although they're not calculated until needed). 85 bool UpdateBlockMapAndColors(bool clearRowInfo = true); 86 87 // find out if a residue is aligned, by row 88 bool IsAligned(unsigned int row, unsigned int seqIndex) const; 89 90 // find out if a residue is aligned, by Sequence - only works for non-repeated Sequences! IsAligned(const Sequence * sequence,unsigned int seqIndex) const91 bool IsAligned(const Sequence *sequence, unsigned int seqIndex) const 92 { 93 int row = GetRowForSequence(sequence); 94 if (row < 0) return false; 95 return IsAligned(row, seqIndex); 96 } 97 98 // stuff regarding master sequence GetMaster(void) const99 const Sequence * GetMaster(void) const { return (*sequences)[0]; } IsMaster(const Sequence * sequence) const100 bool IsMaster(const Sequence *sequence) const { return (sequence == (*sequences)[0]); } 101 102 // return sequence for given row GetSequenceOfRow(unsigned int row) const103 const Sequence * GetSequenceOfRow(unsigned int row) const 104 { 105 if (row < sequences->size()) 106 return (*sequences)[row]; 107 else 108 return NULL; 109 } 110 111 // given a sequence, return row number in this alignment (or -1 if not found) 112 int GetRowForSequence(const Sequence *sequence) const; 113 114 // get a color for an aligned residue that's dependent on the entire alignment 115 // (e.g., for coloring by sequence conservation) 116 const Vector * GetAlignmentColor(const Sequence *sequence, unsigned int seqIndex, 117 StyleSettings::eColorScheme colorScheme) const; 118 const Vector * GetAlignmentColor(unsigned int row, unsigned int seqIndex, 119 StyleSettings::eColorScheme colorScheme) const; 120 121 // will be used to control padding of unaligned blocks 122 enum eUnalignedJustification { 123 eLeft, 124 eRight, 125 eCenter, 126 eSplit 127 }; 128 129 // return alignment position of left side first aligned block (-1 if no aligned blocks) 130 int GetFirstAlignedBlockPosition(void) const; 131 132 // makes a new copy of itself 133 BlockMultipleAlignment * Clone(void) const; 134 135 // character query interface - "column" must be in alignment range [0 .. totalWidth-1] 136 bool GetCharacterTraitsAt(unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification, 137 char *character, Vector *color, bool *isHighlighted, 138 bool *drawBackground, Vector *cellBackgroundColor) const; 139 140 // get sequence and index (if any) at given position, and whether that residue is aligned 141 bool GetSequenceAndIndexAt(unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification, 142 const Sequence **sequence, int *index, bool *isAligned) const; 143 144 // given row and sequence index, return alignment index; not the most efficient function - use sparingly 145 int GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification) const; 146 147 // called when user selects some part of a row 148 void SelectedRange(unsigned int row, unsigned int alnIndexFrom, unsigned int alnIndexTo, 149 eUnalignedJustification justification, bool toggle) const; 150 void SelectBlocks(unsigned int alnIndexFrom, unsigned int alnIndexTo, bool toggle) const; 151 152 // fill in a vector of Blocks 153 typedef std::vector < const Block * > ConstBlockList; 154 void GetBlocks(ConstBlockList *blocks) const; 155 156 // fill in a vector of UngappedAlignedBlocks; returns # aligned blocks 157 typedef std::vector < const UngappedAlignedBlock * > UngappedAlignedBlockList; 158 unsigned int GetUngappedAlignedBlocks(UngappedAlignedBlockList *blocks) const; 159 160 // free color storage 161 void FreeColors(void); 162 163 // highlight aligned columns based on master indexes (mainly for alignment annotations); 164 // returns false if any residue in the range is unaligned (or out of range), true on success 165 bool HighlightAlignedColumnsOfMasterRange(unsigned int from, unsigned int to) const; 166 167 // PSSM for this alignment (cached) 168 const PSSMWrapper& GetPSSM(void) const; 169 void RemovePSSM(void) const; 170 171 172 ///// editing functions ///// 173 174 // if in an aligned block, give block column and width of that position; otherwise, -1 175 void GetAlignedBlockPosition(unsigned int alignmentIndex, int *blockColumn, int *blockWidth) const; 176 177 // get seqIndex of dependent aligned to the given master seqIndex; -1 if master residue unaligned 178 int GetAlignedDependentIndex(unsigned int masterSeqIndex, unsigned int dependentRow) const; 179 180 // returns true if any boundary shift actually occurred 181 bool MoveBlockBoundary(unsigned int columnFrom, unsigned int columnTo); 182 183 // splits a block such that alignmentIndex is the first column of the new block; 184 // returns false if no split occurred (e.g. if index is not inside aligned block) 185 bool SplitBlock(unsigned int alignmentIndex); 186 187 // merges all blocks that overlap specified range - assuming no unaligned blocks 188 // in that range. Returns true if any merge(s) occurred, false otherwise. 189 bool MergeBlocks(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex); 190 191 // creates a block, if given region of an unaligned block in which no gaps 192 // are present. Returns true if a block is created. 193 bool CreateBlock(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex, eUnalignedJustification justification); 194 195 // deletes the block containing this index; returns true if deletion occurred. 196 bool DeleteBlock(unsigned int alignmentIndex); 197 198 // deletes all blocks; returns true if there were any blocks to delete 199 bool DeleteAllBlocks(void); 200 201 // shifts (horizontally) the residues in and immediately surrounding an 202 // aligned block; returns true if any shift occurs. 203 bool ShiftRow(unsigned int row, unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex, eUnalignedJustification justification); 204 205 // only if the residue is unaligned, move it into the first aligned position of the adjacent aligned block 206 bool ZipAlignResidue(unsigned int row, unsigned int alignmentIndex, bool moveRight, eUnalignedJustification justification); 207 208 // scans for the best position of a particular block; returns true if the block moved 209 bool OptimizeBlock(unsigned int row, unsigned int alignmentIndex, eUnalignedJustification justification); 210 211 // delete a row; returns true if successful 212 bool DeleteRow(unsigned int row); 213 214 // flag an aligned block for realignment - block will be removed upon ExtractRows; returns true if 215 // column is in fact an aligned block 216 bool MarkBlock(unsigned int column); 217 bool ClearMarks(void); // remove all block flags 218 void GetMarkedBlockNumbers(std::vector < unsigned int > *alignedBlockNumbers) const; 219 220 // this function does two things: it extracts from a multiple alignment all dependent rows listed for 221 // removal; and if pairwiseAlignments!=NULL, for each dependent removed creates a new BlockMultipleAlignment 222 // that contains the alignment of just that dependent with the master, as it was in the original multiple 223 // (i.e., not according to the corresponding pre-IBM MasterDependentAlignment) 224 typedef std::list < BlockMultipleAlignment * > AlignmentList; 225 bool ExtractRows(const std::vector < unsigned int >& dependentsToRemove, AlignmentList *pairwiseAlignments); 226 227 // merge in the contents of the given alignment (assuming same master, compatible block structure), 228 // addings its rows to the end of this alignment; returns true if merge successful. Does not change 229 // block structure - just adds the part of new alignment's aligned blocks that intersect with this 230 // object's aligned blocks 231 bool MergeAlignment(const BlockMultipleAlignment *newAlignment); 232 233 // turn on/off geometry violations; if param is true, will return # violations found 234 unsigned int ShowGeometryViolations(bool showGeometryViolations); 235 236 private: 237 AlignmentManager *alignmentManager; 238 SequenceList *sequences; 239 ConservationColorer *conservationColorer; 240 mutable PSSMWrapper *pssm; 241 242 typedef std::list < Block * > BlockList; 243 BlockList blocks; 244 245 unsigned int totalWidth; 246 247 typedef struct { 248 Block *block; 249 int blockColumn, alignedBlockNum; 250 } BlockInfo; 251 typedef std::vector < BlockInfo > BlockMap; 252 BlockMap blockMap; 253 254 // to flag blocks for realignment 255 typedef std::map < const Block * , bool > MarkBlockMap; 256 MarkBlockMap markBlocks; 257 258 bool CheckAlignedBlock(const Block *newBlock) const; 259 UnalignedBlock * CreateNewUnalignedBlockBetween(const Block *left, const Block *right); 260 Block * GetBlockBefore(const Block *block) const; 261 Block * GetBlockAfter(const Block *block) const; 262 void InsertBlockBefore(Block *newBlock, const Block *insertAt); 263 void InsertBlockAfter(const Block *insertAt, Block *newBlock); 264 void RemoveBlock(Block *block); 265 266 // for cacheing of residue->block lookups 267 mutable unsigned int cachePrevRow; 268 mutable const Block *cachePrevBlock; 269 mutable BlockList::const_iterator cacheBlockIterator; 270 void InitCache(void); 271 272 // given a row and seqIndex, find block that contains that residue 273 const Block * GetBlock(unsigned int row, unsigned int seqIndex) const; 274 275 // intended for volatile storage of row-associated info (e.g. for alignment scores, etc.) 276 mutable std::vector < double > rowDoubles; 277 mutable std::vector < std::string > rowStrings; 278 279 // for flagging residues (e.g. for geometry violations) 280 typedef std::vector < std::vector < bool > > RowSeqIndexFlags; 281 RowSeqIndexFlags geometryViolations; 282 bool showGeometryViolations; 283 284 unsigned int SumOfAlignedBlockWidths(void) const; 285 286 public: 287 288 // NULL if block before is aligned; if NULL passed, retrieves last block (if unaligned; else NULL) 289 const UnalignedBlock * GetUnalignedBlockBefore(const UngappedAlignedBlock *aBlock) const; 290 // NULL if block after is aligned; if NULL passed, retrieves first block (if unaligned; else NULL) 291 const UnalignedBlock * GetUnalignedBlockAfter(const UngappedAlignedBlock *aBlock) const; 292 NBlocks(void) const293 unsigned int NBlocks(void) const { return blocks.size(); } 294 bool HasNoAlignedBlocks(void) const; 295 unsigned int NAlignedBlocks(void) const; NRows(void) const296 unsigned int NRows(void) const { return sequences->size(); } AlignmentWidth(void) const297 unsigned int AlignmentWidth(void) const { return totalWidth; } 298 299 // return a number from 1..n for aligned blocks, -1 for unaligned GetAlignedBlockNumber(unsigned int alignmentIndex) const300 int GetAlignedBlockNumber(unsigned int alignmentIndex) const { return blockMap[alignmentIndex].alignedBlockNum; } 301 302 // returns the loop length of the unaligned region at the given row and alignment index; -1 if that position is aligned 303 int GetLoopLength(unsigned int row, unsigned int alignmentIndex); 304 305 // return [0..1] for aligned residues depending on the alignment position - first aligned column is 0, last is 1; 306 // -1 if unaligned. This is mainly for alignment rainbow coloring 307 double GetRelativeAlignmentFraction(unsigned int alignmentIndex) const; 308 309 // for storing/querying info GetRowDouble(unsigned int row) const310 double GetRowDouble(unsigned int row) const { return rowDoubles[row]; } SetRowDouble(unsigned int row,double value) const311 void SetRowDouble(unsigned int row, double value) const { rowDoubles[row] = value; } GetRowStatusLine(unsigned int row) const312 const std::string& GetRowStatusLine(unsigned int row) const { return rowStrings[row]; } SetRowStatusLine(unsigned int row,const std::string & value) const313 void SetRowStatusLine(unsigned int row, const std::string& value) const { rowStrings[row] = value; } 314 315 // empties all rows' infos ClearRowInfo(void) const316 void ClearRowInfo(void) const 317 { 318 for (unsigned int r=0; r<NRows(); ++r) { 319 rowDoubles[r] = 0.0; 320 rowStrings[r].erase(); 321 } 322 } 323 324 // kludge for now for storing allowed alignment regions, e.g. when demoted from multiple. 325 // (only two ranges for now, since this is used only with pairwise alignments) 326 int alignMasterFrom, alignMasterTo, alignDependentFrom, alignDependentTo; 327 }; 328 329 330 // static function to create Seq-aligns out of multiple 331 ncbi::objects::CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple, 332 const BlockMultipleAlignment::UngappedAlignedBlockList& blocks, unsigned int dependentRow); 333 334 335 // base class for Block - BlockMultipleAlignment is made up of a list of these 336 class Block 337 { 338 public: 339 unsigned int width; 340 341 virtual bool IsAligned(void) const = 0; 342 343 typedef struct { 344 int from, to; // signed because 'to' may be -1 (zero-width block starting at index zero) 345 } Range; 346 347 // get sequence index for a column, which must be in block range (0 ... width-1) 348 virtual int GetIndexAt(unsigned int blockColumn, unsigned int row, 349 BlockMultipleAlignment::eUnalignedJustification justification) const = 0; 350 351 // delete a row 352 virtual void DeleteRow(unsigned int row) = 0; 353 virtual void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove) = 0; 354 355 // makes a new copy of itself 356 virtual Block * Clone(const BlockMultipleAlignment *newMultiple) const = 0; 357 358 protected: Block(const BlockMultipleAlignment * multiple)359 Block(const BlockMultipleAlignment *multiple) : 360 width(0), ranges(multiple->NRows()), parentAlignment(multiple) { } 361 362 typedef std::vector < Range > RangeList; 363 RangeList ranges; 364 365 const BlockMultipleAlignment *parentAlignment; 366 367 public: ~Block(void)368 virtual ~Block(void) { } // virtual destructor for base class 369 IsFrom(const BlockMultipleAlignment * alignment) const370 bool IsFrom(const BlockMultipleAlignment *alignment) const { return (alignment == parentAlignment); } 371 372 // given a row number (from 0 ... nSequences-1), give the sequence range covered by this block GetRangeOfRow(unsigned int row) const373 const Range* GetRangeOfRow(unsigned int row) const { return &(ranges[row]); } 374 375 // set range SetRangeOfRow(unsigned int row,int from,int to)376 void SetRangeOfRow(unsigned int row, int from, int to) 377 { 378 ranges[row].from = from; 379 ranges[row].to = to; 380 } 381 382 // resize - add new (empty) rows at end AddRows(unsigned int nNewRows)383 void AddRows(unsigned int nNewRows) { ranges.resize(ranges.size() + nNewRows); } 384 NSequences(void) const385 unsigned int NSequences(void) const { return ranges.size(); } 386 }; 387 388 389 // a gapless aligned block; width must be >= 1 390 class UngappedAlignedBlock : public Block 391 { 392 public: UngappedAlignedBlock(const BlockMultipleAlignment * multiple)393 UngappedAlignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { } 394 IsAligned(void) const395 bool IsAligned(void) const { return true; } 396 GetIndexAt(unsigned int blockColumn,unsigned int row,BlockMultipleAlignment::eUnalignedJustification justification=BlockMultipleAlignment::eCenter) const397 int GetIndexAt(unsigned int blockColumn, unsigned int row, 398 BlockMultipleAlignment::eUnalignedJustification justification = 399 BlockMultipleAlignment::eCenter) const // justification ignored for aligned block 400 { return (GetRangeOfRow(row)->from + blockColumn); } 401 402 char GetCharacterAt(unsigned int blockColumn, unsigned int row) const; 403 404 void DeleteRow(unsigned int row); 405 void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove); 406 407 Block * Clone(const BlockMultipleAlignment *newMultiple) const; 408 }; 409 410 411 // an unaligned block; max width of block must be >=1, but range over any given 412 // sequence can be length 0 (but not <0). If length 0, "to" is the residue after 413 // the block, and "from" (=to - 1) is the residue before. 414 class UnalignedBlock : public Block 415 { 416 public: UnalignedBlock(const BlockMultipleAlignment * multiple)417 UnalignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { } 418 419 void Resize(void); // recompute block width from current ranges 420 IsAligned(void) const421 bool IsAligned(void) const { return false; } 422 423 int GetIndexAt(unsigned int blockColumn, unsigned int row, 424 BlockMultipleAlignment::eUnalignedJustification justification) const; 425 426 void DeleteRow(unsigned int row); 427 void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove); 428 429 Block * Clone(const BlockMultipleAlignment *newMultiple) const; 430 431 // return the length of the shortest region that any row contributes to this block 432 unsigned int MinResidues(void) const; 433 }; 434 435 END_SCOPE(Cn3D) 436 437 #endif // CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP 438