1 /*  $Id: block_multiple_alignment.hpp 103491 2007-05-04 17:18:18Z kazimird $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors:  Paul Thiessen
27 *
28 * File Description:
29 *      Classes to hold alignment data
30 *
31 * ===========================================================================
32 */
33 
34 #ifndef CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP
35 #define CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP
36 
37 #include <corelib/ncbistl.hpp>
38 
39 #include <objects/cdd/Update_align.hpp>
40 #include <objects/seqalign/Seq_align.hpp>
41 
42 #include <list>
43 #include <vector>
44 #include <map>
45 
46 #include "vector_math.hpp"
47 #include "style_manager.hpp"
48 
49 
50 BEGIN_SCOPE(Cn3D)
51 
52 class Sequence;
53 class ConservationColorer;
54 class AlignmentManager;
55 class Block;
56 class UngappedAlignedBlock;
57 class UnalignedBlock;
58 class Messenger;
59 class Threader;
60 class PSSMWrapper;
61 
62 class BlockMultipleAlignment
63 {
64 public:
65     typedef std::vector < const Sequence * > SequenceList;
66     // list will be owned/freed by this object
67     BlockMultipleAlignment(SequenceList *sequenceList, AlignmentManager *alnMgr);
68 
69     ~BlockMultipleAlignment(void);
70 
GetSequences(void) const71     const SequenceList * GetSequences(void) const { return sequences; }
72 
73     // to track the origin of this alignment if it came from an update
74     ncbi::CRef < ncbi::objects::CUpdate_align > updateOrigin;
75 
76     // add a new aligned block - will be "owned" and deallocated by BlockMultipleAlignment
77     bool AddAlignedBlockAtEnd(UngappedAlignedBlock *newBlock);
78 
79     // these two should be called after all aligned blocks have been added; fills out
80     // unaligned blocks inbetween aligned blocks (and at ends). Also sets length.
81     bool AddUnalignedBlocks(void);
82 
83     // Fills out the BlockMap for mapping alignment column -> block+column, special colors,
84     // and sets up conservation colors (although they're not calculated until needed).
85     bool UpdateBlockMapAndColors(bool clearRowInfo = true);
86 
87     // find out if a residue is aligned, by row
88     bool IsAligned(unsigned int row, unsigned int seqIndex) const;
89 
90     // find out if a residue is aligned, by Sequence - only works for non-repeated Sequences!
IsAligned(const Sequence * sequence,unsigned int seqIndex) const91     bool IsAligned(const Sequence *sequence, unsigned int seqIndex) const
92     {
93         int row = GetRowForSequence(sequence);
94         if (row < 0) return false;
95         return IsAligned(row, seqIndex);
96     }
97 
98     // stuff regarding master sequence
GetMaster(void) const99     const Sequence * GetMaster(void) const { return (*sequences)[0]; }
IsMaster(const Sequence * sequence) const100     bool IsMaster(const Sequence *sequence) const { return (sequence == (*sequences)[0]); }
101 
102     // return sequence for given row
GetSequenceOfRow(unsigned int row) const103     const Sequence * GetSequenceOfRow(unsigned int row) const
104     {
105         if (row < sequences->size())
106             return (*sequences)[row];
107         else
108             return NULL;
109     }
110 
111     // given a sequence, return row number in this alignment (or -1 if not found)
112     int GetRowForSequence(const Sequence *sequence) const;
113 
114     // get a color for an aligned residue that's dependent on the entire alignment
115     // (e.g., for coloring by sequence conservation)
116     const Vector * GetAlignmentColor(const Sequence *sequence, unsigned int seqIndex,
117         StyleSettings::eColorScheme colorScheme) const;
118     const Vector * GetAlignmentColor(unsigned int row, unsigned int seqIndex,
119         StyleSettings::eColorScheme colorScheme) const;
120 
121     // will be used to control padding of unaligned blocks
122     enum eUnalignedJustification {
123         eLeft,
124         eRight,
125         eCenter,
126         eSplit
127     };
128 
129     // return alignment position of left side first aligned block (-1 if no aligned blocks)
130     int GetFirstAlignedBlockPosition(void) const;
131 
132     // makes a new copy of itself
133     BlockMultipleAlignment * Clone(void) const;
134 
135     // character query interface - "column" must be in alignment range [0 .. totalWidth-1]
136     bool GetCharacterTraitsAt(unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification,
137         char *character, Vector *color, bool *isHighlighted,
138         bool *drawBackground, Vector *cellBackgroundColor) const;
139 
140     // get sequence and index (if any) at given position, and whether that residue is aligned
141     bool GetSequenceAndIndexAt(unsigned int alignmentColumn, unsigned int row, eUnalignedJustification justification,
142         const Sequence **sequence, int *index, bool *isAligned) const;
143 
144     // given row and sequence index, return alignment index; not the most efficient function - use sparingly
145     int GetAlignmentIndex(unsigned int row, unsigned int seqIndex, eUnalignedJustification justification) const;
146 
147     // called when user selects some part of a row
148     void SelectedRange(unsigned int row, unsigned int alnIndexFrom, unsigned int alnIndexTo,
149         eUnalignedJustification justification, bool toggle) const;
150     void SelectBlocks(unsigned int alnIndexFrom, unsigned int alnIndexTo, bool toggle) const;
151 
152     // fill in a vector of Blocks
153     typedef std::vector < const Block * > ConstBlockList;
154     void GetBlocks(ConstBlockList *blocks) const;
155 
156     // fill in a vector of UngappedAlignedBlocks; returns # aligned blocks
157     typedef std::vector < const UngappedAlignedBlock * > UngappedAlignedBlockList;
158     unsigned int GetUngappedAlignedBlocks(UngappedAlignedBlockList *blocks) const;
159 
160     // free color storage
161     void FreeColors(void);
162 
163     // highlight aligned columns based on master indexes (mainly for alignment annotations);
164     // returns false if any residue in the range is unaligned (or out of range), true on success
165     bool HighlightAlignedColumnsOfMasterRange(unsigned int from, unsigned int to) const;
166 
167     // PSSM for this alignment (cached)
168     const PSSMWrapper& GetPSSM(void) const;
169     void RemovePSSM(void) const;
170 
171 
172     ///// editing functions /////
173 
174     // if in an aligned block, give block column and width of that position; otherwise, -1
175     void GetAlignedBlockPosition(unsigned int alignmentIndex, int *blockColumn, int *blockWidth) const;
176 
177     // get seqIndex of dependent aligned to the given master seqIndex; -1 if master residue unaligned
178     int GetAlignedDependentIndex(unsigned int masterSeqIndex, unsigned int dependentRow) const;
179 
180     // returns true if any boundary shift actually occurred
181     bool MoveBlockBoundary(unsigned int columnFrom, unsigned int columnTo);
182 
183     // splits a block such that alignmentIndex is the first column of the new block;
184     // returns false if no split occurred (e.g. if index is not inside aligned block)
185     bool SplitBlock(unsigned int alignmentIndex);
186 
187     // merges all blocks that overlap specified range - assuming no unaligned blocks
188     // in that range. Returns true if any merge(s) occurred, false otherwise.
189     bool MergeBlocks(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex);
190 
191     // creates a block, if given region of an unaligned block in which no gaps
192     // are present. Returns true if a block is created.
193     bool CreateBlock(unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex, eUnalignedJustification justification);
194 
195     // deletes the block containing this index; returns true if deletion occurred.
196     bool DeleteBlock(unsigned int alignmentIndex);
197 
198     // deletes all blocks; returns true if there were any blocks to delete
199     bool DeleteAllBlocks(void);
200 
201     // shifts (horizontally) the residues in and immediately surrounding an
202     // aligned block; returns true if any shift occurs.
203     bool ShiftRow(unsigned int row, unsigned int fromAlignmentIndex, unsigned int toAlignmentIndex, eUnalignedJustification justification);
204 
205     // only if the residue is unaligned, move it into the first aligned position of the adjacent aligned block
206     bool ZipAlignResidue(unsigned int row, unsigned int alignmentIndex, bool moveRight, eUnalignedJustification justification);
207 
208     // scans for the best position of a particular block; returns true if the block moved
209     bool OptimizeBlock(unsigned int row, unsigned int alignmentIndex, eUnalignedJustification justification);
210 
211     // delete a row; returns true if successful
212     bool DeleteRow(unsigned int row);
213 
214     // flag an aligned block for realignment - block will be removed upon ExtractRows; returns true if
215     // column is in fact an aligned block
216     bool MarkBlock(unsigned int column);
217     bool ClearMarks(void);  // remove all block flags
218     void GetMarkedBlockNumbers(std::vector < unsigned int > *alignedBlockNumbers) const;
219 
220     // this function does two things: it extracts from a multiple alignment all dependent rows listed for
221     // removal; and if pairwiseAlignments!=NULL, for each dependent removed creates a new BlockMultipleAlignment
222     // that contains the alignment of just that dependent with the master, as it was in the original multiple
223     // (i.e., not according to the corresponding pre-IBM MasterDependentAlignment)
224     typedef std::list < BlockMultipleAlignment * > AlignmentList;
225     bool ExtractRows(const std::vector < unsigned int >& dependentsToRemove, AlignmentList *pairwiseAlignments);
226 
227     // merge in the contents of the given alignment (assuming same master, compatible block structure),
228     // addings its rows to the end of this alignment; returns true if merge successful. Does not change
229     // block structure - just adds the part of new alignment's aligned blocks that intersect with this
230     // object's aligned blocks
231     bool MergeAlignment(const BlockMultipleAlignment *newAlignment);
232 
233     // turn on/off geometry violations; if param is true, will return # violations found
234     unsigned int ShowGeometryViolations(bool showGeometryViolations);
235 
236 private:
237     AlignmentManager *alignmentManager;
238     SequenceList *sequences;
239     ConservationColorer *conservationColorer;
240     mutable PSSMWrapper *pssm;
241 
242     typedef std::list < Block * > BlockList;
243     BlockList blocks;
244 
245     unsigned int totalWidth;
246 
247     typedef struct {
248         Block *block;
249         int blockColumn, alignedBlockNum;
250     } BlockInfo;
251     typedef std::vector < BlockInfo > BlockMap;
252     BlockMap blockMap;
253 
254     // to flag blocks for realignment
255     typedef std::map < const Block * , bool > MarkBlockMap;
256     MarkBlockMap markBlocks;
257 
258     bool CheckAlignedBlock(const Block *newBlock) const;
259     UnalignedBlock * CreateNewUnalignedBlockBetween(const Block *left, const Block *right);
260     Block * GetBlockBefore(const Block *block) const;
261     Block * GetBlockAfter(const Block *block) const;
262     void InsertBlockBefore(Block *newBlock, const Block *insertAt);
263     void InsertBlockAfter(const Block *insertAt, Block *newBlock);
264     void RemoveBlock(Block *block);
265 
266     // for cacheing of residue->block lookups
267     mutable unsigned int cachePrevRow;
268     mutable const Block *cachePrevBlock;
269     mutable BlockList::const_iterator cacheBlockIterator;
270     void InitCache(void);
271 
272     // given a row and seqIndex, find block that contains that residue
273     const Block * GetBlock(unsigned int row, unsigned int seqIndex) const;
274 
275     // intended for volatile storage of row-associated info (e.g. for alignment scores, etc.)
276     mutable std::vector < double > rowDoubles;
277     mutable std::vector < std::string > rowStrings;
278 
279     // for flagging residues (e.g. for geometry violations)
280     typedef std::vector < std::vector < bool > > RowSeqIndexFlags;
281     RowSeqIndexFlags geometryViolations;
282     bool showGeometryViolations;
283 
284     unsigned int SumOfAlignedBlockWidths(void) const;
285 
286 public:
287 
288     // NULL if block before is aligned; if NULL passed, retrieves last block (if unaligned; else NULL)
289     const UnalignedBlock * GetUnalignedBlockBefore(const UngappedAlignedBlock *aBlock) const;
290     // NULL if block after is aligned; if NULL passed, retrieves first block (if unaligned; else NULL)
291     const UnalignedBlock * GetUnalignedBlockAfter(const UngappedAlignedBlock *aBlock) const;
292 
NBlocks(void) const293     unsigned int NBlocks(void) const { return blocks.size(); }
294     bool HasNoAlignedBlocks(void) const;
295     unsigned int NAlignedBlocks(void) const;
NRows(void) const296     unsigned int NRows(void) const { return sequences->size(); }
AlignmentWidth(void) const297     unsigned int AlignmentWidth(void) const { return totalWidth; }
298 
299     // return a number from 1..n for aligned blocks, -1 for unaligned
GetAlignedBlockNumber(unsigned int alignmentIndex) const300     int GetAlignedBlockNumber(unsigned int alignmentIndex) const { return blockMap[alignmentIndex].alignedBlockNum; }
301 
302     // returns the loop length of the unaligned region at the given row and alignment index; -1 if that position is aligned
303     int GetLoopLength(unsigned int row, unsigned int alignmentIndex);
304 
305     // return [0..1] for aligned residues depending on the alignment position - first aligned column is 0, last is 1;
306     // -1 if unaligned. This is mainly for alignment rainbow coloring
307     double GetRelativeAlignmentFraction(unsigned int alignmentIndex) const;
308 
309     // for storing/querying info
GetRowDouble(unsigned int row) const310     double GetRowDouble(unsigned int row) const { return rowDoubles[row]; }
SetRowDouble(unsigned int row,double value) const311     void SetRowDouble(unsigned int row, double value) const { rowDoubles[row] = value; }
GetRowStatusLine(unsigned int row) const312     const std::string& GetRowStatusLine(unsigned int row) const { return rowStrings[row]; }
SetRowStatusLine(unsigned int row,const std::string & value) const313     void SetRowStatusLine(unsigned int row, const std::string& value) const { rowStrings[row] = value; }
314 
315     // empties all rows' infos
ClearRowInfo(void) const316     void ClearRowInfo(void) const
317     {
318         for (unsigned int r=0; r<NRows(); ++r) {
319             rowDoubles[r] = 0.0;
320             rowStrings[r].erase();
321         }
322     }
323 
324     // kludge for now for storing allowed alignment regions, e.g. when demoted from multiple.
325     // (only two ranges for now, since this is used only with pairwise alignments)
326     int alignMasterFrom, alignMasterTo, alignDependentFrom, alignDependentTo;
327 };
328 
329 
330 // static function to create Seq-aligns out of multiple
331 ncbi::objects::CSeq_align * CreatePairwiseSeqAlignFromMultipleRow(const BlockMultipleAlignment *multiple,
332     const BlockMultipleAlignment::UngappedAlignedBlockList& blocks, unsigned int dependentRow);
333 
334 
335 // base class for Block - BlockMultipleAlignment is made up of a list of these
336 class Block
337 {
338 public:
339     unsigned int width;
340 
341     virtual bool IsAligned(void) const = 0;
342 
343     typedef struct {
344         int from, to;   // signed because 'to' may be -1 (zero-width block starting at index zero)
345     } Range;
346 
347     // get sequence index for a column, which must be in block range (0 ... width-1)
348     virtual int GetIndexAt(unsigned int blockColumn, unsigned int row,
349         BlockMultipleAlignment::eUnalignedJustification justification) const = 0;
350 
351     // delete a row
352     virtual void DeleteRow(unsigned int row) = 0;
353     virtual void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove) = 0;
354 
355     // makes a new copy of itself
356     virtual Block * Clone(const BlockMultipleAlignment *newMultiple) const = 0;
357 
358 protected:
Block(const BlockMultipleAlignment * multiple)359     Block(const BlockMultipleAlignment *multiple) :
360         width(0), ranges(multiple->NRows()), parentAlignment(multiple)  { }
361 
362     typedef std::vector < Range > RangeList;
363     RangeList ranges;
364 
365     const BlockMultipleAlignment *parentAlignment;
366 
367 public:
~Block(void)368     virtual ~Block(void) { }    // virtual destructor for base class
369 
IsFrom(const BlockMultipleAlignment * alignment) const370     bool IsFrom(const BlockMultipleAlignment *alignment) const { return (alignment == parentAlignment); }
371 
372     // given a row number (from 0 ... nSequences-1), give the sequence range covered by this block
GetRangeOfRow(unsigned int row) const373     const Range* GetRangeOfRow(unsigned int row) const { return &(ranges[row]); }
374 
375     // set range
SetRangeOfRow(unsigned int row,int from,int to)376     void SetRangeOfRow(unsigned int row, int from, int to)
377     {
378         ranges[row].from = from;
379         ranges[row].to = to;
380     }
381 
382     // resize - add new (empty) rows at end
AddRows(unsigned int nNewRows)383     void AddRows(unsigned int nNewRows) { ranges.resize(ranges.size() + nNewRows); }
384 
NSequences(void) const385     unsigned int NSequences(void) const { return ranges.size(); }
386 };
387 
388 
389 // a gapless aligned block; width must be >= 1
390 class UngappedAlignedBlock : public Block
391 {
392 public:
UngappedAlignedBlock(const BlockMultipleAlignment * multiple)393     UngappedAlignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { }
394 
IsAligned(void) const395     bool IsAligned(void) const { return true; }
396 
GetIndexAt(unsigned int blockColumn,unsigned int row,BlockMultipleAlignment::eUnalignedJustification justification=BlockMultipleAlignment::eCenter) const397     int GetIndexAt(unsigned int blockColumn, unsigned int row,
398         BlockMultipleAlignment::eUnalignedJustification justification =
399             BlockMultipleAlignment::eCenter) const  // justification ignored for aligned block
400         { return (GetRangeOfRow(row)->from + blockColumn); }
401 
402     char GetCharacterAt(unsigned int blockColumn, unsigned int row) const;
403 
404     void DeleteRow(unsigned int row);
405     void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove);
406 
407     Block * Clone(const BlockMultipleAlignment *newMultiple) const;
408 };
409 
410 
411 // an unaligned block; max width of block must be >=1, but range over any given
412 // sequence can be length 0 (but not <0). If length 0, "to" is the residue after
413 // the block, and "from" (=to - 1) is the residue before.
414 class UnalignedBlock : public Block
415 {
416 public:
UnalignedBlock(const BlockMultipleAlignment * multiple)417     UnalignedBlock(const BlockMultipleAlignment *multiple) : Block(multiple) { }
418 
419     void Resize(void);  // recompute block width from current ranges
420 
IsAligned(void) const421     bool IsAligned(void) const { return false; }
422 
423     int GetIndexAt(unsigned int blockColumn, unsigned int row,
424         BlockMultipleAlignment::eUnalignedJustification justification) const;
425 
426     void DeleteRow(unsigned int row);
427     void DeleteRows(std::vector < bool >& removeRows, unsigned int nToRemove);
428 
429     Block * Clone(const BlockMultipleAlignment *newMultiple) const;
430 
431     // return the length of the shortest region that any row contributes to this block
432     unsigned int MinResidues(void) const;
433 };
434 
435 END_SCOPE(Cn3D)
436 
437 #endif // CN3D_BLOCK_MULTIPLE_ALIGNMENT__HPP
438