/**
 * Author: Mark Larkin
 *
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
 */
/**
 * The Alignment class is used to store the alignment that is being constructed.
 * It also contains other information such as gap penalty masks etc.
 * An object of this type will be passed by reference to the FileReader. This FileReader
 * and the FileParsers will then set it up properly from the information given in the file.
 * I have decided to put everything into vectors, strings etc. No more array*'s; this gets
 * rid of the memory allocation problem.
 *
 * CHANGE:
 * Mark Jan 16th 2007. I have changed the pasteSequencesIntoPosition function to allow
 * explicit pastes into profile2.
 * Mark 25-1-2007. I have changed the class so that each of the sequences has a unique
 * identifier. Several functions were changed to allow this.
 *
 * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
 * process the name vector.
 *
 * 23-03-07,Nigel Brown(EMBL): added testUniqueNames() predicate, which
 * compares new sequence names with those in the alignment vector BEFORE
 * appending them.
 */

// NOTE NOTE NOTE Very important! The list of sequences begins from 1 to numSeqs.
// This is because the code was originally written in Fortran, where arrays begin at
// 1. It has become difficult to change this. Ramu has tried before and had problems,
// so we decided to leave it this way.
32 33 #ifndef ALIGNMENT_H 34 #define ALIGNMENT_H 35 36 #include <vector> 37 #include <string> 38 #include <iomanip> 39 #include <exception> 40 #include <stdexcept> 41 #include "Sequence.h" 42 #include "../substitutionMatrix/globalmatrix.h" 43 #include "../general/userparams.h" 44 #include "../general/VectorOutOfRange.h" 45 #include "../general/SequenceNotFoundException.h" 46 47 48 // FIXME because this object is used for aligned and unaligned 49 // sequences it would be nice to have a isAligned flag here (AW) 50 51 using namespace std; 52 53 namespace clustalw 54 { 55 56 typedef std::vector<vector <int> > SeqArray; 57 58 class Alignment 59 { 60 public: 61 /* Functions */ 62 Alignment(); 63 void addSequences(vector<Sequence>* seqVector); 64 void addSequences(SeqArray* seqVector); 65 void appendSequences(vector<Sequence>* seqVector); 66 vector<Sequence> cutSelectedSequencesFromAlignment(vector<int>* selected); 67 void pasteSequencesIntoPosition(vector<Sequence>* seqVector, int pos, 68 bool explicitPasteToProfile2 = false); 69 resizeSeqArray(int size)70 void resizeSeqArray(int size){seqArray.resize(size); numSeqs = size - 1; 71 outputIndex.resize(size - 1); names.resize(size); 72 titles.resize(size);}; 73 bool addOutputIndex(vector<int>* outputIndexToAdd); 74 bool appendOutputIndex(vector<int>* outputIndexToAppend); 75 void addSecStructMask1(vector<char>* secStructMaskToAdd); 76 void addSecStructMask2(vector<char>* secStructMaskToAdd); 77 void addSeqWeight(vector<int>* _seqWeight); 78 void addGapPenaltyMask1(vector<char>* gapPenaltyMaskToAdd); 79 void addGapPenaltyMask2(vector<char>* gapPenaltyMaskToAdd); 80 vector<char>* getSecStructMask1(); 81 vector<char>* getSecStructMask2(); 82 const vector<int>* getOutputIndex(); 83 vector<char>* getGapPenaltyMask1(); 84 vector<char>* getGapPenaltyMask2(); 85 void addSecStructName1(string nameToAdd); 86 void addSecStructName2(string nameToAdd); 87 int alignScore(void); 88 int countGaps(int s1, int s2, int l); 89 void 
resetAlign(); 90 void fixGaps(); 91 float countid(int s1, int s2); 92 getSequence(int index)93 const vector<int>* getSequence(int index){return &seqArray[index];}; // For Pairwise! getSequence(int index)94 const vector<int>* getSequence(int index) const {return &seqArray[index];}; 95 const vector<int>* getSequenceFromUniqueId(unsigned long id); // For iteration getSeqArray()96 const SeqArray* getSeqArray() const {return &seqArray;}; // For multiple align! getSeqArrayForRealloc()97 SeqArray* getSeqArrayForRealloc(){return &seqArray;}; 98 void updateSequence(int index, const vector<int>* seq); 99 100 bool checkAllNamesDifferent(string *offendingSeq); 101 bool testUniqueNames(vector<Sequence>* seqVector, string *offendingSeq); 102 void clearAlignment(); 103 void clearSecStruct1(); 104 void clearSecStruct2(); 105 void printSequencesAddedInfo(); 106 107 string getSecStructName1(); 108 string getSecStructName2(); getNumSeqs()109 int getNumSeqs() const {return numSeqs;}; 110 int getMaxNames(); getMaxAlnLength()111 int getMaxAlnLength(){return maxAlignmentLength;}; setMaxAlnLength(int len)112 void setMaxAlnLength(int len){maxAlignmentLength = len;}; 113 int getLengthLongestSequence(); 114 int getLengthLongestSequence(int firstSeq, int lastSeq); getSeqLength(int index)115 int getSeqLength(int index) const {return seqArray[index].size() - 1;}; 116 int getSecStructMask1Element(int index); 117 int getSecStructMask2Element(int index); 118 int getGapPenaltyMask1Element(int index); 119 int getGapPenaltyMask2Element(int index); 120 int getOutputIndex(int index); 121 int getSeqWeight(int index) const; getSeqWeights()122 const vector<int>* getSeqWeights() const{return &seqWeight;} 123 string getName(int index); 124 string getTitle(int index); 125 vector<int>* QTcalcHistColumnHeights(int firstSeq, int nSeqs, 126 Array2D<int>* exceptionalRes); 127 // NOTE July 13, for Qt 128 129 // NOTE the following functions are to be used when we are doing a profile 130 // alignment. 
It resets the gaps from fixed. 131 void resetProfile1(); 132 void resetProfile2(); 133 void resetAllSeqWeights(); 134 135 int searchForString(bool* found, int seq, int beginRes, string search); 136 void removeGapsFromSelectedSeqs(vector<int>* selected); 137 void removeGapOnlyColsFromSelectedSeqs(vector<int>* selected); 138 void removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum); 139 void setDefaultOutputIndex(); 140 bool removeAllOutsideRange(int beginPos, int endPos); 141 bool updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos); 142 bool reloadAlignment(); 143 getProfile1NumSeqs()144 int getProfile1NumSeqs(){return profile1NumSeqs;}; setProfile1NumSeqs(int value)145 void setProfile1NumSeqs(int value){profile1NumSeqs = value;} 146 bool isGap(int seq, int col) const; 147 void calculateMaxLengths(); 148 149 /** 150 * The following functions are for the iteration output order. 151 */ 152 unsigned long getUniqueId(int seq); 153 debugPrintArray()154 void debugPrintArray(){debugPrintSeqArray(&seqArray);} 155 void debugPrintSeqArray(SeqArray* arrayToPrint); 156 void debugPrintProfile1(); 157 void debugPrintProfile2(); 158 void debugPrintOutAlignInfo(); 159 void debugPrintAllNames(); 160 void debugPrintSequences(); 161 162 /* Attributes */ 163 164 /* Friends */ 165 class NameIterator; 166 friend class NameIterator; 167 168 class NameIterator 169 { 170 private: 171 Alignment *alignment; 172 vector<string>::iterator i; 173 public: 174 void begin(Alignment *alignment); 175 const string next(); 176 bool end(); 177 }; 178 private: 179 /* Functions */ 180 181 void addSequencesToVector(vector<Sequence>* seqVector); 182 int getSequenceLength(int index); 183 void sortScores(vector<float>* scores, int f, int l); 184 void swap(vector<float>* scores, int s1, int s2); 185 bool keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex); 186 187 void clearSeqArray(); 188 /* Attributes */ 189 int maxNames; 190 int maxAlignmentLength; 191 int 
lengthLongestSequence; 192 int numSeqs; 193 vector<int> outputIndex; 194 vector<unsigned long> sequenceIds; // Mark change: To help with output order 195 vector<int> seqWeight; 196 SeqArray seqArray; 197 vector<string> names; 198 vector<string> titles; 199 vector<char> gapPenaltyMask1; 200 vector<char> gapPenaltyMask2; 201 vector<char> secStructMask1; 202 vector<char> secStructMask2; 203 string secStructName1; 204 string secStructName2; 205 vector<int> histogramColumnHeights; // NOTE July 13, for Qt 206 int profile1NumSeqs; 207 int gapPos1, gapPos2; 208 }; 209 } 210 #endif 211 212