1 /**
2  * Author: Mark Larkin
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  */
6 /**
7  * Changes:
8  *
9  * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
10  * process the name vector.
11  *
12  * 05-03-07,Nigel Brown(EMBL): modified searchForString() to skip over gaps in
13  * sequence while matching.
14  *
15  * 26-03-07,Nigel Brown(EMBL): suppressed error message box for name clashes;
16  * added testUniqueNames() predicate, which compares new sequence names with
17  * those in the alignment vector BEFORE appending them.
18  */
19 #ifdef HAVE_CONFIG_H
20     #include "config.h"
21 #endif
22 #include <exception>
23 #include <cmath>
24 #include <sstream>
25 #include "Alignment.h"
26 
27 using namespace std;
28 
29 namespace clustalw
30 {
31 
Alignment()32 Alignment::Alignment()
33  : gapPos1(userParameters->getGapPos1()),
34    gapPos2(userParameters->getGapPos2())
35 {
36     maxNames = 0;
37     numSeqs = 0;
38     maxAlignmentLength = 0;
39     lengthLongestSequence = 0;
40     profile1NumSeqs = 0;
41 }
42 
43 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
44 /** remove gaps from older alignments (code = gap_pos1) */
resetProfile1()45 void Alignment::resetProfile1()
46 {
47     register int sl;                /* which have  code = gap_pos2  */
48     int i,j;
49     int _gapPos1 = userParameters->getGapPos1();
50     int _gapPos2 = userParameters->getGapPos2();
51     bool _resetAlignNew = userParameters->getResetAlignmentsNew();
52     bool _resetAlignAll = userParameters->getResetAlignmentsAll();
53 
54     if (userParameters->getStructPenalties1() != NONE)
55     {
56         sl = 0;
57         for (j = 0; j < (int)gapPenaltyMask1.size(); ++j)
58         {
59             if (gapPenaltyMask1[j] == _gapPos1 && (_resetAlignNew ||
60                            _resetAlignAll))
61             {
62                 continue;
63             }
64             if (gapPenaltyMask1[j] == _gapPos2 && (_resetAlignAll))
65             {
66                 continue;
67             }
68             gapPenaltyMask1[sl] = gapPenaltyMask1[j];
69             ++sl;
70         }
71     }
72 
73     if (userParameters->getStructPenalties1() == SECST)
74     {
75         sl = 0;
76         for (j = 0; j < (int)secStructMask1.size(); ++j)
77         {
78             if (secStructMask1[j] == _gapPos1 && (_resetAlignNew ||
79                           _resetAlignAll))
80             {
81                 continue;
82             }
83             if (secStructMask1[j] == _gapPos2 && (_resetAlignAll))
84             {
85                 continue;
86             }
87             secStructMask1[sl] = secStructMask1[j];
88             ++sl;
89         }
90     }
91 
92     for(i = 1; i <= profile1NumSeqs; ++i)
93     {
94         sl = 0;
95         for(j = 1; j <= getSeqLength(i); ++j)
96         {
97             if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
98             {
99                 continue;
100             }
101             if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
102             {
103                 continue;
104             }
105             ++sl;
106             seqArray[i][sl] = seqArray[i][j];
107         }
108 
109         // Andreas Wilm (UCD): added 2008-03-07:
110         // Remove excess bit at end of sequence
111         int numExtraElements = seqArray[i].size() - 1 - sl;
112         for(int k = 0; k < numExtraElements; k++)
113         {
114             seqArray[i].pop_back();
115         }
116     }
117 }
118 
119 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
resetProfile2()120 void Alignment::resetProfile2()
121 {
122     register int sl;                /* which have  code = gap_pos2  */
123     int i, j;
124     int _gapPos1 = userParameters->getGapPos1();
125     int _gapPos2 = userParameters->getGapPos2();
126     bool _resetAlignNew = userParameters->getResetAlignmentsNew();
127     bool _resetAlignAll = userParameters->getResetAlignmentsAll();
128     int _profile1NumSeqs = profile1NumSeqs;
129 
130 
131     if (userParameters->getStructPenalties2() != NONE)
132     {
133         sl = 0;
134         for (j = 0; j < (int)gapPenaltyMask2.size(); ++j)
135         {
136             if (gapPenaltyMask2[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
137             {
138                 continue;
139             }
140             if (gapPenaltyMask2[j] == _gapPos2 && (_resetAlignAll))
141             {
142                 continue;
143             }
144             gapPenaltyMask2[sl] = gapPenaltyMask2[j];
145             ++sl;
146         }
147     }
148 
149     if (userParameters->getStructPenalties2() == SECST)
150     {
151         sl = 0;
152         for (j = 0; j < (int)secStructMask2.size(); ++j)
153         {
154             if (secStructMask2[j] == _gapPos1 && (_resetAlignNew ||
155                          _resetAlignAll))
156             {
157                 continue;
158             }
159             if (secStructMask2[j] == _gapPos2 && (_resetAlignAll))
160             {
161                 continue;
162             }
163             secStructMask2[sl] = secStructMask2[j];
164             ++sl;
165         }
166     }
167 
168     for(i = _profile1NumSeqs + 1; i <= numSeqs; ++i)
169     {
170         sl = 0;
171         for(j = 1; j <= getSeqLength(i); ++j)
172         {
173             if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
174             {
175                 continue;
176             }
177             if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
178             {
179                 continue;
180             }
181             ++sl;
182             seqArray[i][sl] = seqArray[i][j];
183         }
184 
185 
186         // Andreas Wilm (UCD) added 2008-03-07:
187         // Remove excess bit at end of sequence
188         int numExtraElements = seqArray[i].size() - 1 - sl;
189         for(int k = 0; k < numExtraElements; k++)
190         {
191             seqArray[i].pop_back();
192         }
193     }
194 
195 }
196 
resetAllSeqWeights()197 void Alignment::resetAllSeqWeights()
198 {
199     seqWeight.clear();
200     seqWeight.resize(numSeqs + 1, 100);
201 }
202 
203 /*
204  * The function addOutputIndex must check that the number of outputindex
205  * Not sure what to do with this one.
206  */
addOutputIndex(vector<int> * outputIndexToAdd)207 bool Alignment::addOutputIndex(vector<int>* outputIndexToAdd)
208 {
209     // First need to clear the old outputIndex
210     outputIndex.clear();
211     // Check if the size is correct
212     if((int)outputIndexToAdd->size() == numSeqs)
213     {
214         // Add the output index
215         outputIndex = *outputIndexToAdd;
216         return true;
217     }
218     else
219     {
220         clearAlignment();
221         return false; // Could not add them
222     }
223 
224 }
225 
226 /*
227  * The function appendOutputIndex is used when we are appending the outputIndex
228  * to the current one. We do this when we are adding a profile2.
229  */
appendOutputIndex(vector<int> * outputIndexToAppend)230 bool Alignment::appendOutputIndex(vector<int>* outputIndexToAppend)
231 {
232     // Check if the size is correct
233     if((int)(outputIndexToAppend->size() + outputIndex.size()) == numSeqs)
234     {
235         // Append the outputIndex
236         vector<int>::iterator outIndexIter = outputIndexToAppend->begin();
237         while(outIndexIter != outputIndexToAppend->end())
238         {
239             outputIndex.push_back(*outIndexIter);
240             outIndexIter++;
241         }
242         if((int)outputIndex.size() == numSeqs)
243         {
244             return true;
245         }
246         else
247         {
248             clearAlignment();
249             cerr << "There is a problem with adding the sequences\n";
250             return false;
251         }
252     }
253     else
254     {
255         clearAlignment();
256         return false;
257     }
258 }
259 
260 /*
261  * The function addSequences takes a vector of sequences and adds it to the Alignment.
262  * This is used if there is nothing in memory that we need.
263  *
264  */
addSequences(vector<Sequence> * seqVector)265 void Alignment::addSequences(vector<Sequence>* seqVector)
266 {
267     clearAlignment();
268 
269     numSeqs = seqVector->size();
270     vector<int> emptyVec;
271 
272     // NOTE push dummy sequences on first!
273     /********************************************************
274      * It was decided to stay with the seqs and seqArray    *
275      * index begining at 1. This is because it is difficult *
276      * to change in the code, so we push dummy seqs on      *
277      ********************************************************/
278     seqArray.push_back(emptyVec); // EMPTY sequence
279     names.push_back(string(""));
280     titles.push_back(string(""));
281     sequenceIds.push_back(0);
282 
283     addSequencesToVector(seqVector);
284 
285     calculateMaxLengths();
286     seqWeight.resize(numSeqs + 1, 100);
287 }
288 
289 /*
290  * The function appendSequences is used when we have a profile2.
291  *
292  */
appendSequences(vector<Sequence> * seqVector)293 void Alignment::appendSequences(vector<Sequence>* seqVector)
294 {
295     numSeqs += seqVector->size(); // Add to the number we already have!
296     addSequencesToVector(seqVector);
297     seqWeight.clear();
298     seqWeight.resize(numSeqs + 1, 100);
299     calculateMaxLengths();
300 }
301 
pasteSequencesIntoPosition(vector<Sequence> * seqVector,int pos,bool explicitPasteToProfile2)302 void Alignment::pasteSequencesIntoPosition(vector<Sequence>* seqVector, int pos,
303                                            bool explicitPasteToProfile2)
304 {
305     SeqArray::iterator seqArrayIterator;
306     vector<string>::iterator namesIterator;
307     vector<string>::iterator titlesIterator;
308     vector<unsigned long>::iterator sequenceIdsIterator;
309 
310     int profNum = userParameters->getProfileNum();
311     int numSeqsToInsert = seqVector->size();
312     if(numSeqsToInsert == 0 || pos < 0)
313     {
314         return;
315     }
316     if(pos == numSeqs)
317     {
318         seqArrayIterator = seqArray.end();
319         namesIterator = names.end();
320         titlesIterator = titles.end();
321         sequenceIdsIterator = sequenceIds.end();
322     }
323     else
324     {
325         seqArrayIterator = seqArray.begin() + pos + 1;
326         namesIterator = names.begin() + pos + 1;
327         titlesIterator = titles.begin() + pos + 1;
328         sequenceIdsIterator = sequenceIds.begin() + pos + 1;
329     }
330 
331     int prof1NumSeqs = profile1NumSeqs;
332 
333     for(int i = numSeqsToInsert - 1; i >= 0; i--)
334     {
335         seqArray.insert(seqArrayIterator, *(*seqVector)[i].getSequence());
336         names.insert(namesIterator, (*seqVector)[i].getName());
337         titles.insert(titlesIterator, (*seqVector)[i].getTitle());
338         sequenceIds.insert(sequenceIdsIterator, (*seqVector)[i].getIdentifier());
339 
340         numSeqs++;
341         if(profNum != 0 && !explicitPasteToProfile2 && pos <= prof1NumSeqs)
342         {
343             prof1NumSeqs++;
344         }
345     }
346 
347     if(profNum != 0 && pos <= prof1NumSeqs)
348     {
349         profile1NumSeqs = prof1NumSeqs;
350     }
351 
352     resetAllSeqWeights();
353     setDefaultOutputIndex();
354 }
355 
debugPrintAllNames()356 void Alignment::debugPrintAllNames()
357 {
358     vector<string>::iterator nameIter = names.begin();
359     while(nameIter != names.end())
360     {
361         cout << *nameIter << endl;
362         nameIter++;
363     }
364 }
365 
begin(Alignment * a)366 void Alignment::NameIterator::begin(Alignment *a) {
367     //cout << "begin()\n";
368     if (a) {
369         alignment = a;
370         i = alignment->names.begin();
371     }
372 }
373 
next()374 const string Alignment::NameIterator::next() {
375     //cout << "next()\n";
376     if (!alignment)
377         return string();
378     if (i == alignment->names.end())
379         return string();
380     return *i++;
381 }
382 
end()383 bool Alignment::NameIterator::end() {
384     //cout << "end()\n";
385     if (!alignment)
386         return true;
387     if (i == alignment->names.end())
388         return true;
389     return false;
390 }
391 
392 // 26-03-03,nige: test before appending seqVector
testUniqueNames(vector<Sequence> * seqVector,string * offendingSeq)393 bool Alignment::testUniqueNames(vector<Sequence>* seqVector, string *offendingSeq)
394 {
395     vector<string>::iterator   oldName;
396     vector<Sequence>::iterator newName;
397     bool unique = true;
398 
399     //iterate over new candidate names
400     for (newName = seqVector->begin(); unique && newName != seqVector->end(); newName++) {
401         //iterate over old stored names
402         for (oldName = names.begin() + 1; unique && oldName != names.end(); oldName++) {
403             if (*oldName == newName->getName()) {
404                 offendingSeq->assign(*oldName);
405                 unique = false;
406             }
407         }
408     }
409     return unique;
410 }
411 
412 
413 /*
414  * The function addSequencesToVector is used to add sequences to our seqVector
415  * It is used by both addSequences and appendSequences. This is to reduce code
416  * duplication.
417  */
addSequencesToVector(vector<Sequence> * seqVector)418 void Alignment::addSequencesToVector(vector<Sequence>* seqVector)
419 {
420     std::vector<Sequence>::iterator itSeq;
421 
422     for(itSeq = seqVector->begin(); itSeq != seqVector->end(); ++itSeq)
423     {
424         seqArray.push_back(*(*itSeq).getSequence());
425         names.push_back((*itSeq).getName());
426         titles.push_back((*itSeq).getTitle());
427         sequenceIds.push_back((*itSeq).getIdentifier());
428     }
429 
430     if(!(((int)seqArray.size() == numSeqs + 1) && ((int)names.size() == numSeqs + 1)
431           && ((int)titles.size() == numSeqs + 1) && ((int)sequenceIds.size() == numSeqs + 1)))
432     {
433         cerr << "There has been an error adding the sequences to Alignment.\n"
434              << "Must terminate the program. EaddSequencesrror occured in addSequences.\n";
435         exit(1);
436     }
437 }
438 
clearAlignment()439 void Alignment::clearAlignment()
440 {
441     // Erase all the elements from the vector!
442     clearSeqArray();
443     names.clear();
444     titles.clear();
445     outputIndex.clear();
446     sequenceIds.clear();
447     clearSecStruct1();
448     clearSecStruct2();
449     seqWeight.clear();
450     maxNames = 0;
451     numSeqs = 0;
452     maxAlignmentLength = 0;
453     lengthLongestSequence = 0;
454     userParameters->setProfileNum(0);
455     userParameters->setProfile1Empty(true);
456     userParameters->setProfile2Empty(true);
457 }
458 
clearSeqArray()459 void Alignment::clearSeqArray()
460 {
461     for(int i = 0; i < (int)seqArray.size(); i++)
462     {
463         seqArray[i].clear();
464     }
465     seqArray.clear();
466 }
467 
clearSecStruct1()468 void Alignment::clearSecStruct1()
469 {
470     gapPenaltyMask1.clear();
471     secStructMask1.clear();
472     secStructName1 = "";
473 }
474 
clearSecStruct2()475 void Alignment::clearSecStruct2()
476 {
477     gapPenaltyMask2.clear();
478     secStructMask2.clear();
479     secStructName2 = "";
480 }
481 
482 /*
483  * The function alignScore is used to score the alignment!
484  * This is used by other classes to see what score the alignment has gotten.
485  */
alignScore(void)486 int Alignment::alignScore(void)
487 {
488     int maxRes;
489     int seq1, seq2, res1, res2;
490     int ngaps;
491     int i, len1, len2;
492     int score;
493     int _maxAA = userParameters->getMaxAA();
494     float _gapOpen = userParameters->getGapOpen();
495     int matrix[NUMRES][NUMRES];
496 
497     //
498     // calculate an overall score for the alignment by summing the
499     // scores for each pairwise alignment
500     //
501     maxRes = subMatrix->getAlnScoreMatrix(matrix);
502     if (maxRes == 0)
503     {
504         utilityObject->error("Matrix for alignment scoring not found\n");
505         return 0;
506     }
507 
508     score = 0;
509     for (seq1 = 1; seq1 <= numSeqs; seq1++)
510     {
511         for (seq2 = 1; seq2 < seq1; seq2++)
512         {
513             len1 = seqArray[seq1].size() - 1;
514             len2 = seqArray[seq2].size() - 1;
515             for (i = 1; i < len1 && i < len2; i++)
516             {
517                 res1 = seqArray[seq1][i];
518                 res2 = seqArray[seq2][i];
519 
520                 if ((res1 >= 0) && (res1 <= _maxAA) && (res2 >= 0) && (res2 <= _maxAA))
521                 {
522                     score += matrix[res1][res2];
523                 }
524             }
525 
526             ngaps = countGaps(seq1, seq2, len1);
527 
528             score = static_cast<int>(score - (100 * _gapOpen * ngaps)); // Mark change 17-5-07
529 
530         }
531     }
532 
533     score /= 100;
534 
535     utilityObject->info("Alignment Score %d\n", score);
536     return score;
537 }
538 
countGaps(int seq1,int seq2,int len)539 int Alignment::countGaps(int seq1, int seq2, int len)
540 {
541     int i, g;
542     int q, r;//,  *Q,  *R;
543 
544     vector<int> Q, R;
545     Q.resize(len + 2, 0);
546     R.resize(len + 2, 0);
547 
548     int _maxAA = userParameters->getMaxAA();
549 
550     try
551     {
552         Q[0] = R[0] = g = 0;
553 
554         for (i = 1; i < len; i++)
555         {
556             if (seqArray[seq1][i] > _maxAA)
557             {
558                 q = 1;
559             }
560             else
561             {
562                 q = 0;
563             }
564 
565             if (seqArray[seq2][i] > _maxAA)
566             {
567                 r = 1;
568             }
569             else
570             {
571                 r = 0;
572             }
573 
574             // NOTE I havent a clue what this does!!!!
575             if (((Q[i - 1] <= R[i - 1]) && (q != 0) && (1-r != 0)) ||
576                 ((Q[i - 1] >= R[i - 1]) && (1-q != 0) && (r != 0)))
577             {
578                 g += 1;
579             }
580 
581             if (q != 0)
582             {
583                 Q[i] = Q[i - 1] + 1;
584             }
585             else
586             {
587                 Q[i] = 0;
588             }
589 
590             if (r != 0)
591             {
592                 R[i] = R[i - 1] + 1;
593             }
594             else
595             {
596                 R[i] = 0;
597             }
598         }
599     }
600     catch(const exception &ex)
601     {
602         cerr << ex.what() << endl;
603         cerr << "Terminating program. Cannot continue. Function = countGaps\n";
604         exit(1);
605     }
606 
607     return (g);
608 }
609 
resetAlign()610 void Alignment::resetAlign()
611 {
612     /* remove gaps from older alignments (code = gap_pos1) EXCEPT for
613        gaps that were INPUT with the seqs.  which have code =
614        gap_pos2
615     */
616 
617     register int sl;
618     int i, j;
619     int _gapPos1 = userParameters->getGapPos1();
620     int _gapPos2 = userParameters->getGapPos2();
621     bool _resetAlignNew = userParameters->getResetAlignmentsNew();
622     bool _resetAlignAll = userParameters->getResetAlignmentsAll();
623 
624 
625     for(i = 1; i <= numSeqs;++i)
626     {
627         sl = 0;
628         for(j = 1; j <= getSeqLength(i); ++j)
629         {
630             if(seqArray[i][j] == _gapPos1 && ( _resetAlignNew || _resetAlignAll))
631             {
632                 continue;
633             }
634             if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
635             {
636                 continue;
637             }
638             ++sl;
639             seqArray[i][sl] = seqArray[i][j];
640         }
641 
642 
643         // Andreas Wilm (UCD) added 2008-03-07:
644         // Remove excess bit at end of sequence
645         int numExtraElements = seqArray[i].size() - 1 - sl;
646         for(int k = 0; k < numExtraElements; k++)
647         {
648             seqArray[i].pop_back();
649         }
650     }
651 }
652 
fixGaps()653 void Alignment::fixGaps()
654 {
655     int i,j;
656 
657     if (userParameters->getStructPenalties1() != NONE)
658     {
659         for (j = 0; j < getSeqLength(1); ++j)
660         {
661             if (gapPenaltyMask1[j] == userParameters->getGapPos1())
662             {
663                 gapPenaltyMask1[j] = userParameters->getGapPos2();
664             }
665         }
666     }
667 
668     if (userParameters->getStructPenalties1() == SECST)
669     {
670         for (j = 0; j < getSeqLength(1); ++j)
671         {
672             if (secStructMask1[j] == userParameters->getGapPos1())
673             {
674                 secStructMask1[j] = userParameters->getGapPos2();
675             }
676         }
677     }
678 
679     for(i = 1; i <= numSeqs; ++i)
680     {
681         for(j = 1; j <= getSeqLength(i); ++j)
682         {
683             if(seqArray[i][j] == userParameters->getGapPos1())
684             {
685                 seqArray[i][j] = userParameters->getGapPos2();
686             }
687         }
688     }
689 
690 }
691 
countid(int s1,int s2)692 float Alignment::countid(int s1, int s2)
693 {
694     int c1, c2;
695     int i;
696     int count, total;
697     float score;
698     int shorterSeqLength = (getSeqLength(s1) < getSeqLength(s2)) ? getSeqLength(s1) : getSeqLength(s2);
699 
700     count = total = 0;
701     for (i = 1; i <= shorterSeqLength; i++) // NOTE june29
702     {
703         c1 = seqArray[s1][i];
704         c2 = seqArray[s2][i];
705         if ((c1 >= 0) && (c1 < userParameters->getMaxAA()))
706         {
707             total++;
708             if (c1 == c2)
709             {
710                 count++;
711             }
712         }
713 
714     }
715 
716     if (total == 0)
717     {
718         score = 0;
719     }
720     else
721     {
722         score = 100.0 *(float)count / (float)total;
723     }
724 
725     return (score);
726 }
727 
debugPrintSequences()728 void Alignment::debugPrintSequences()
729 {
730     cout << std::endl;
731     for(int i = 0; i < (int)seqArray.size(); i++)
732     {
733         for(int j = 0; j < (int)seqArray[i].size(); j++)
734         {
735             if(seqArray[i][j] > 9)
736                 cout << " " << seqArray[i][j];
737             else
738                 cout << "  " << seqArray[i][j];
739         }
740         cout << std::endl;
741     }
742 }
743 
744 /*
745  * Note the max_aln_length is now called maxAlignmentLength, and it will be stored
746  * and calculated in this class. Mostly it is used for allocating arrays. But not always.
747  */
calculateMaxLengths()748 void Alignment::calculateMaxLengths()
749 {
750     maxAlignmentLength = 0;
751     lengthLongestSequence = 0;
752     if(seqArray.size() > 0)
753     {
754         SeqArray::iterator seqArrayIter = seqArray.begin();
755 
756         while(seqArrayIter != seqArray.end())
757         {
758             // NOTE I needed to change this to >= for a bug I had!!!!!!!
759             if((int)(*seqArrayIter).size() - 1 >= lengthLongestSequence)
760             {
761                 lengthLongestSequence = (*seqArrayIter).size();
762             }
763             ++seqArrayIter;
764         }
765 
766         if(lengthLongestSequence > 0)
767         {
768             maxAlignmentLength = (lengthLongestSequence * 2) - 2;
769             lengthLongestSequence -= 1; // MADE A CHANGE HERE AS WELL!!
770         }
771         else
772         {
773             lengthLongestSequence = 0;
774             maxAlignmentLength = 0;
775         }
776 
777     }
778     else
779     {
780         maxAlignmentLength = 0;
781     }
782     maxNames = 0;
783     if(names.size() > 0)
784     {
785         vector<string>::iterator nameVecIter = names.begin();
786 
787         while(nameVecIter != names.end())
788         {
789             if((int)(*nameVecIter).size() > maxNames)
790             {
791                 maxNames = (*nameVecIter).size();
792             }
793             ++nameVecIter;
794         }
795         if(maxNames < 10)
796         {
797             maxNames = 10;
798         }
799     }
800     else
801     {
802         maxNames = 0;
803     }
804 }
805 
806 /*
807  * This function checks to see if all names are different. It returns true if they
808  * are all different, and false if there are 2 the same.
809  * currently quadratic - would be nice to speed up
810  */
checkAllNamesDifferent(string * offendingSeq)811 bool Alignment::checkAllNamesDifferent(string *offendingSeq)
812 {
813     bool different = true;
814     // NOTE I added the + 1 here because, if we had a sequence with a name as blank
815     // this would be the same as the first one!
816     vector<string>::iterator namesIter1 = names.begin() + 1;
817     vector<string>::iterator namesIter2;
818     int counter1 = 1;
819     int counter2 = 2;
820 
821 
822     while(namesIter1 != names.end())
823     {
824         namesIter2 = namesIter1 + 1;
825         while(namesIter2 != names.end())
826         {
827             if((*namesIter1).compare(*namesIter2) == 0) // If we have 2 strings the same.
828             {
829                 different = false;
830                 /* 23-03-2007,nige: let someone up the stack deal with this - GUI is too deeply entangled.
831                  * utilityObject->error("Multiple sequences found with same name '%s' (first %d chars are significant)\n", namesIter1->c_str(), MAXNAMES);
832                  */
833 
834                 offendingSeq->assign((*namesIter1));
835                 clearAlignment();
836                 return different; // Not all different!
837             }
838             namesIter2++;
839             counter2++;
840         }
841         namesIter1++;
842         counter1++;
843         counter2 = counter1 + 1;
844     }
845     return different;
846 }
847 
848 
849 
850 
851 
852 
addSecStructMask1(vector<char> * secStructMaskToAdd)853 void Alignment::addSecStructMask1(vector<char>* secStructMaskToAdd)
854 {
855     secStructMask1 = *secStructMaskToAdd;
856 }
857 
addSecStructMask2(vector<char> * secStructMaskToAdd)858 void Alignment::addSecStructMask2(vector<char>* secStructMaskToAdd)
859 {
860     secStructMask2 = *secStructMaskToAdd;
861 }
862 
addGapPenaltyMask1(vector<char> * gapPenaltyMaskToAdd)863 void Alignment::addGapPenaltyMask1(vector<char>* gapPenaltyMaskToAdd)
864 {
865     gapPenaltyMask1 = *gapPenaltyMaskToAdd;
866 }
867 
addGapPenaltyMask2(vector<char> * gapPenaltyMaskToAdd)868 void Alignment::addGapPenaltyMask2(vector<char>* gapPenaltyMaskToAdd)
869 {
870     gapPenaltyMask2 = *gapPenaltyMaskToAdd;
871 }
872 
addSecStructName1(string nameToAdd)873 void Alignment::addSecStructName1(string nameToAdd)
874 {
875     secStructName1 = nameToAdd;
876 }
877 
addSecStructName2(string nameToAdd)878 void Alignment::addSecStructName2(string nameToAdd)
879 {
880     secStructName2 = nameToAdd;
881 }
882 
addSeqWeight(vector<int> * _seqWeight)883 void Alignment::addSeqWeight(vector<int>* _seqWeight)
884 {
885     if(seqWeight.size() == _seqWeight->size())
886     {
887         int size = seqWeight.size();
888 
889         for(int i = 0; i < size; i++)
890         {
891             seqWeight[i] = (*_seqWeight)[i];
892         }
893     }
894 }
895 
printSequencesAddedInfo()896 void Alignment::printSequencesAddedInfo()
897 {
898     if(userParameters->getDisplayInfo())
899     {
900         int startSeq = userParameters->getProfile2Empty() ? 1:
901                        profile1NumSeqs + 1;
902         string dnaFlag = userParameters->getDNAFlag() ? "bp" : "aa";
903 
904 
905         for(int i = startSeq; i <= numSeqs; i++)
906         {
907             cout << "Sequence " << i << ": "
908                  << std::left << setw(maxNames) << names.at(i)
909                  << std::right << setw(6) << getSequenceLength(i)
910                  << " " << dnaFlag << std::endl;
911         }
912     }
913 }
914 
debugPrintOutAlignInfo()915 void Alignment::debugPrintOutAlignInfo()
916 {
917     for(int i = 1; i <= numSeqs; i++)
918     {
919         cout << "seq-no=" << i << ": name="
920              << std::left << setw(maxNames) << names.at(i)
921              << " length="
922              << std::right << setw(6) << getSequenceLength(i)
923              << std::endl;
924     }
925 }
926 
getSequenceLength(int index)927 int Alignment::getSequenceLength(int index)
928 {
929     return seqArray.at(index).size() - 1;
930 }
931 
getLengthLongestSequence()932 int Alignment::getLengthLongestSequence()
933 {
934     int _lengthLongestSequence = 0;
935 
936     for(int i = 1; i <= numSeqs; i++)
937     {
938         if(getSeqLength(i) > _lengthLongestSequence)
939         {
940             _lengthLongestSequence = getSeqLength(i);
941         }
942     }
943     return _lengthLongestSequence;
944 }
945 
946 /**
947  * This function is used to find the length of the longest sequence in the range.
948  */
getLengthLongestSequence(int firstSeq,int lastSeq)949 int Alignment::getLengthLongestSequence(int firstSeq, int lastSeq)
950 {
951     int _lengthLongestSequence = 0;
952 
953     if(firstSeq >= 1 && lastSeq <= numSeqs)
954     {
955         for(int i = firstSeq; i <= lastSeq; i++)
956         {
957             if(getSeqLength(i) > _lengthLongestSequence)
958             {
959                 _lengthLongestSequence = getSeqLength(i);
960             }
961         }
962     }
963     return _lengthLongestSequence; // Will return 0 if cant check seqs
964 }
965 
getMaxNames()966 int Alignment::getMaxNames()
967 {
968     return maxNames;
969 }
970 
getSecStructName1()971 string Alignment::getSecStructName1()
972 {
973     return secStructName1;
974 }
975 
getSecStructName2()976 string Alignment::getSecStructName2()
977 {
978     return secStructName2;
979 }
980 
getGapPenaltyMask1()981 vector<char>* Alignment::getGapPenaltyMask1()
982 {
983     return &gapPenaltyMask1;
984 }
985 
getGapPenaltyMask2()986 vector<char>* Alignment::getGapPenaltyMask2()
987 {
988     return &gapPenaltyMask2;
989 }
990 
getSecStructMask1()991 vector<char>* Alignment::getSecStructMask1()
992 {
993     return &secStructMask1;
994 }
995 
getSecStructMask2()996 vector<char>* Alignment::getSecStructMask2()
997 {
998     return &secStructMask2;
999 }
1000 
1001 
getSecStructMask1Element(int index)1002 int Alignment::getSecStructMask1Element(int index)
1003 {
1004     if(index > 0 && index < (int)secStructMask1.size())
1005     {
1006         return secStructMask1[index];
1007     }
1008     else
1009     {
1010         throw VectorOutOfRange(string("secStructMask1"), index, secStructMask1.size() - 1);
1011     }
1012 }
1013 
getSecStructMask2Element(int index)1014 int Alignment::getSecStructMask2Element(int index)
1015 {
1016     if(index > 0 && index < (int)secStructMask2.size())
1017     {
1018         return secStructMask2[index];
1019     }
1020     else
1021     {
1022         throw VectorOutOfRange(string("secStructMask2"), index, secStructMask2.size() - 1);
1023     }
1024 }
1025 
getGapPenaltyMask1Element(int index)1026 int Alignment::getGapPenaltyMask1Element(int index)
1027 {
1028     if(index > 0 && index < (int)gapPenaltyMask1.size())
1029     {
1030         return gapPenaltyMask1[index];
1031     }
1032     else
1033     {
1034         throw VectorOutOfRange(string("gapPenaltyMask1"), index, gapPenaltyMask1.size() - 1);
1035     }
1036 }
1037 
getGapPenaltyMask2Element(int index)1038 int Alignment::getGapPenaltyMask2Element(int index)
1039 {
1040     if(index > 0 && index < (int)gapPenaltyMask2.size())
1041     {
1042         return gapPenaltyMask2[index];
1043     }
1044     else
1045     {
1046         throw VectorOutOfRange(string("gapPenaltyMask2"), index, gapPenaltyMask2.size() - 1);
1047     }
1048 }
1049 
getName(int index)1050 string Alignment::getName(int index)
1051 {
1052     if(index > 0 && index < (int)names.size())
1053     {
1054         return names[index];
1055     }
1056     else
1057     {
1058         throw VectorOutOfRange(string("names"), index, names.size() - 1);
1059     }
1060 }
1061 
1062 /**
1063  * Change:
1064  * Mark 25-1-2007: This function was added to allow access to the unique id.
1065  */
getUniqueId(int seq)1066 unsigned long Alignment::getUniqueId(int seq)
1067 {
1068     if(seq > 0 && seq < (int)sequenceIds.size())
1069     {
1070         return sequenceIds[seq];
1071     }
1072     else
1073     {
1074         throw VectorOutOfRange(string("sequenceIds"), seq, sequenceIds.size() - 1);
1075     }
1076 }
1077 
getTitle(int index)1078 string Alignment::getTitle(int index)
1079 {
1080     if(index > 0 && index < (int)titles.size())
1081     {
1082         return titles[index];
1083     }
1084     else
1085     {
1086         throw VectorOutOfRange(string("titles"), index, titles.size() - 1);
1087     }
1088 }
1089 
getOutputIndex(int index)1090 int Alignment::getOutputIndex(int index)
1091 {
1092     if(index >= 0 && index < (int)outputIndex.size())
1093     {
1094         return outputIndex[index];
1095     }
1096     else
1097     {
1098         throw VectorOutOfRange(string("outputIndex"), index, outputIndex.size() - 1);
1099     }
1100 }
1101 
getSeqWeight(int index) const1102 int Alignment::getSeqWeight(int index) const
1103 {
1104     if(index >= 0 && index < (int)seqWeight.size())
1105     {
1106         return seqWeight[index];
1107     }
1108     else
1109     {
1110         throw VectorOutOfRange(string("seqWeight"), index, seqWeight.size() - 1);
1111     }
1112 }
1113 
1114 
1115 /**
1116  * This function is used by Qt. It is used to calculate the histogram values for the
1117  * widget in clustalQt. The function is in here because it needs access to the SubMatrix
1118  * class.
1119  * @param matNum The number of the matrix to be used in the comparison.
1120  * @return A pointer to a vector containing the histogram values.
1121  */
QTcalcHistColumnHeights(int firstSeq,int nSeqs,Array2D<int> * exceptionalRes)1122 vector<int>* Alignment::QTcalcHistColumnHeights(int firstSeq, int nSeqs,
1123                                               Array2D<int>* exceptionalRes)
1124 {
1125     int n, i, s, p, r, r1;
1126     int numColumns = getLengthLongestSequence();
1127     int scoreScale = userParameters->getQTScorePlotScale();//5; // From ClustalX.
1128     int scoreCutOff = userParameters->getQTResExceptionCutOff();
1129     bool includeGaps = false;
1130     //short  *mat_xref, *matptr;
1131     float median, mean;
1132     float t, q1, q3, ul;
1133     vector<float> seqdist, sorteddist;
1134     float diff;
1135     vector<int> seqvector;
1136     vector<int> freq;
1137     vector<vector<int> > profile;
1138     int matrix[NUMRES][NUMRES];
1139     int _maxAA = userParameters->getMaxAA();
1140     int _gapPos1 = userParameters->getGapPos1();
1141     histogramColumnHeights.resize(numColumns);
1142     //panel_data data1;
1143     subMatrix->getQTMatrixForHistogram(matrix);
1144 
1145     profile.resize(numColumns + 2, vector<int>(_maxAA + 2));
1146     freq.resize(_maxAA + 2);
1147 
1148     for(p = 0; p < numColumns; p++)
1149     {
1150         for(r = 0; r < _maxAA; r++)
1151         {
1152             freq[r] = 0;
1153         }
1154         for(s = firstSeq; s < firstSeq + nSeqs; s++)
1155         {
1156             if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1157                  seqArray[s + 1][p + 1] < _maxAA)
1158             {
1159                 freq[seqArray[s + 1][p + 1]]++;
1160             }
1161         }
1162         for(r = 0; r < _maxAA; r++)
1163         {
1164             profile[p][r] = 0;
1165             for(r1 = 0; r1 < _maxAA; r1++)
1166             {
1167                 profile[p][r] += freq[r1] * matrix[r1][r];
1168             }
1169             profile[p][r] = static_cast<int>(profile[p][r] /
1170                              static_cast<float>(nSeqs)); // Mark change 17-5-07
1171         }
1172     }
1173 
1174     seqvector.resize(_maxAA + 2);
1175     seqdist.resize(nSeqs + 1);
1176     sorteddist.resize(nSeqs + 1);
1177 
1178     for(p = 0; p < numColumns; p++)
1179     {
1180         for(s = firstSeq; s < firstSeq + nSeqs; s++)
1181         {
1182             if (p < getSeqLength(s + 1))
1183             {
1184                 for (r = 0; r < _maxAA; r++)
1185                 {
1186                     seqvector[r] = matrix[r][seqArray[s + 1][p + 1]];
1187                 }
1188             }
1189             else
1190             {
1191                 for (r = 0; r < _maxAA; r++)
1192                 {
1193                     seqvector[r] = matrix[r][_gapPos1];
1194                 }
1195             }
1196             seqdist[s - firstSeq] = 0.0;
1197             for(r = 0; r < _maxAA; r++)
1198             {
1199                 diff = profile[p][r] - seqvector[r];
1200                 diff /= 1000.0;
1201                 seqdist[s - firstSeq] += diff * diff;
1202             }
1203             seqdist[s - firstSeq] = sqrt((double)seqdist[s - firstSeq]);
1204         }
1205 
1206         // calculate mean,median and rms of seq distances
1207         mean = median = 0.0;
1208         if(includeGaps)
1209         {
1210             for(s = 0; s < nSeqs; s++)
1211             {
1212                 mean += seqdist[s];
1213             }
1214             mean /= nSeqs;
1215             n = nSeqs;
1216             for(s = 0; s < nSeqs; s++)
1217             {
1218                 sorteddist[s] = seqdist[s];
1219             }
1220         }
1221         else
1222         {
1223             n = 0;
1224             for(s = firstSeq; s < firstSeq + nSeqs; s++)
1225             {
1226                 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1227                    seqArray[s + 1][p + 1] < _maxAA)
1228                 {
1229                     mean += seqdist[s - firstSeq];
1230                     n++;
1231                 }
1232             }
1233             if(n > 0)
1234             {
1235                 mean /= n;
1236             }
1237             for(s = firstSeq, i = 0; s < firstSeq + nSeqs; s++)
1238             {
1239                 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1240                    seqArray[s + 1][p + 1] < _maxAA)
1241                 {
1242                     sorteddist[i++] = seqdist[s - firstSeq];
1243                 }
1244             }
1245         }
1246         sortScores(&sorteddist, 0, n - 1);
1247 
1248 
1249         if(n == 0)
1250         {
1251             median = 0;
1252         }
1253         else if(n % 2 == 0)
1254         {
1255             median = (sorteddist[n / 2 - 1] + sorteddist[n / 2]) / 2.0;
1256         }
1257         else
1258         {
1259             median = sorteddist[n / 2];
1260         }
1261 
1262         if(scoreScale <= 5)
1263         {
1264             histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean *
1265                                         (6 - scoreScale) / 4.0)) * 100.0 * n / nSeqs);
1266         }
1267         else
1268         {
1269             histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean /
1270                                         (4.0 * (scoreScale - 4)))) * 100.0 * n / nSeqs);
1271         }
1272 
1273         if(n == 0)
1274         {
1275             ul = 0;
1276         }
1277         else
1278         {
1279             t = n/4.0 + 0.5;
1280             if(t - (int)t == 0.5)
1281             {
1282                 q3 = (sorteddist[(int)t] + sorteddist[(int)t + 1]) / 2.0;
1283                 q1 = (sorteddist[n-(int)t] + sorteddist[n - (int)t - 1]) / 2.0;
1284             }
1285             else if(t - (int)t > 0.5)
1286             {
1287                 q3 = sorteddist[(int)t + 1];
1288                 q1 = sorteddist[n - (int)t - 1];
1289             }
1290             else
1291             {
1292                 q3 = sorteddist[(int)t];
1293                 q1 = sorteddist[n - (int)t];
1294             }
1295             if (n < 4)
1296             {
1297                 ul = sorteddist[0];
1298             }
1299             else
1300             {
1301                 ul = q3 + (q3 - q1) * ((float)scoreCutOff / 2.0);
1302             }
1303         }
1304 
1305         if((exceptionalRes->getRowSize() >= nSeqs) &&
1306             exceptionalRes->getColSize() >= numColumns)
1307         {
1308             for(int s = firstSeq; s < firstSeq + nSeqs; s++)
1309             {
1310                 if(seqdist[s - firstSeq] > ul && p < getSeqLength(s + 1) &&
1311                    seqArray[s + 1][p + 1] >= 0 &&
1312                    seqArray[s + 1][p + 1] < userParameters->getMaxAA())
1313                 {
1314                     (*exceptionalRes)[s - firstSeq][p] = 1;
1315                 }
1316                 else
1317                 {
1318                     (*exceptionalRes)[s - firstSeq][p] = 0;
1319                 }
1320             }
1321         }
1322     }
1323     return &histogramColumnHeights;
1324 }
1325 
sortScores(vector<float> * scores,int f,int l)1326 void Alignment::sortScores(vector<float>* scores, int f, int l)
1327 {
1328     int i,last;
1329 
1330     if(f >= l)
1331         return;
1332 
1333     swap(scores, f, (f + l) / 2);
1334     last = f;
1335     for(i = f + 1; i <= l; i++)
1336     {
1337         if((*scores)[i] > (*scores)[f])
1338         {
1339             swap(scores, ++last, i);
1340         }
1341     }
1342     swap(scores, f, last);
1343     sortScores(scores, f, last - 1);
1344     sortScores(scores, last + 1, l);
1345 }
1346 
swap(vector<float> * scores,int s1,int s2)1347 void Alignment::swap(vector<float>* scores, int s1, int s2)
1348 {
1349     float temp;
1350 
1351     temp = (*scores)[s1];
1352     (*scores)[s1] = (*scores)[s2];
1353     (*scores)[s2] = temp;
1354 }
1355 
searchForString(bool * found,int seq,int beginRes,string search)1356 int Alignment::searchForString(bool* found, int seq, int beginRes, string search)
1357 {
1358     int lengthSeq = getSeqLength(seq);
1359     if(beginRes > lengthSeq)
1360     {
1361         *found = false;
1362         return beginRes;
1363     }
1364 
1365     int res = beginRes;
1366 
1367     // First need to convert search into a vector of ints!
1368     vector<int> codedSearch;
1369     int size = search.size();
1370     codedSearch.resize(size);
1371     int code;
1372     for(int i = 0; i < size; i++)
1373     {
1374         code = userParameters->resIndex(userParameters->getAminoAcidCodes(), search[i]);
1375         codedSearch[i] = code;
1376     }
1377 
1378     int numSame = 0;
1379     int startPos = -1;
1380     int searchSize = codedSearch.size();
1381     // Now check for the string of ints!!!!
1382     for(; res <= lengthSeq; res++) //- nige: res starts at 1
1383 
1384     {
1385         if(seqArray[seq][res] == codedSearch[0])
1386         {
1387             startPos = res; //- nige
1388             for(int i = 0; i < searchSize && (i + res) <= lengthSeq; i++) //- nige: res starts at 1
1389             {
1390                 if(seqArray[seq][res + i] == codedSearch[i])
1391                 {
1392                     numSame++;
1393                 }
1394                 //nige: hack: encoded gap character: see also AlignmentScroll.cpp
1395                 else if (seqArray[seq][res + i] == 31 || seqArray[seq][res + i] == 30)
1396                 {
1397                     res++;
1398                     i--;
1399                 }
1400                 else
1401                 {
1402                     break; // Not the same
1403                 }
1404             }
1405             if(numSame == searchSize)
1406             {
1407                 *found = true;
1408                 return startPos;
1409             }
1410             else
1411             {
1412                 numSame = 0;
1413             }
1414         }
1415     }
1416     *found = false;
1417     return startPos; // Not found!!!!!!!
1418 }
1419 
removeGapsFromSelectedSeqs(vector<int> * selected)1420 void Alignment::removeGapsFromSelectedSeqs(vector<int>* selected)
1421 {
1422     //getNumSeqs()
1423     int size = selected->size();
1424     int lengthOfSelectedSeq = 0;
1425     int gapPos1 = userParameters->getGapPos1();
1426     int gapPos2 = userParameters->getGapPos2();
1427     int s1;
1428 
1429     for(int i = 1; i <= getNumSeqs() && i < size; i++)
1430     {
1431         if((*selected)[i] == 1)
1432         {
1433             // remove gaps from this seq!
1434             lengthOfSelectedSeq = getSeqLength(i);
1435             s1 = 0;
1436             for(int j = 1; j <= lengthOfSelectedSeq; j++)
1437             {
1438                 if(seqArray[i][j] == gapPos1 || seqArray[i][j] == gapPos2)
1439                 {
1440                     continue;
1441                 }
1442                 ++s1;
1443                 seqArray[i][s1] = seqArray[i][j];
1444             }
1445             // Then remove the excess bit at the end of the array
1446             int numExtraElements = lengthOfSelectedSeq - s1;
1447 
1448             if((int)seqArray[i].size() > numExtraElements)
1449             {
1450                 for(int k = 0; k < numExtraElements; k++)
1451                 {
1452                     seqArray[i].pop_back();
1453                 }
1454             }
1455         }
1456     }
1457 
1458 }
1459 
removeGapOnlyColsFromSelectedSeqs(vector<int> * selected)1460 void Alignment::removeGapOnlyColsFromSelectedSeqs(vector<int>* selected)
1461 {
1462     int numGaps = 0;
1463     int NoneSelected = -1;
1464     int numColumns = 0;
1465     int sizeSelected = selected->size();
1466     int firstSeqSelected = NoneSelected;
1467     int gapPos1 = userParameters->getGapPos1();
1468     int gapPos2 = userParameters->getGapPos2();
1469     int k;
1470 
1471     for(int i = 1; i < sizeSelected; i++)
1472     {
1473         if((*selected)[i] == 1)
1474         {
1475             numColumns++;
1476             if(firstSeqSelected == -1)
1477             {
1478                 firstSeqSelected = i;
1479             }
1480         }
1481     }
1482 
1483     if(firstSeqSelected == NoneSelected)
1484     {
1485         cout << "No Sequences have been selected\n";
1486         return;
1487     }
1488 
1489     for(int i = 1; i <= getSeqLength(firstSeqSelected);)
1490     {
1491         numGaps = 0;
1492         for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1493         {
1494             if(getSeqLength(j) >= i)
1495             {
1496                 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1497                 {
1498                     numGaps++;
1499                 }
1500             }
1501         }
1502         if(numGaps == numColumns)
1503         {
1504             //cout << "                removing a gap column\n\n";
1505             for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1506             {
1507                 for(k = i + 1; k <= getSeqLength(j) + 1 && (int)seqArray[j].size() > k; k++)
1508                 {
1509                     seqArray[j][k - 1] = seqArray[j][k];
1510                 }
1511                 seqArray[j].pop_back(); // Remove the last element!!
1512 
1513                 if(getSeqLength(firstSeqSelected) <= 0)
1514                 {
1515                     break;
1516                 }
1517             }
1518         }
1519         else
1520         {
1521             i++;
1522         }
1523     }
1524 
1525 }
1526 
removeAllGapOnlyColumns(int fSeq,int lSeq,int profileNum)1527 void Alignment::removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum)
1528 {
1529     if(fSeq >= lSeq)
1530     {
1531         return;
1532     }
1533     int gapPos1 = userParameters->getGapPos1();
1534     int gapPos2 = userParameters->getGapPos2();
1535 
1536     int numGaps = 0;
1537     int numColumns = lSeq - fSeq + 1;
1538     int k;
1539     // We must cheack each column to see if it consists of only '-'
1540     for(int i = 1; i <= getSeqLength(fSeq);)
1541     {
1542         numGaps = 0;
1543         for(int j = fSeq; j <= lSeq; j++)
1544         {
1545             if(getSeqLength(j) >= i)
1546             {
1547                 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1548                 {
1549                     numGaps++;
1550                 }
1551             }
1552         }
1553         if(numGaps == numColumns)
1554         {
1555             for(int j = fSeq; j <= lSeq; j++)
1556             {
1557                 for(k = i + 1; k <= getSeqLength(j) + 1; k++)
1558                 {
1559                     seqArray[j][k - 1] = seqArray[j][k];
1560                 }
1561                 seqArray[j].pop_back(); // Remove the last element!!
1562 
1563                 if(profileNum == 1)
1564                 {
1565                     int lengthSecStruct = secStructMask1.size();
1566                     int lengthGapMask = gapPenaltyMask1.size();
1567 
1568                     for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1569                     {
1570                         secStructMask1[k - 1] = secStructMask1[k];
1571                     }
1572                     for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1573                     {
1574                         gapPenaltyMask1[k - 1] = gapPenaltyMask1[k];
1575                     }
1576                 }
1577 
1578                 if(profileNum == 2)
1579                 {
1580                     int lengthSecStruct = secStructMask2.size();
1581                     int lengthGapMask = gapPenaltyMask2.size();
1582 
1583                     for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1584                     {
1585                         secStructMask2[k - 1] = secStructMask2[k];
1586                     }
1587                     for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1588                     {
1589                         gapPenaltyMask2[k - 1] = gapPenaltyMask2[k];
1590                     }
1591                 }
1592                 if(getSeqLength(fSeq) <= 0)
1593                 {
1594                     break;
1595                 }
1596             }
1597         }
1598         else
1599         {
1600             i++;
1601         }
1602     }
1603 }
1604 
cutSelectedSequencesFromAlignment(vector<int> * selected)1605 vector<Sequence> Alignment::cutSelectedSequencesFromAlignment(vector<int>* selected)
1606 {
1607     vector<Sequence> cutSequences;
1608     int sizeOfSelected = selected->size();
1609     SeqArray::iterator seqArrayIterator;
1610     vector<string>::iterator namesIterator;
1611     vector<string>::iterator titlesIterator;
1612     vector<int>::iterator seqWeightIterator;
1613     vector<unsigned long>::iterator sequenceIdsIterator;
1614 
1615     int newProfile1NumSeqs = profile1NumSeqs;
1616     int profNum = userParameters->getProfileNum();
1617     int prof1NumSeqs = profile1NumSeqs;
1618     int numCutSoFar = 0;
1619     int intialNumSeqs = numSeqs;
1620 
1621     for(int i = 1; i < sizeOfSelected && i <= intialNumSeqs; i++)
1622     {
1623         if((*selected)[i] == 1)
1624         {
1625             // Cut the sequence from the alignment!
1626             seqArrayIterator = seqArray.begin() + i - numCutSoFar;
1627             namesIterator = names.begin() + i - numCutSoFar;
1628             titlesIterator = titles.begin() + i - numCutSoFar;
1629             seqWeightIterator = seqWeight.begin() + i - numCutSoFar;
1630             sequenceIdsIterator = sequenceIds.begin() + i - numCutSoFar;
1631             Sequence SeqToCut(&seqArray[i - numCutSoFar], *namesIterator, *titlesIterator,
1632                               *sequenceIdsIterator);
1633 
1634             numCutSoFar++;
1635 
1636             seqArrayIterator->clear();
1637             seqArray.erase(seqArrayIterator);
1638             names.erase(namesIterator);
1639             titles.erase(titlesIterator);
1640             seqWeight.erase(seqWeightIterator);
1641             sequenceIds.erase(sequenceIdsIterator);
1642 
1643             if(numSeqs > 0)
1644             {
1645                 numSeqs--;
1646             }
1647             else
1648             {
1649                 numSeqs = 0;
1650                 break;
1651             }
1652             if(profNum > 0 && i <= prof1NumSeqs)
1653             {
1654                 if(newProfile1NumSeqs > 0)
1655                 {
1656                     newProfile1NumSeqs--;
1657                 }
1658                 else
1659                 {
1660                     newProfile1NumSeqs = 0;
1661                 }
1662             }
1663             cutSequences.push_back(SeqToCut);
1664         }
1665     }
1666     profile1NumSeqs = newProfile1NumSeqs;
1667     setDefaultOutputIndex();
1668     resetAllSeqWeights();
1669     return cutSequences;
1670 }
1671 
setDefaultOutputIndex()1672 void Alignment::setDefaultOutputIndex()
1673 {
1674     outputIndex.clear();
1675     outputIndex.resize(numSeqs);
1676     for(int iseq = 1; iseq <= numSeqs; iseq++)
1677     {
1678         outputIndex[iseq - 1] = iseq;
1679     }
1680 }
1681 
removeAllOutsideRange(int beginPos,int endPos)1682 bool Alignment::removeAllOutsideRange(int beginPos, int endPos)
1683 {
1684     bool ok;
1685     if(beginPos < 0 || endPos > getLengthLongestSequence())
1686     {
1687         return false; // cannot do it!!!!
1688     }
1689 
1690     // trim the seqArray
1691     ok = keepPortionOfSeqArray(beginPos, endPos);
1692     if(!ok)
1693     {
1694         cerr << "There was a problem removing a portion of the array\n";
1695         return false;
1696     }
1697 
1698     // recalculate the maxLengths
1699     calculateMaxLengths();
1700 
1701     // Clear the histogram columns
1702     histogramColumnHeights.clear();
1703 
1704     // reset the weights
1705     resetAllSeqWeights();
1706 	return true;
1707 }
1708 
keepPortionOfSeqArray(int beginRangeIndex,int endRangeIndex)1709 bool Alignment::keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex)
1710 {
1711     SeqArray sectionToRealign;
1712     vector<int> emptyVec;
1713     sectionToRealign.push_back(emptyVec); // EMPTY sequence
1714 
1715     SeqArray::iterator posToAddTo = sectionToRealign.begin();
1716     // erase from all sequences the range specified here!!!!!
1717     if(beginRangeIndex < 0 || endRangeIndex < 0)
1718     {
1719         return false;
1720     }
1721 
1722     SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1723     SeqArray::iterator mainEndIt = seqArray.end();
1724 
1725     vector<int>::iterator begin, end, beginRange, endRange, beginCopyRange, endCopyRange;
1726 
1727     for(; mainBeginIt != mainEndIt; mainBeginIt++)
1728     {
1729         vector<int> vecToAdd;
1730         begin = mainBeginIt->begin() + 1;
1731         end = mainBeginIt->end();
1732         beginRange = begin + beginRangeIndex;
1733         endRange = begin + endRangeIndex + 1;
1734         beginCopyRange = beginRange;
1735         endCopyRange = endRange;
1736 
1737         // We need to copy all of this into another vector.
1738         if(endCopyRange < end && beginCopyRange < end)
1739         {
1740             vecToAdd.push_back(0);
1741             for(; beginCopyRange != endCopyRange; beginCopyRange++)
1742             {
1743                 vecToAdd.push_back(*beginCopyRange);
1744             }
1745             sectionToRealign.push_back(vecToAdd);
1746         }
1747         else
1748         {
1749             return false;
1750         }
1751 
1752         if(endRange < end && beginRange < end)
1753         {
1754             mainBeginIt->erase(beginRange, endRange);
1755         }
1756         else
1757         {
1758             return false;
1759         }
1760     }
1761     clearSeqArray();
1762     seqArray = sectionToRealign;
1763     return true;
1764 }
1765 
debugPrintSeqArray(SeqArray * arrayToPrint)1766 void Alignment::debugPrintSeqArray(SeqArray* arrayToPrint)
1767 {
1768     // I need to use iterators for everything here.
1769     SeqArray::iterator mainBeginIt = arrayToPrint->begin();
1770     SeqArray::iterator mainEndIt = arrayToPrint->end();
1771     vector<int>::iterator begin, end;
1772     string aaCodes = userParameters->getAminoAcidCodes();
1773 
1774     for(; mainBeginIt != mainEndIt; mainBeginIt++)
1775     {
1776         if(mainBeginIt->size() > 0)
1777         {
1778             begin = mainBeginIt->begin() + 1;
1779             end = mainBeginIt->end();
1780             for(; begin != end; begin++)
1781             {
1782                 if(*begin < (int)aaCodes.size())
1783                 {
1784                     cout << aaCodes[*begin];
1785                 }
1786                 else
1787                 {
1788                     cout << "-";
1789                 }
1790             }
1791             cout << "\n";
1792         }
1793     }
1794 }
1795 
debugPrintProfile1()1796 void Alignment::debugPrintProfile1()
1797 {
1798     cout << "************** PROFILE1 *********************\n";
1799     SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1800     SeqArray::iterator mainEndIt = mainBeginIt + profile1NumSeqs;
1801     vector<int>::iterator begin, end;
1802     string aaCodes = userParameters->getAminoAcidCodes();
1803 
1804     for(; mainBeginIt != mainEndIt; mainBeginIt++)
1805     {
1806         cout << "PROFILE1 SEQ: ";
1807         if(mainBeginIt->size() > 0)
1808         {
1809             begin = mainBeginIt->begin() + 1;
1810             end = mainBeginIt->end();
1811             for(; begin != end; begin++)
1812             {
1813                 if(*begin < (int)aaCodes.size())
1814                 {
1815                     cout << aaCodes[*begin];
1816                 }
1817                 else
1818                 {
1819                     cout << "-";
1820                 }
1821             }
1822             cout << "\n";
1823         }
1824     }
1825 }
1826 
debugPrintProfile2()1827 void Alignment::debugPrintProfile2()
1828 {
1829     cout << "************** PROFILE2 *********************\n";
1830     SeqArray::iterator mainBeginIt = seqArray.begin() + 1 + profile1NumSeqs;
1831     SeqArray::iterator mainEndIt = seqArray.end();
1832     vector<int>::iterator begin, end;
1833     string aaCodes = userParameters->getAminoAcidCodes();
1834 
1835     for(; mainBeginIt != mainEndIt; mainBeginIt++)
1836     {
1837         cout << "PROFILE2 SEQ: ";
1838         if(mainBeginIt->size() > 0)
1839         {
1840             begin = mainBeginIt->begin() + 1;
1841             end = mainBeginIt->end();
1842             for(; begin != end; begin++)
1843             {
1844                 if(*begin < (int)aaCodes.size())
1845                 {
1846                     cout << aaCodes[*begin];
1847                 }
1848                 else
1849                 {
1850                     cout << "-";
1851                 }
1852             }
1853             cout << "\n";
1854         }
1855     }
1856 }
1857 
updateRealignedRange(SeqArray realignedSeqs,int beginPos,int endPos)1858 bool Alignment::updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos)
1859 {
1860     if(realignedSeqs.size() != seqArray.size())
1861     {
1862         return false;
1863     }
1864     if(beginPos < 0 || endPos < 0)
1865     {
1866         return false;
1867     }
1868 
1869     // erase from all sequences the range specified here!!!!!
1870 
1871     SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1872     SeqArray::iterator mainEndIt = seqArray.end();
1873 
1874     SeqArray::iterator pasteBeginIt = realignedSeqs.begin() + 1;
1875     SeqArray::iterator pasteEndIt = realignedSeqs.end();
1876 
1877     vector<int>::iterator begin, end, beginRange, endRange;
1878 
1879     for(; mainBeginIt != mainEndIt && pasteBeginIt != pasteEndIt; mainBeginIt++)
1880     {
1881         vector<int> vecToAdd;
1882         begin = mainBeginIt->begin() + 1;
1883         end = mainBeginIt->end();
1884         beginRange = begin + beginPos;
1885         endRange = begin + endPos + 1;
1886 
1887         if(endRange < end && beginRange < end)
1888         {
1889             mainBeginIt->erase(beginRange, endRange);
1890             mainBeginIt->insert(beginRange, pasteBeginIt->begin() + 1,
1891                                 pasteBeginIt->end());
1892         }
1893         else
1894         {
1895             return false;
1896         }
1897         pasteBeginIt++;
1898     }
1899     return true;
1900 }
1901 
reloadAlignment()1902 bool Alignment::reloadAlignment()
1903 {
1904     if(getNumSeqs() <= 0)
1905     {
1906         return false;
1907     }
1908     if(userParameters->getOutputOrder() == INPUT)
1909     {
1910         return true;
1911     }
1912     if((int)outputIndex.size() != getNumSeqs())
1913     {
1914         outputIndex.clear();
1915         return false;
1916     }
1917     vector<int> emptyVec;
1918     string emptyString = "";
1919     SeqArray outputOrderSeqArray;
1920     outputOrderSeqArray.resize(getNumSeqs() + 1);
1921     outputOrderSeqArray[0] = emptyVec;
1922     vector<string> outputOrderNames;
1923     outputOrderNames.resize(getNumSeqs() + 1);
1924     outputOrderNames[0] = emptyString;
1925     vector<string> outputOrderTitles;
1926     outputOrderTitles.resize(getNumSeqs() + 1);
1927     outputOrderTitles[0] = emptyString;
1928     vector<unsigned long> outputOrderSequenceIds;
1929     outputOrderSequenceIds.resize(getNumSeqs() + 1);
1930     outputOrderSequenceIds[0] = 0;
1931 
1932     int size = seqArray.size();
1933     if((seqArray.size() != names.size()) || (seqArray.size() != titles.size()) ||
1934         sequenceIds.size() != names.size())
1935     {
1936         return false;
1937     }
1938 
1939     int _outIndex;
1940     // Now for each seq,
1941     for(int i = 1; i < size; i++)
1942     {
1943         if(i < (int)outputOrderSeqArray.size() && i - 1 < (int)outputIndex.size() &&
1944            outputIndex[i - 1] < size)
1945         {
1946             _outIndex = outputIndex[i - 1];
1947             outputOrderSeqArray[i] = seqArray[_outIndex];
1948             outputOrderNames[i] = names[_outIndex];
1949             outputOrderTitles[i] = titles[_outIndex];
1950             outputOrderSequenceIds[i] = sequenceIds[_outIndex];
1951         }
1952         else
1953         {
1954             return false;
1955         }
1956     }
1957 
1958     // Now we have a copy in the correct order.
1959     // Remove all the elements from the old ones and set them to be these arrays
1960     clearSeqArray();
1961     seqArray = outputOrderSeqArray;
1962     names.clear();
1963     names = outputOrderNames;
1964     titles.clear();
1965     titles = outputOrderTitles;
1966     sequenceIds.clear();
1967     sequenceIds = outputOrderSequenceIds;
1968 
1969     return true;
1970 }
1971 
getSequenceFromUniqueId(unsigned long id)1972 const vector<int>* Alignment::getSequenceFromUniqueId(unsigned long id)
1973 {
1974     for(int i = 0; i < (int)sequenceIds.size(); i++)
1975     {
1976         if(sequenceIds[i] == id)
1977         {
1978             return getSequence(i);
1979         }
1980     }
1981 
1982     // We have not found it, throw an exception!!!
1983     throw SequenceNotFoundException();
1984 }
1985 
1986 /**
1987  * Change:
1988  * Mark 26-1-2007: This function was added to allow access to the unique id.
1989  */
updateSequence(int index,const vector<int> * seq)1990 void Alignment::updateSequence(int index, const vector<int>* seq)
1991 {
1992     if(index >= 1 && index < (int)seqArray.size())
1993     {
1994         seqArray[index] = *seq;
1995     }
1996 }
1997 
1998 /**
1999  * Change:
2000  * Mark 17-2-2007: This function was added to check if a res is a gap or not.
2001  */
isGap(int seq,int col) const2002 bool Alignment::isGap(int seq, int col) const
2003 {
2004     int res = seqArray[seq][col];
2005     if(res == gapPos1 || res == gapPos2)
2006     {
2007         return true;
2008     }
2009     else
2010     {
2011         return false;
2012     }
2013 }
2014 /**
2015  * This function will be used so that we can create an alignment object from a seqArray.
2016  * This will be used for the tree iteration.
2017  */
addSequences(SeqArray * seqVector)2018 void Alignment::addSequences(SeqArray* seqVector)
2019 {
2020     clearAlignment();
2021     numSeqs = seqVector->size() - 1;
2022     vector<int> emptyVec;
2023 
2024     seqArray.push_back(emptyVec); // EMPTY sequence
2025     names.push_back(string(""));
2026     titles.push_back(string(""));
2027     sequenceIds.push_back(0);
2028     cout << "\nThere are " << numSeqs << " in the alignment obj\n";
2029     for(int i = 1; i <= numSeqs; i++)
2030     {
2031         ostringstream name;
2032         seqArray.push_back((*seqVector)[i]);
2033         titles.push_back(string(""));
2034         sequenceIds.push_back(utilityObject->getUniqueSequenceIdentifier());
2035         name << "name" << numSeqs;
2036         names.push_back(name.str());
2037     }
2038 
2039     calculateMaxLengths();
2040     seqWeight.resize(numSeqs + 1, 100);
2041 }
2042 
2043 }
2044 
2045