1 /**
2 * Author: Mark Larkin
3 *
4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5 */
6 /**
7 * Changes:
8 *
9 * 16-02-07,Nigel Brown(EMBL): Added friend NameIterator to allow a caller to
10 * process the name vector.
11 *
12 * 05-03-07,Nigel Brown(EMBL): modified searchForString() to skip over gaps in
13 * sequence while matching.
14 *
15 * 26-03-07,Nigel Brown(EMBL): suppressed error message box for name clashes;
16 * added testUniqueNames() predicate, which compares new sequence names with
17 * those in the alignment vector BEFORE appending them.
18 */
19 #ifdef HAVE_CONFIG_H
20 #include "config.h"
21 #endif
22 #include <exception>
23 #include <cmath>
24 #include <sstream>
25 #include "Alignment.h"
26
27 using namespace std;
28
29 namespace clustalw
30 {
31
Alignment()32 Alignment::Alignment()
33 : gapPos1(userParameters->getGapPos1()),
34 gapPos2(userParameters->getGapPos2())
35 {
36 maxNames = 0;
37 numSeqs = 0;
38 maxAlignmentLength = 0;
39 lengthLongestSequence = 0;
40 profile1NumSeqs = 0;
41 }
42
43 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
44 /** remove gaps from older alignments (code = gap_pos1) */
resetProfile1()45 void Alignment::resetProfile1()
46 {
47 register int sl; /* which have code = gap_pos2 */
48 int i,j;
49 int _gapPos1 = userParameters->getGapPos1();
50 int _gapPos2 = userParameters->getGapPos2();
51 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
52 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
53
54 if (userParameters->getStructPenalties1() != NONE)
55 {
56 sl = 0;
57 for (j = 0; j < (int)gapPenaltyMask1.size(); ++j)
58 {
59 if (gapPenaltyMask1[j] == _gapPos1 && (_resetAlignNew ||
60 _resetAlignAll))
61 {
62 continue;
63 }
64 if (gapPenaltyMask1[j] == _gapPos2 && (_resetAlignAll))
65 {
66 continue;
67 }
68 gapPenaltyMask1[sl] = gapPenaltyMask1[j];
69 ++sl;
70 }
71 }
72
73 if (userParameters->getStructPenalties1() == SECST)
74 {
75 sl = 0;
76 for (j = 0; j < (int)secStructMask1.size(); ++j)
77 {
78 if (secStructMask1[j] == _gapPos1 && (_resetAlignNew ||
79 _resetAlignAll))
80 {
81 continue;
82 }
83 if (secStructMask1[j] == _gapPos2 && (_resetAlignAll))
84 {
85 continue;
86 }
87 secStructMask1[sl] = secStructMask1[j];
88 ++sl;
89 }
90 }
91
92 for(i = 1; i <= profile1NumSeqs; ++i)
93 {
94 sl = 0;
95 for(j = 1; j <= getSeqLength(i); ++j)
96 {
97 if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
98 {
99 continue;
100 }
101 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
102 {
103 continue;
104 }
105 ++sl;
106 seqArray[i][sl] = seqArray[i][j];
107 }
108
109 // Andreas Wilm (UCD): added 2008-03-07:
110 // Remove excess bit at end of sequence
111 int numExtraElements = seqArray[i].size() - 1 - sl;
112 for(int k = 0; k < numExtraElements; k++)
113 {
114 seqArray[i].pop_back();
115 }
116 }
117 }
118
119 // Andreas Wilm (UCD): shouldn't resetProfile1 and resetProfile2 be merged?
resetProfile2()120 void Alignment::resetProfile2()
121 {
122 register int sl; /* which have code = gap_pos2 */
123 int i, j;
124 int _gapPos1 = userParameters->getGapPos1();
125 int _gapPos2 = userParameters->getGapPos2();
126 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
127 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
128 int _profile1NumSeqs = profile1NumSeqs;
129
130
131 if (userParameters->getStructPenalties2() != NONE)
132 {
133 sl = 0;
134 for (j = 0; j < (int)gapPenaltyMask2.size(); ++j)
135 {
136 if (gapPenaltyMask2[j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
137 {
138 continue;
139 }
140 if (gapPenaltyMask2[j] == _gapPos2 && (_resetAlignAll))
141 {
142 continue;
143 }
144 gapPenaltyMask2[sl] = gapPenaltyMask2[j];
145 ++sl;
146 }
147 }
148
149 if (userParameters->getStructPenalties2() == SECST)
150 {
151 sl = 0;
152 for (j = 0; j < (int)secStructMask2.size(); ++j)
153 {
154 if (secStructMask2[j] == _gapPos1 && (_resetAlignNew ||
155 _resetAlignAll))
156 {
157 continue;
158 }
159 if (secStructMask2[j] == _gapPos2 && (_resetAlignAll))
160 {
161 continue;
162 }
163 secStructMask2[sl] = secStructMask2[j];
164 ++sl;
165 }
166 }
167
168 for(i = _profile1NumSeqs + 1; i <= numSeqs; ++i)
169 {
170 sl = 0;
171 for(j = 1; j <= getSeqLength(i); ++j)
172 {
173 if(seqArray[i][j] == _gapPos1 && (_resetAlignNew || _resetAlignAll))
174 {
175 continue;
176 }
177 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
178 {
179 continue;
180 }
181 ++sl;
182 seqArray[i][sl] = seqArray[i][j];
183 }
184
185
186 // Andreas Wilm (UCD) added 2008-03-07:
187 // Remove excess bit at end of sequence
188 int numExtraElements = seqArray[i].size() - 1 - sl;
189 for(int k = 0; k < numExtraElements; k++)
190 {
191 seqArray[i].pop_back();
192 }
193 }
194
195 }
196
resetAllSeqWeights()197 void Alignment::resetAllSeqWeights()
198 {
199 seqWeight.clear();
200 seqWeight.resize(numSeqs + 1, 100);
201 }
202
203 /*
204 * The function addOutputIndex must check that the number of outputindex
205 * Not sure what to do with this one.
206 */
addOutputIndex(vector<int> * outputIndexToAdd)207 bool Alignment::addOutputIndex(vector<int>* outputIndexToAdd)
208 {
209 // First need to clear the old outputIndex
210 outputIndex.clear();
211 // Check if the size is correct
212 if((int)outputIndexToAdd->size() == numSeqs)
213 {
214 // Add the output index
215 outputIndex = *outputIndexToAdd;
216 return true;
217 }
218 else
219 {
220 clearAlignment();
221 return false; // Could not add them
222 }
223
224 }
225
226 /*
227 * The function appendOutputIndex is used when we are appending the outputIndex
228 * to the current one. We do this when we are adding a profile2.
229 */
appendOutputIndex(vector<int> * outputIndexToAppend)230 bool Alignment::appendOutputIndex(vector<int>* outputIndexToAppend)
231 {
232 // Check if the size is correct
233 if((int)(outputIndexToAppend->size() + outputIndex.size()) == numSeqs)
234 {
235 // Append the outputIndex
236 vector<int>::iterator outIndexIter = outputIndexToAppend->begin();
237 while(outIndexIter != outputIndexToAppend->end())
238 {
239 outputIndex.push_back(*outIndexIter);
240 outIndexIter++;
241 }
242 if((int)outputIndex.size() == numSeqs)
243 {
244 return true;
245 }
246 else
247 {
248 clearAlignment();
249 cerr << "There is a problem with adding the sequences\n";
250 return false;
251 }
252 }
253 else
254 {
255 clearAlignment();
256 return false;
257 }
258 }
259
260 /*
261 * The function addSequences takes a vector of sequences and adds it to the Alignment.
262 * This is used if there is nothing in memory that we need.
263 *
264 */
addSequences(vector<Sequence> * seqVector)265 void Alignment::addSequences(vector<Sequence>* seqVector)
266 {
267 clearAlignment();
268
269 numSeqs = seqVector->size();
270 vector<int> emptyVec;
271
272 // NOTE push dummy sequences on first!
273 /********************************************************
274 * It was decided to stay with the seqs and seqArray *
275 * index begining at 1. This is because it is difficult *
276 * to change in the code, so we push dummy seqs on *
277 ********************************************************/
278 seqArray.push_back(emptyVec); // EMPTY sequence
279 names.push_back(string(""));
280 titles.push_back(string(""));
281 sequenceIds.push_back(0);
282
283 addSequencesToVector(seqVector);
284
285 calculateMaxLengths();
286 seqWeight.resize(numSeqs + 1, 100);
287 }
288
289 /*
290 * The function appendSequences is used when we have a profile2.
291 *
292 */
appendSequences(vector<Sequence> * seqVector)293 void Alignment::appendSequences(vector<Sequence>* seqVector)
294 {
295 numSeqs += seqVector->size(); // Add to the number we already have!
296 addSequencesToVector(seqVector);
297 seqWeight.clear();
298 seqWeight.resize(numSeqs + 1, 100);
299 calculateMaxLengths();
300 }
301
pasteSequencesIntoPosition(vector<Sequence> * seqVector,int pos,bool explicitPasteToProfile2)302 void Alignment::pasteSequencesIntoPosition(vector<Sequence>* seqVector, int pos,
303 bool explicitPasteToProfile2)
304 {
305 SeqArray::iterator seqArrayIterator;
306 vector<string>::iterator namesIterator;
307 vector<string>::iterator titlesIterator;
308 vector<unsigned long>::iterator sequenceIdsIterator;
309
310 int profNum = userParameters->getProfileNum();
311 int numSeqsToInsert = seqVector->size();
312 if(numSeqsToInsert == 0 || pos < 0)
313 {
314 return;
315 }
316 if(pos == numSeqs)
317 {
318 seqArrayIterator = seqArray.end();
319 namesIterator = names.end();
320 titlesIterator = titles.end();
321 sequenceIdsIterator = sequenceIds.end();
322 }
323 else
324 {
325 seqArrayIterator = seqArray.begin() + pos + 1;
326 namesIterator = names.begin() + pos + 1;
327 titlesIterator = titles.begin() + pos + 1;
328 sequenceIdsIterator = sequenceIds.begin() + pos + 1;
329 }
330
331 int prof1NumSeqs = profile1NumSeqs;
332
333 for(int i = numSeqsToInsert - 1; i >= 0; i--)
334 {
335 seqArray.insert(seqArrayIterator, *(*seqVector)[i].getSequence());
336 names.insert(namesIterator, (*seqVector)[i].getName());
337 titles.insert(titlesIterator, (*seqVector)[i].getTitle());
338 sequenceIds.insert(sequenceIdsIterator, (*seqVector)[i].getIdentifier());
339
340 numSeqs++;
341 if(profNum != 0 && !explicitPasteToProfile2 && pos <= prof1NumSeqs)
342 {
343 prof1NumSeqs++;
344 }
345 }
346
347 if(profNum != 0 && pos <= prof1NumSeqs)
348 {
349 profile1NumSeqs = prof1NumSeqs;
350 }
351
352 resetAllSeqWeights();
353 setDefaultOutputIndex();
354 }
355
debugPrintAllNames()356 void Alignment::debugPrintAllNames()
357 {
358 vector<string>::iterator nameIter = names.begin();
359 while(nameIter != names.end())
360 {
361 cout << *nameIter << endl;
362 nameIter++;
363 }
364 }
365
begin(Alignment * a)366 void Alignment::NameIterator::begin(Alignment *a) {
367 //cout << "begin()\n";
368 if (a) {
369 alignment = a;
370 i = alignment->names.begin();
371 }
372 }
373
next()374 const string Alignment::NameIterator::next() {
375 //cout << "next()\n";
376 if (!alignment)
377 return string();
378 if (i == alignment->names.end())
379 return string();
380 return *i++;
381 }
382
end()383 bool Alignment::NameIterator::end() {
384 //cout << "end()\n";
385 if (!alignment)
386 return true;
387 if (i == alignment->names.end())
388 return true;
389 return false;
390 }
391
392 // 26-03-03,nige: test before appending seqVector
testUniqueNames(vector<Sequence> * seqVector,string * offendingSeq)393 bool Alignment::testUniqueNames(vector<Sequence>* seqVector, string *offendingSeq)
394 {
395 vector<string>::iterator oldName;
396 vector<Sequence>::iterator newName;
397 bool unique = true;
398
399 //iterate over new candidate names
400 for (newName = seqVector->begin(); unique && newName != seqVector->end(); newName++) {
401 //iterate over old stored names
402 for (oldName = names.begin() + 1; unique && oldName != names.end(); oldName++) {
403 if (*oldName == newName->getName()) {
404 offendingSeq->assign(*oldName);
405 unique = false;
406 }
407 }
408 }
409 return unique;
410 }
411
412
413 /*
414 * The function addSequencesToVector is used to add sequences to our seqVector
415 * It is used by both addSequences and appendSequences. This is to reduce code
416 * duplication.
417 */
addSequencesToVector(vector<Sequence> * seqVector)418 void Alignment::addSequencesToVector(vector<Sequence>* seqVector)
419 {
420 std::vector<Sequence>::iterator itSeq;
421
422 for(itSeq = seqVector->begin(); itSeq != seqVector->end(); ++itSeq)
423 {
424 seqArray.push_back(*(*itSeq).getSequence());
425 names.push_back((*itSeq).getName());
426 titles.push_back((*itSeq).getTitle());
427 sequenceIds.push_back((*itSeq).getIdentifier());
428 }
429
430 if(!(((int)seqArray.size() == numSeqs + 1) && ((int)names.size() == numSeqs + 1)
431 && ((int)titles.size() == numSeqs + 1) && ((int)sequenceIds.size() == numSeqs + 1)))
432 {
433 cerr << "There has been an error adding the sequences to Alignment.\n"
434 << "Must terminate the program. EaddSequencesrror occured in addSequences.\n";
435 exit(1);
436 }
437 }
438
clearAlignment()439 void Alignment::clearAlignment()
440 {
441 // Erase all the elements from the vector!
442 clearSeqArray();
443 names.clear();
444 titles.clear();
445 outputIndex.clear();
446 sequenceIds.clear();
447 clearSecStruct1();
448 clearSecStruct2();
449 seqWeight.clear();
450 maxNames = 0;
451 numSeqs = 0;
452 maxAlignmentLength = 0;
453 lengthLongestSequence = 0;
454 userParameters->setProfileNum(0);
455 userParameters->setProfile1Empty(true);
456 userParameters->setProfile2Empty(true);
457 }
458
clearSeqArray()459 void Alignment::clearSeqArray()
460 {
461 for(int i = 0; i < (int)seqArray.size(); i++)
462 {
463 seqArray[i].clear();
464 }
465 seqArray.clear();
466 }
467
clearSecStruct1()468 void Alignment::clearSecStruct1()
469 {
470 gapPenaltyMask1.clear();
471 secStructMask1.clear();
472 secStructName1 = "";
473 }
474
clearSecStruct2()475 void Alignment::clearSecStruct2()
476 {
477 gapPenaltyMask2.clear();
478 secStructMask2.clear();
479 secStructName2 = "";
480 }
481
482 /*
483 * The function alignScore is used to score the alignment!
484 * This is used by other classes to see what score the alignment has gotten.
485 */
alignScore(void)486 int Alignment::alignScore(void)
487 {
488 int maxRes;
489 int seq1, seq2, res1, res2;
490 int ngaps;
491 int i, len1, len2;
492 int score;
493 int _maxAA = userParameters->getMaxAA();
494 float _gapOpen = userParameters->getGapOpen();
495 int matrix[NUMRES][NUMRES];
496
497 //
498 // calculate an overall score for the alignment by summing the
499 // scores for each pairwise alignment
500 //
501 maxRes = subMatrix->getAlnScoreMatrix(matrix);
502 if (maxRes == 0)
503 {
504 utilityObject->error("Matrix for alignment scoring not found\n");
505 return 0;
506 }
507
508 score = 0;
509 for (seq1 = 1; seq1 <= numSeqs; seq1++)
510 {
511 for (seq2 = 1; seq2 < seq1; seq2++)
512 {
513 len1 = seqArray[seq1].size() - 1;
514 len2 = seqArray[seq2].size() - 1;
515 for (i = 1; i < len1 && i < len2; i++)
516 {
517 res1 = seqArray[seq1][i];
518 res2 = seqArray[seq2][i];
519
520 if ((res1 >= 0) && (res1 <= _maxAA) && (res2 >= 0) && (res2 <= _maxAA))
521 {
522 score += matrix[res1][res2];
523 }
524 }
525
526 ngaps = countGaps(seq1, seq2, len1);
527
528 score = static_cast<int>(score - (100 * _gapOpen * ngaps)); // Mark change 17-5-07
529
530 }
531 }
532
533 score /= 100;
534
535 utilityObject->info("Alignment Score %d\n", score);
536 return score;
537 }
538
countGaps(int seq1,int seq2,int len)539 int Alignment::countGaps(int seq1, int seq2, int len)
540 {
541 int i, g;
542 int q, r;//, *Q, *R;
543
544 vector<int> Q, R;
545 Q.resize(len + 2, 0);
546 R.resize(len + 2, 0);
547
548 int _maxAA = userParameters->getMaxAA();
549
550 try
551 {
552 Q[0] = R[0] = g = 0;
553
554 for (i = 1; i < len; i++)
555 {
556 if (seqArray[seq1][i] > _maxAA)
557 {
558 q = 1;
559 }
560 else
561 {
562 q = 0;
563 }
564
565 if (seqArray[seq2][i] > _maxAA)
566 {
567 r = 1;
568 }
569 else
570 {
571 r = 0;
572 }
573
574 // NOTE I havent a clue what this does!!!!
575 if (((Q[i - 1] <= R[i - 1]) && (q != 0) && (1-r != 0)) ||
576 ((Q[i - 1] >= R[i - 1]) && (1-q != 0) && (r != 0)))
577 {
578 g += 1;
579 }
580
581 if (q != 0)
582 {
583 Q[i] = Q[i - 1] + 1;
584 }
585 else
586 {
587 Q[i] = 0;
588 }
589
590 if (r != 0)
591 {
592 R[i] = R[i - 1] + 1;
593 }
594 else
595 {
596 R[i] = 0;
597 }
598 }
599 }
600 catch(const exception &ex)
601 {
602 cerr << ex.what() << endl;
603 cerr << "Terminating program. Cannot continue. Function = countGaps\n";
604 exit(1);
605 }
606
607 return (g);
608 }
609
resetAlign()610 void Alignment::resetAlign()
611 {
612 /* remove gaps from older alignments (code = gap_pos1) EXCEPT for
613 gaps that were INPUT with the seqs. which have code =
614 gap_pos2
615 */
616
617 register int sl;
618 int i, j;
619 int _gapPos1 = userParameters->getGapPos1();
620 int _gapPos2 = userParameters->getGapPos2();
621 bool _resetAlignNew = userParameters->getResetAlignmentsNew();
622 bool _resetAlignAll = userParameters->getResetAlignmentsAll();
623
624
625 for(i = 1; i <= numSeqs;++i)
626 {
627 sl = 0;
628 for(j = 1; j <= getSeqLength(i); ++j)
629 {
630 if(seqArray[i][j] == _gapPos1 && ( _resetAlignNew || _resetAlignAll))
631 {
632 continue;
633 }
634 if(seqArray[i][j] == _gapPos2 && (_resetAlignAll))
635 {
636 continue;
637 }
638 ++sl;
639 seqArray[i][sl] = seqArray[i][j];
640 }
641
642
643 // Andreas Wilm (UCD) added 2008-03-07:
644 // Remove excess bit at end of sequence
645 int numExtraElements = seqArray[i].size() - 1 - sl;
646 for(int k = 0; k < numExtraElements; k++)
647 {
648 seqArray[i].pop_back();
649 }
650 }
651 }
652
fixGaps()653 void Alignment::fixGaps()
654 {
655 int i,j;
656
657 if (userParameters->getStructPenalties1() != NONE)
658 {
659 for (j = 0; j < getSeqLength(1); ++j)
660 {
661 if (gapPenaltyMask1[j] == userParameters->getGapPos1())
662 {
663 gapPenaltyMask1[j] = userParameters->getGapPos2();
664 }
665 }
666 }
667
668 if (userParameters->getStructPenalties1() == SECST)
669 {
670 for (j = 0; j < getSeqLength(1); ++j)
671 {
672 if (secStructMask1[j] == userParameters->getGapPos1())
673 {
674 secStructMask1[j] = userParameters->getGapPos2();
675 }
676 }
677 }
678
679 for(i = 1; i <= numSeqs; ++i)
680 {
681 for(j = 1; j <= getSeqLength(i); ++j)
682 {
683 if(seqArray[i][j] == userParameters->getGapPos1())
684 {
685 seqArray[i][j] = userParameters->getGapPos2();
686 }
687 }
688 }
689
690 }
691
countid(int s1,int s2)692 float Alignment::countid(int s1, int s2)
693 {
694 int c1, c2;
695 int i;
696 int count, total;
697 float score;
698 int shorterSeqLength = (getSeqLength(s1) < getSeqLength(s2)) ? getSeqLength(s1) : getSeqLength(s2);
699
700 count = total = 0;
701 for (i = 1; i <= shorterSeqLength; i++) // NOTE june29
702 {
703 c1 = seqArray[s1][i];
704 c2 = seqArray[s2][i];
705 if ((c1 >= 0) && (c1 < userParameters->getMaxAA()))
706 {
707 total++;
708 if (c1 == c2)
709 {
710 count++;
711 }
712 }
713
714 }
715
716 if (total == 0)
717 {
718 score = 0;
719 }
720 else
721 {
722 score = 100.0 *(float)count / (float)total;
723 }
724
725 return (score);
726 }
727
debugPrintSequences()728 void Alignment::debugPrintSequences()
729 {
730 cout << std::endl;
731 for(int i = 0; i < (int)seqArray.size(); i++)
732 {
733 for(int j = 0; j < (int)seqArray[i].size(); j++)
734 {
735 if(seqArray[i][j] > 9)
736 cout << " " << seqArray[i][j];
737 else
738 cout << " " << seqArray[i][j];
739 }
740 cout << std::endl;
741 }
742 }
743
744 /*
745 * Note the max_aln_length is now called maxAlignmentLength, and it will be stored
746 * and calculated in this class. Mostly it is used for allocating arrays. But not always.
747 */
calculateMaxLengths()748 void Alignment::calculateMaxLengths()
749 {
750 maxAlignmentLength = 0;
751 lengthLongestSequence = 0;
752 if(seqArray.size() > 0)
753 {
754 SeqArray::iterator seqArrayIter = seqArray.begin();
755
756 while(seqArrayIter != seqArray.end())
757 {
758 // NOTE I needed to change this to >= for a bug I had!!!!!!!
759 if((int)(*seqArrayIter).size() - 1 >= lengthLongestSequence)
760 {
761 lengthLongestSequence = (*seqArrayIter).size();
762 }
763 ++seqArrayIter;
764 }
765
766 if(lengthLongestSequence > 0)
767 {
768 maxAlignmentLength = (lengthLongestSequence * 2) - 2;
769 lengthLongestSequence -= 1; // MADE A CHANGE HERE AS WELL!!
770 }
771 else
772 {
773 lengthLongestSequence = 0;
774 maxAlignmentLength = 0;
775 }
776
777 }
778 else
779 {
780 maxAlignmentLength = 0;
781 }
782 maxNames = 0;
783 if(names.size() > 0)
784 {
785 vector<string>::iterator nameVecIter = names.begin();
786
787 while(nameVecIter != names.end())
788 {
789 if((int)(*nameVecIter).size() > maxNames)
790 {
791 maxNames = (*nameVecIter).size();
792 }
793 ++nameVecIter;
794 }
795 if(maxNames < 10)
796 {
797 maxNames = 10;
798 }
799 }
800 else
801 {
802 maxNames = 0;
803 }
804 }
805
806 /*
807 * This function checks to see if all names are different. It returns true if they
808 * are all different, and false if there are 2 the same.
809 * currently quadratic - would be nice to speed up
810 */
checkAllNamesDifferent(string * offendingSeq)811 bool Alignment::checkAllNamesDifferent(string *offendingSeq)
812 {
813 bool different = true;
814 // NOTE I added the + 1 here because, if we had a sequence with a name as blank
815 // this would be the same as the first one!
816 vector<string>::iterator namesIter1 = names.begin() + 1;
817 vector<string>::iterator namesIter2;
818 int counter1 = 1;
819 int counter2 = 2;
820
821
822 while(namesIter1 != names.end())
823 {
824 namesIter2 = namesIter1 + 1;
825 while(namesIter2 != names.end())
826 {
827 if((*namesIter1).compare(*namesIter2) == 0) // If we have 2 strings the same.
828 {
829 different = false;
830 /* 23-03-2007,nige: let someone up the stack deal with this - GUI is too deeply entangled.
831 * utilityObject->error("Multiple sequences found with same name '%s' (first %d chars are significant)\n", namesIter1->c_str(), MAXNAMES);
832 */
833
834 offendingSeq->assign((*namesIter1));
835 clearAlignment();
836 return different; // Not all different!
837 }
838 namesIter2++;
839 counter2++;
840 }
841 namesIter1++;
842 counter1++;
843 counter2 = counter1 + 1;
844 }
845 return different;
846 }
847
848
849
850
851
852
addSecStructMask1(vector<char> * secStructMaskToAdd)853 void Alignment::addSecStructMask1(vector<char>* secStructMaskToAdd)
854 {
855 secStructMask1 = *secStructMaskToAdd;
856 }
857
addSecStructMask2(vector<char> * secStructMaskToAdd)858 void Alignment::addSecStructMask2(vector<char>* secStructMaskToAdd)
859 {
860 secStructMask2 = *secStructMaskToAdd;
861 }
862
addGapPenaltyMask1(vector<char> * gapPenaltyMaskToAdd)863 void Alignment::addGapPenaltyMask1(vector<char>* gapPenaltyMaskToAdd)
864 {
865 gapPenaltyMask1 = *gapPenaltyMaskToAdd;
866 }
867
addGapPenaltyMask2(vector<char> * gapPenaltyMaskToAdd)868 void Alignment::addGapPenaltyMask2(vector<char>* gapPenaltyMaskToAdd)
869 {
870 gapPenaltyMask2 = *gapPenaltyMaskToAdd;
871 }
872
addSecStructName1(string nameToAdd)873 void Alignment::addSecStructName1(string nameToAdd)
874 {
875 secStructName1 = nameToAdd;
876 }
877
addSecStructName2(string nameToAdd)878 void Alignment::addSecStructName2(string nameToAdd)
879 {
880 secStructName2 = nameToAdd;
881 }
882
addSeqWeight(vector<int> * _seqWeight)883 void Alignment::addSeqWeight(vector<int>* _seqWeight)
884 {
885 if(seqWeight.size() == _seqWeight->size())
886 {
887 int size = seqWeight.size();
888
889 for(int i = 0; i < size; i++)
890 {
891 seqWeight[i] = (*_seqWeight)[i];
892 }
893 }
894 }
895
printSequencesAddedInfo()896 void Alignment::printSequencesAddedInfo()
897 {
898 if(userParameters->getDisplayInfo())
899 {
900 int startSeq = userParameters->getProfile2Empty() ? 1:
901 profile1NumSeqs + 1;
902 string dnaFlag = userParameters->getDNAFlag() ? "bp" : "aa";
903
904
905 for(int i = startSeq; i <= numSeqs; i++)
906 {
907 cout << "Sequence " << i << ": "
908 << std::left << setw(maxNames) << names.at(i)
909 << std::right << setw(6) << getSequenceLength(i)
910 << " " << dnaFlag << std::endl;
911 }
912 }
913 }
914
debugPrintOutAlignInfo()915 void Alignment::debugPrintOutAlignInfo()
916 {
917 for(int i = 1; i <= numSeqs; i++)
918 {
919 cout << "seq-no=" << i << ": name="
920 << std::left << setw(maxNames) << names.at(i)
921 << " length="
922 << std::right << setw(6) << getSequenceLength(i)
923 << std::endl;
924 }
925 }
926
getSequenceLength(int index)927 int Alignment::getSequenceLength(int index)
928 {
929 return seqArray.at(index).size() - 1;
930 }
931
getLengthLongestSequence()932 int Alignment::getLengthLongestSequence()
933 {
934 int _lengthLongestSequence = 0;
935
936 for(int i = 1; i <= numSeqs; i++)
937 {
938 if(getSeqLength(i) > _lengthLongestSequence)
939 {
940 _lengthLongestSequence = getSeqLength(i);
941 }
942 }
943 return _lengthLongestSequence;
944 }
945
946 /**
947 * This function is used to find the length of the longest sequence in the range.
948 */
getLengthLongestSequence(int firstSeq,int lastSeq)949 int Alignment::getLengthLongestSequence(int firstSeq, int lastSeq)
950 {
951 int _lengthLongestSequence = 0;
952
953 if(firstSeq >= 1 && lastSeq <= numSeqs)
954 {
955 for(int i = firstSeq; i <= lastSeq; i++)
956 {
957 if(getSeqLength(i) > _lengthLongestSequence)
958 {
959 _lengthLongestSequence = getSeqLength(i);
960 }
961 }
962 }
963 return _lengthLongestSequence; // Will return 0 if cant check seqs
964 }
965
getMaxNames()966 int Alignment::getMaxNames()
967 {
968 return maxNames;
969 }
970
getSecStructName1()971 string Alignment::getSecStructName1()
972 {
973 return secStructName1;
974 }
975
getSecStructName2()976 string Alignment::getSecStructName2()
977 {
978 return secStructName2;
979 }
980
getGapPenaltyMask1()981 vector<char>* Alignment::getGapPenaltyMask1()
982 {
983 return &gapPenaltyMask1;
984 }
985
getGapPenaltyMask2()986 vector<char>* Alignment::getGapPenaltyMask2()
987 {
988 return &gapPenaltyMask2;
989 }
990
getSecStructMask1()991 vector<char>* Alignment::getSecStructMask1()
992 {
993 return &secStructMask1;
994 }
995
getSecStructMask2()996 vector<char>* Alignment::getSecStructMask2()
997 {
998 return &secStructMask2;
999 }
1000
1001
getSecStructMask1Element(int index)1002 int Alignment::getSecStructMask1Element(int index)
1003 {
1004 if(index > 0 && index < (int)secStructMask1.size())
1005 {
1006 return secStructMask1[index];
1007 }
1008 else
1009 {
1010 throw VectorOutOfRange(string("secStructMask1"), index, secStructMask1.size() - 1);
1011 }
1012 }
1013
getSecStructMask2Element(int index)1014 int Alignment::getSecStructMask2Element(int index)
1015 {
1016 if(index > 0 && index < (int)secStructMask2.size())
1017 {
1018 return secStructMask2[index];
1019 }
1020 else
1021 {
1022 throw VectorOutOfRange(string("secStructMask2"), index, secStructMask2.size() - 1);
1023 }
1024 }
1025
getGapPenaltyMask1Element(int index)1026 int Alignment::getGapPenaltyMask1Element(int index)
1027 {
1028 if(index > 0 && index < (int)gapPenaltyMask1.size())
1029 {
1030 return gapPenaltyMask1[index];
1031 }
1032 else
1033 {
1034 throw VectorOutOfRange(string("gapPenaltyMask1"), index, gapPenaltyMask1.size() - 1);
1035 }
1036 }
1037
getGapPenaltyMask2Element(int index)1038 int Alignment::getGapPenaltyMask2Element(int index)
1039 {
1040 if(index > 0 && index < (int)gapPenaltyMask2.size())
1041 {
1042 return gapPenaltyMask2[index];
1043 }
1044 else
1045 {
1046 throw VectorOutOfRange(string("gapPenaltyMask2"), index, gapPenaltyMask2.size() - 1);
1047 }
1048 }
1049
getName(int index)1050 string Alignment::getName(int index)
1051 {
1052 if(index > 0 && index < (int)names.size())
1053 {
1054 return names[index];
1055 }
1056 else
1057 {
1058 throw VectorOutOfRange(string("names"), index, names.size() - 1);
1059 }
1060 }
1061
1062 /**
1063 * Change:
1064 * Mark 25-1-2007: This function was added to allow access to the unique id.
1065 */
getUniqueId(int seq)1066 unsigned long Alignment::getUniqueId(int seq)
1067 {
1068 if(seq > 0 && seq < (int)sequenceIds.size())
1069 {
1070 return sequenceIds[seq];
1071 }
1072 else
1073 {
1074 throw VectorOutOfRange(string("sequenceIds"), seq, sequenceIds.size() - 1);
1075 }
1076 }
1077
getTitle(int index)1078 string Alignment::getTitle(int index)
1079 {
1080 if(index > 0 && index < (int)titles.size())
1081 {
1082 return titles[index];
1083 }
1084 else
1085 {
1086 throw VectorOutOfRange(string("titles"), index, titles.size() - 1);
1087 }
1088 }
1089
getOutputIndex(int index)1090 int Alignment::getOutputIndex(int index)
1091 {
1092 if(index >= 0 && index < (int)outputIndex.size())
1093 {
1094 return outputIndex[index];
1095 }
1096 else
1097 {
1098 throw VectorOutOfRange(string("outputIndex"), index, outputIndex.size() - 1);
1099 }
1100 }
1101
getSeqWeight(int index) const1102 int Alignment::getSeqWeight(int index) const
1103 {
1104 if(index >= 0 && index < (int)seqWeight.size())
1105 {
1106 return seqWeight[index];
1107 }
1108 else
1109 {
1110 throw VectorOutOfRange(string("seqWeight"), index, seqWeight.size() - 1);
1111 }
1112 }
1113
1114
1115 /**
1116 * This function is used by Qt. It is used to calculate the histogram values for the
1117 * widget in clustalQt. The function is in here because it needs access to the SubMatrix
1118 * class.
1119 * @param matNum The number of the matrix to be used in the comparison.
1120 * @return A pointer to a vector containing the histogram values.
1121 */
QTcalcHistColumnHeights(int firstSeq,int nSeqs,Array2D<int> * exceptionalRes)1122 vector<int>* Alignment::QTcalcHistColumnHeights(int firstSeq, int nSeqs,
1123 Array2D<int>* exceptionalRes)
1124 {
1125 int n, i, s, p, r, r1;
1126 int numColumns = getLengthLongestSequence();
1127 int scoreScale = userParameters->getQTScorePlotScale();//5; // From ClustalX.
1128 int scoreCutOff = userParameters->getQTResExceptionCutOff();
1129 bool includeGaps = false;
1130 //short *mat_xref, *matptr;
1131 float median, mean;
1132 float t, q1, q3, ul;
1133 vector<float> seqdist, sorteddist;
1134 float diff;
1135 vector<int> seqvector;
1136 vector<int> freq;
1137 vector<vector<int> > profile;
1138 int matrix[NUMRES][NUMRES];
1139 int _maxAA = userParameters->getMaxAA();
1140 int _gapPos1 = userParameters->getGapPos1();
1141 histogramColumnHeights.resize(numColumns);
1142 //panel_data data1;
1143 subMatrix->getQTMatrixForHistogram(matrix);
1144
1145 profile.resize(numColumns + 2, vector<int>(_maxAA + 2));
1146 freq.resize(_maxAA + 2);
1147
1148 for(p = 0; p < numColumns; p++)
1149 {
1150 for(r = 0; r < _maxAA; r++)
1151 {
1152 freq[r] = 0;
1153 }
1154 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1155 {
1156 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1157 seqArray[s + 1][p + 1] < _maxAA)
1158 {
1159 freq[seqArray[s + 1][p + 1]]++;
1160 }
1161 }
1162 for(r = 0; r < _maxAA; r++)
1163 {
1164 profile[p][r] = 0;
1165 for(r1 = 0; r1 < _maxAA; r1++)
1166 {
1167 profile[p][r] += freq[r1] * matrix[r1][r];
1168 }
1169 profile[p][r] = static_cast<int>(profile[p][r] /
1170 static_cast<float>(nSeqs)); // Mark change 17-5-07
1171 }
1172 }
1173
1174 seqvector.resize(_maxAA + 2);
1175 seqdist.resize(nSeqs + 1);
1176 sorteddist.resize(nSeqs + 1);
1177
1178 for(p = 0; p < numColumns; p++)
1179 {
1180 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1181 {
1182 if (p < getSeqLength(s + 1))
1183 {
1184 for (r = 0; r < _maxAA; r++)
1185 {
1186 seqvector[r] = matrix[r][seqArray[s + 1][p + 1]];
1187 }
1188 }
1189 else
1190 {
1191 for (r = 0; r < _maxAA; r++)
1192 {
1193 seqvector[r] = matrix[r][_gapPos1];
1194 }
1195 }
1196 seqdist[s - firstSeq] = 0.0;
1197 for(r = 0; r < _maxAA; r++)
1198 {
1199 diff = profile[p][r] - seqvector[r];
1200 diff /= 1000.0;
1201 seqdist[s - firstSeq] += diff * diff;
1202 }
1203 seqdist[s - firstSeq] = sqrt((double)seqdist[s - firstSeq]);
1204 }
1205
1206 // calculate mean,median and rms of seq distances
1207 mean = median = 0.0;
1208 if(includeGaps)
1209 {
1210 for(s = 0; s < nSeqs; s++)
1211 {
1212 mean += seqdist[s];
1213 }
1214 mean /= nSeqs;
1215 n = nSeqs;
1216 for(s = 0; s < nSeqs; s++)
1217 {
1218 sorteddist[s] = seqdist[s];
1219 }
1220 }
1221 else
1222 {
1223 n = 0;
1224 for(s = firstSeq; s < firstSeq + nSeqs; s++)
1225 {
1226 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1227 seqArray[s + 1][p + 1] < _maxAA)
1228 {
1229 mean += seqdist[s - firstSeq];
1230 n++;
1231 }
1232 }
1233 if(n > 0)
1234 {
1235 mean /= n;
1236 }
1237 for(s = firstSeq, i = 0; s < firstSeq + nSeqs; s++)
1238 {
1239 if(p < getSeqLength(s + 1) && seqArray[s + 1][p + 1] >= 0 &&
1240 seqArray[s + 1][p + 1] < _maxAA)
1241 {
1242 sorteddist[i++] = seqdist[s - firstSeq];
1243 }
1244 }
1245 }
1246 sortScores(&sorteddist, 0, n - 1);
1247
1248
1249 if(n == 0)
1250 {
1251 median = 0;
1252 }
1253 else if(n % 2 == 0)
1254 {
1255 median = (sorteddist[n / 2 - 1] + sorteddist[n / 2]) / 2.0;
1256 }
1257 else
1258 {
1259 median = sorteddist[n / 2];
1260 }
1261
1262 if(scoreScale <= 5)
1263 {
1264 histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean *
1265 (6 - scoreScale) / 4.0)) * 100.0 * n / nSeqs);
1266 }
1267 else
1268 {
1269 histogramColumnHeights[p] = static_cast<int>(exp((double)(-mean /
1270 (4.0 * (scoreScale - 4)))) * 100.0 * n / nSeqs);
1271 }
1272
1273 if(n == 0)
1274 {
1275 ul = 0;
1276 }
1277 else
1278 {
1279 t = n/4.0 + 0.5;
1280 if(t - (int)t == 0.5)
1281 {
1282 q3 = (sorteddist[(int)t] + sorteddist[(int)t + 1]) / 2.0;
1283 q1 = (sorteddist[n-(int)t] + sorteddist[n - (int)t - 1]) / 2.0;
1284 }
1285 else if(t - (int)t > 0.5)
1286 {
1287 q3 = sorteddist[(int)t + 1];
1288 q1 = sorteddist[n - (int)t - 1];
1289 }
1290 else
1291 {
1292 q3 = sorteddist[(int)t];
1293 q1 = sorteddist[n - (int)t];
1294 }
1295 if (n < 4)
1296 {
1297 ul = sorteddist[0];
1298 }
1299 else
1300 {
1301 ul = q3 + (q3 - q1) * ((float)scoreCutOff / 2.0);
1302 }
1303 }
1304
1305 if((exceptionalRes->getRowSize() >= nSeqs) &&
1306 exceptionalRes->getColSize() >= numColumns)
1307 {
1308 for(int s = firstSeq; s < firstSeq + nSeqs; s++)
1309 {
1310 if(seqdist[s - firstSeq] > ul && p < getSeqLength(s + 1) &&
1311 seqArray[s + 1][p + 1] >= 0 &&
1312 seqArray[s + 1][p + 1] < userParameters->getMaxAA())
1313 {
1314 (*exceptionalRes)[s - firstSeq][p] = 1;
1315 }
1316 else
1317 {
1318 (*exceptionalRes)[s - firstSeq][p] = 0;
1319 }
1320 }
1321 }
1322 }
1323 return &histogramColumnHeights;
1324 }
1325
sortScores(vector<float> * scores,int f,int l)1326 void Alignment::sortScores(vector<float>* scores, int f, int l)
1327 {
1328 int i,last;
1329
1330 if(f >= l)
1331 return;
1332
1333 swap(scores, f, (f + l) / 2);
1334 last = f;
1335 for(i = f + 1; i <= l; i++)
1336 {
1337 if((*scores)[i] > (*scores)[f])
1338 {
1339 swap(scores, ++last, i);
1340 }
1341 }
1342 swap(scores, f, last);
1343 sortScores(scores, f, last - 1);
1344 sortScores(scores, last + 1, l);
1345 }
1346
swap(vector<float> * scores,int s1,int s2)1347 void Alignment::swap(vector<float>* scores, int s1, int s2)
1348 {
1349 float temp;
1350
1351 temp = (*scores)[s1];
1352 (*scores)[s1] = (*scores)[s2];
1353 (*scores)[s2] = temp;
1354 }
1355
searchForString(bool * found,int seq,int beginRes,string search)1356 int Alignment::searchForString(bool* found, int seq, int beginRes, string search)
1357 {
1358 int lengthSeq = getSeqLength(seq);
1359 if(beginRes > lengthSeq)
1360 {
1361 *found = false;
1362 return beginRes;
1363 }
1364
1365 int res = beginRes;
1366
1367 // First need to convert search into a vector of ints!
1368 vector<int> codedSearch;
1369 int size = search.size();
1370 codedSearch.resize(size);
1371 int code;
1372 for(int i = 0; i < size; i++)
1373 {
1374 code = userParameters->resIndex(userParameters->getAminoAcidCodes(), search[i]);
1375 codedSearch[i] = code;
1376 }
1377
1378 int numSame = 0;
1379 int startPos = -1;
1380 int searchSize = codedSearch.size();
1381 // Now check for the string of ints!!!!
1382 for(; res <= lengthSeq; res++) //- nige: res starts at 1
1383
1384 {
1385 if(seqArray[seq][res] == codedSearch[0])
1386 {
1387 startPos = res; //- nige
1388 for(int i = 0; i < searchSize && (i + res) <= lengthSeq; i++) //- nige: res starts at 1
1389 {
1390 if(seqArray[seq][res + i] == codedSearch[i])
1391 {
1392 numSame++;
1393 }
1394 //nige: hack: encoded gap character: see also AlignmentScroll.cpp
1395 else if (seqArray[seq][res + i] == 31 || seqArray[seq][res + i] == 30)
1396 {
1397 res++;
1398 i--;
1399 }
1400 else
1401 {
1402 break; // Not the same
1403 }
1404 }
1405 if(numSame == searchSize)
1406 {
1407 *found = true;
1408 return startPos;
1409 }
1410 else
1411 {
1412 numSame = 0;
1413 }
1414 }
1415 }
1416 *found = false;
1417 return startPos; // Not found!!!!!!!
1418 }
1419
removeGapsFromSelectedSeqs(vector<int> * selected)1420 void Alignment::removeGapsFromSelectedSeqs(vector<int>* selected)
1421 {
1422 //getNumSeqs()
1423 int size = selected->size();
1424 int lengthOfSelectedSeq = 0;
1425 int gapPos1 = userParameters->getGapPos1();
1426 int gapPos2 = userParameters->getGapPos2();
1427 int s1;
1428
1429 for(int i = 1; i <= getNumSeqs() && i < size; i++)
1430 {
1431 if((*selected)[i] == 1)
1432 {
1433 // remove gaps from this seq!
1434 lengthOfSelectedSeq = getSeqLength(i);
1435 s1 = 0;
1436 for(int j = 1; j <= lengthOfSelectedSeq; j++)
1437 {
1438 if(seqArray[i][j] == gapPos1 || seqArray[i][j] == gapPos2)
1439 {
1440 continue;
1441 }
1442 ++s1;
1443 seqArray[i][s1] = seqArray[i][j];
1444 }
1445 // Then remove the excess bit at the end of the array
1446 int numExtraElements = lengthOfSelectedSeq - s1;
1447
1448 if((int)seqArray[i].size() > numExtraElements)
1449 {
1450 for(int k = 0; k < numExtraElements; k++)
1451 {
1452 seqArray[i].pop_back();
1453 }
1454 }
1455 }
1456 }
1457
1458 }
1459
removeGapOnlyColsFromSelectedSeqs(vector<int> * selected)1460 void Alignment::removeGapOnlyColsFromSelectedSeqs(vector<int>* selected)
1461 {
1462 int numGaps = 0;
1463 int NoneSelected = -1;
1464 int numColumns = 0;
1465 int sizeSelected = selected->size();
1466 int firstSeqSelected = NoneSelected;
1467 int gapPos1 = userParameters->getGapPos1();
1468 int gapPos2 = userParameters->getGapPos2();
1469 int k;
1470
1471 for(int i = 1; i < sizeSelected; i++)
1472 {
1473 if((*selected)[i] == 1)
1474 {
1475 numColumns++;
1476 if(firstSeqSelected == -1)
1477 {
1478 firstSeqSelected = i;
1479 }
1480 }
1481 }
1482
1483 if(firstSeqSelected == NoneSelected)
1484 {
1485 cout << "No Sequences have been selected\n";
1486 return;
1487 }
1488
1489 for(int i = 1; i <= getSeqLength(firstSeqSelected);)
1490 {
1491 numGaps = 0;
1492 for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1493 {
1494 if(getSeqLength(j) >= i)
1495 {
1496 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1497 {
1498 numGaps++;
1499 }
1500 }
1501 }
1502 if(numGaps == numColumns)
1503 {
1504 //cout << " removing a gap column\n\n";
1505 for(int j = firstSeqSelected; j < sizeSelected && (*selected)[j] == 1; j++)
1506 {
1507 for(k = i + 1; k <= getSeqLength(j) + 1 && (int)seqArray[j].size() > k; k++)
1508 {
1509 seqArray[j][k - 1] = seqArray[j][k];
1510 }
1511 seqArray[j].pop_back(); // Remove the last element!!
1512
1513 if(getSeqLength(firstSeqSelected) <= 0)
1514 {
1515 break;
1516 }
1517 }
1518 }
1519 else
1520 {
1521 i++;
1522 }
1523 }
1524
1525 }
1526
removeAllGapOnlyColumns(int fSeq,int lSeq,int profileNum)1527 void Alignment::removeAllGapOnlyColumns(int fSeq, int lSeq, int profileNum)
1528 {
1529 if(fSeq >= lSeq)
1530 {
1531 return;
1532 }
1533 int gapPos1 = userParameters->getGapPos1();
1534 int gapPos2 = userParameters->getGapPos2();
1535
1536 int numGaps = 0;
1537 int numColumns = lSeq - fSeq + 1;
1538 int k;
1539 // We must cheack each column to see if it consists of only '-'
1540 for(int i = 1; i <= getSeqLength(fSeq);)
1541 {
1542 numGaps = 0;
1543 for(int j = fSeq; j <= lSeq; j++)
1544 {
1545 if(getSeqLength(j) >= i)
1546 {
1547 if(seqArray[j][i] == gapPos1 || seqArray[j][i] == gapPos2)
1548 {
1549 numGaps++;
1550 }
1551 }
1552 }
1553 if(numGaps == numColumns)
1554 {
1555 for(int j = fSeq; j <= lSeq; j++)
1556 {
1557 for(k = i + 1; k <= getSeqLength(j) + 1; k++)
1558 {
1559 seqArray[j][k - 1] = seqArray[j][k];
1560 }
1561 seqArray[j].pop_back(); // Remove the last element!!
1562
1563 if(profileNum == 1)
1564 {
1565 int lengthSecStruct = secStructMask1.size();
1566 int lengthGapMask = gapPenaltyMask1.size();
1567
1568 for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1569 {
1570 secStructMask1[k - 1] = secStructMask1[k];
1571 }
1572 for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1573 {
1574 gapPenaltyMask1[k - 1] = gapPenaltyMask1[k];
1575 }
1576 }
1577
1578 if(profileNum == 2)
1579 {
1580 int lengthSecStruct = secStructMask2.size();
1581 int lengthGapMask = gapPenaltyMask2.size();
1582
1583 for(k = i; k <= getSeqLength(fSeq) && k < lengthSecStruct; k++)
1584 {
1585 secStructMask2[k - 1] = secStructMask2[k];
1586 }
1587 for(k = i; k <= getSeqLength(fSeq) && k < lengthGapMask; k++)
1588 {
1589 gapPenaltyMask2[k - 1] = gapPenaltyMask2[k];
1590 }
1591 }
1592 if(getSeqLength(fSeq) <= 0)
1593 {
1594 break;
1595 }
1596 }
1597 }
1598 else
1599 {
1600 i++;
1601 }
1602 }
1603 }
1604
cutSelectedSequencesFromAlignment(vector<int> * selected)1605 vector<Sequence> Alignment::cutSelectedSequencesFromAlignment(vector<int>* selected)
1606 {
1607 vector<Sequence> cutSequences;
1608 int sizeOfSelected = selected->size();
1609 SeqArray::iterator seqArrayIterator;
1610 vector<string>::iterator namesIterator;
1611 vector<string>::iterator titlesIterator;
1612 vector<int>::iterator seqWeightIterator;
1613 vector<unsigned long>::iterator sequenceIdsIterator;
1614
1615 int newProfile1NumSeqs = profile1NumSeqs;
1616 int profNum = userParameters->getProfileNum();
1617 int prof1NumSeqs = profile1NumSeqs;
1618 int numCutSoFar = 0;
1619 int intialNumSeqs = numSeqs;
1620
1621 for(int i = 1; i < sizeOfSelected && i <= intialNumSeqs; i++)
1622 {
1623 if((*selected)[i] == 1)
1624 {
1625 // Cut the sequence from the alignment!
1626 seqArrayIterator = seqArray.begin() + i - numCutSoFar;
1627 namesIterator = names.begin() + i - numCutSoFar;
1628 titlesIterator = titles.begin() + i - numCutSoFar;
1629 seqWeightIterator = seqWeight.begin() + i - numCutSoFar;
1630 sequenceIdsIterator = sequenceIds.begin() + i - numCutSoFar;
1631 Sequence SeqToCut(&seqArray[i - numCutSoFar], *namesIterator, *titlesIterator,
1632 *sequenceIdsIterator);
1633
1634 numCutSoFar++;
1635
1636 seqArrayIterator->clear();
1637 seqArray.erase(seqArrayIterator);
1638 names.erase(namesIterator);
1639 titles.erase(titlesIterator);
1640 seqWeight.erase(seqWeightIterator);
1641 sequenceIds.erase(sequenceIdsIterator);
1642
1643 if(numSeqs > 0)
1644 {
1645 numSeqs--;
1646 }
1647 else
1648 {
1649 numSeqs = 0;
1650 break;
1651 }
1652 if(profNum > 0 && i <= prof1NumSeqs)
1653 {
1654 if(newProfile1NumSeqs > 0)
1655 {
1656 newProfile1NumSeqs--;
1657 }
1658 else
1659 {
1660 newProfile1NumSeqs = 0;
1661 }
1662 }
1663 cutSequences.push_back(SeqToCut);
1664 }
1665 }
1666 profile1NumSeqs = newProfile1NumSeqs;
1667 setDefaultOutputIndex();
1668 resetAllSeqWeights();
1669 return cutSequences;
1670 }
1671
setDefaultOutputIndex()1672 void Alignment::setDefaultOutputIndex()
1673 {
1674 outputIndex.clear();
1675 outputIndex.resize(numSeqs);
1676 for(int iseq = 1; iseq <= numSeqs; iseq++)
1677 {
1678 outputIndex[iseq - 1] = iseq;
1679 }
1680 }
1681
removeAllOutsideRange(int beginPos,int endPos)1682 bool Alignment::removeAllOutsideRange(int beginPos, int endPos)
1683 {
1684 bool ok;
1685 if(beginPos < 0 || endPos > getLengthLongestSequence())
1686 {
1687 return false; // cannot do it!!!!
1688 }
1689
1690 // trim the seqArray
1691 ok = keepPortionOfSeqArray(beginPos, endPos);
1692 if(!ok)
1693 {
1694 cerr << "There was a problem removing a portion of the array\n";
1695 return false;
1696 }
1697
1698 // recalculate the maxLengths
1699 calculateMaxLengths();
1700
1701 // Clear the histogram columns
1702 histogramColumnHeights.clear();
1703
1704 // reset the weights
1705 resetAllSeqWeights();
1706 return true;
1707 }
1708
keepPortionOfSeqArray(int beginRangeIndex,int endRangeIndex)1709 bool Alignment::keepPortionOfSeqArray(int beginRangeIndex, int endRangeIndex)
1710 {
1711 SeqArray sectionToRealign;
1712 vector<int> emptyVec;
1713 sectionToRealign.push_back(emptyVec); // EMPTY sequence
1714
1715 SeqArray::iterator posToAddTo = sectionToRealign.begin();
1716 // erase from all sequences the range specified here!!!!!
1717 if(beginRangeIndex < 0 || endRangeIndex < 0)
1718 {
1719 return false;
1720 }
1721
1722 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1723 SeqArray::iterator mainEndIt = seqArray.end();
1724
1725 vector<int>::iterator begin, end, beginRange, endRange, beginCopyRange, endCopyRange;
1726
1727 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1728 {
1729 vector<int> vecToAdd;
1730 begin = mainBeginIt->begin() + 1;
1731 end = mainBeginIt->end();
1732 beginRange = begin + beginRangeIndex;
1733 endRange = begin + endRangeIndex + 1;
1734 beginCopyRange = beginRange;
1735 endCopyRange = endRange;
1736
1737 // We need to copy all of this into another vector.
1738 if(endCopyRange < end && beginCopyRange < end)
1739 {
1740 vecToAdd.push_back(0);
1741 for(; beginCopyRange != endCopyRange; beginCopyRange++)
1742 {
1743 vecToAdd.push_back(*beginCopyRange);
1744 }
1745 sectionToRealign.push_back(vecToAdd);
1746 }
1747 else
1748 {
1749 return false;
1750 }
1751
1752 if(endRange < end && beginRange < end)
1753 {
1754 mainBeginIt->erase(beginRange, endRange);
1755 }
1756 else
1757 {
1758 return false;
1759 }
1760 }
1761 clearSeqArray();
1762 seqArray = sectionToRealign;
1763 return true;
1764 }
1765
debugPrintSeqArray(SeqArray * arrayToPrint)1766 void Alignment::debugPrintSeqArray(SeqArray* arrayToPrint)
1767 {
1768 // I need to use iterators for everything here.
1769 SeqArray::iterator mainBeginIt = arrayToPrint->begin();
1770 SeqArray::iterator mainEndIt = arrayToPrint->end();
1771 vector<int>::iterator begin, end;
1772 string aaCodes = userParameters->getAminoAcidCodes();
1773
1774 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1775 {
1776 if(mainBeginIt->size() > 0)
1777 {
1778 begin = mainBeginIt->begin() + 1;
1779 end = mainBeginIt->end();
1780 for(; begin != end; begin++)
1781 {
1782 if(*begin < (int)aaCodes.size())
1783 {
1784 cout << aaCodes[*begin];
1785 }
1786 else
1787 {
1788 cout << "-";
1789 }
1790 }
1791 cout << "\n";
1792 }
1793 }
1794 }
1795
debugPrintProfile1()1796 void Alignment::debugPrintProfile1()
1797 {
1798 cout << "************** PROFILE1 *********************\n";
1799 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1800 SeqArray::iterator mainEndIt = mainBeginIt + profile1NumSeqs;
1801 vector<int>::iterator begin, end;
1802 string aaCodes = userParameters->getAminoAcidCodes();
1803
1804 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1805 {
1806 cout << "PROFILE1 SEQ: ";
1807 if(mainBeginIt->size() > 0)
1808 {
1809 begin = mainBeginIt->begin() + 1;
1810 end = mainBeginIt->end();
1811 for(; begin != end; begin++)
1812 {
1813 if(*begin < (int)aaCodes.size())
1814 {
1815 cout << aaCodes[*begin];
1816 }
1817 else
1818 {
1819 cout << "-";
1820 }
1821 }
1822 cout << "\n";
1823 }
1824 }
1825 }
1826
debugPrintProfile2()1827 void Alignment::debugPrintProfile2()
1828 {
1829 cout << "************** PROFILE2 *********************\n";
1830 SeqArray::iterator mainBeginIt = seqArray.begin() + 1 + profile1NumSeqs;
1831 SeqArray::iterator mainEndIt = seqArray.end();
1832 vector<int>::iterator begin, end;
1833 string aaCodes = userParameters->getAminoAcidCodes();
1834
1835 for(; mainBeginIt != mainEndIt; mainBeginIt++)
1836 {
1837 cout << "PROFILE2 SEQ: ";
1838 if(mainBeginIt->size() > 0)
1839 {
1840 begin = mainBeginIt->begin() + 1;
1841 end = mainBeginIt->end();
1842 for(; begin != end; begin++)
1843 {
1844 if(*begin < (int)aaCodes.size())
1845 {
1846 cout << aaCodes[*begin];
1847 }
1848 else
1849 {
1850 cout << "-";
1851 }
1852 }
1853 cout << "\n";
1854 }
1855 }
1856 }
1857
updateRealignedRange(SeqArray realignedSeqs,int beginPos,int endPos)1858 bool Alignment::updateRealignedRange(SeqArray realignedSeqs, int beginPos, int endPos)
1859 {
1860 if(realignedSeqs.size() != seqArray.size())
1861 {
1862 return false;
1863 }
1864 if(beginPos < 0 || endPos < 0)
1865 {
1866 return false;
1867 }
1868
1869 // erase from all sequences the range specified here!!!!!
1870
1871 SeqArray::iterator mainBeginIt = seqArray.begin() + 1;
1872 SeqArray::iterator mainEndIt = seqArray.end();
1873
1874 SeqArray::iterator pasteBeginIt = realignedSeqs.begin() + 1;
1875 SeqArray::iterator pasteEndIt = realignedSeqs.end();
1876
1877 vector<int>::iterator begin, end, beginRange, endRange;
1878
1879 for(; mainBeginIt != mainEndIt && pasteBeginIt != pasteEndIt; mainBeginIt++)
1880 {
1881 vector<int> vecToAdd;
1882 begin = mainBeginIt->begin() + 1;
1883 end = mainBeginIt->end();
1884 beginRange = begin + beginPos;
1885 endRange = begin + endPos + 1;
1886
1887 if(endRange < end && beginRange < end)
1888 {
1889 mainBeginIt->erase(beginRange, endRange);
1890 mainBeginIt->insert(beginRange, pasteBeginIt->begin() + 1,
1891 pasteBeginIt->end());
1892 }
1893 else
1894 {
1895 return false;
1896 }
1897 pasteBeginIt++;
1898 }
1899 return true;
1900 }
1901
reloadAlignment()1902 bool Alignment::reloadAlignment()
1903 {
1904 if(getNumSeqs() <= 0)
1905 {
1906 return false;
1907 }
1908 if(userParameters->getOutputOrder() == INPUT)
1909 {
1910 return true;
1911 }
1912 if((int)outputIndex.size() != getNumSeqs())
1913 {
1914 outputIndex.clear();
1915 return false;
1916 }
1917 vector<int> emptyVec;
1918 string emptyString = "";
1919 SeqArray outputOrderSeqArray;
1920 outputOrderSeqArray.resize(getNumSeqs() + 1);
1921 outputOrderSeqArray[0] = emptyVec;
1922 vector<string> outputOrderNames;
1923 outputOrderNames.resize(getNumSeqs() + 1);
1924 outputOrderNames[0] = emptyString;
1925 vector<string> outputOrderTitles;
1926 outputOrderTitles.resize(getNumSeqs() + 1);
1927 outputOrderTitles[0] = emptyString;
1928 vector<unsigned long> outputOrderSequenceIds;
1929 outputOrderSequenceIds.resize(getNumSeqs() + 1);
1930 outputOrderSequenceIds[0] = 0;
1931
1932 int size = seqArray.size();
1933 if((seqArray.size() != names.size()) || (seqArray.size() != titles.size()) ||
1934 sequenceIds.size() != names.size())
1935 {
1936 return false;
1937 }
1938
1939 int _outIndex;
1940 // Now for each seq,
1941 for(int i = 1; i < size; i++)
1942 {
1943 if(i < (int)outputOrderSeqArray.size() && i - 1 < (int)outputIndex.size() &&
1944 outputIndex[i - 1] < size)
1945 {
1946 _outIndex = outputIndex[i - 1];
1947 outputOrderSeqArray[i] = seqArray[_outIndex];
1948 outputOrderNames[i] = names[_outIndex];
1949 outputOrderTitles[i] = titles[_outIndex];
1950 outputOrderSequenceIds[i] = sequenceIds[_outIndex];
1951 }
1952 else
1953 {
1954 return false;
1955 }
1956 }
1957
1958 // Now we have a copy in the correct order.
1959 // Remove all the elements from the old ones and set them to be these arrays
1960 clearSeqArray();
1961 seqArray = outputOrderSeqArray;
1962 names.clear();
1963 names = outputOrderNames;
1964 titles.clear();
1965 titles = outputOrderTitles;
1966 sequenceIds.clear();
1967 sequenceIds = outputOrderSequenceIds;
1968
1969 return true;
1970 }
1971
getSequenceFromUniqueId(unsigned long id)1972 const vector<int>* Alignment::getSequenceFromUniqueId(unsigned long id)
1973 {
1974 for(int i = 0; i < (int)sequenceIds.size(); i++)
1975 {
1976 if(sequenceIds[i] == id)
1977 {
1978 return getSequence(i);
1979 }
1980 }
1981
1982 // We have not found it, throw an exception!!!
1983 throw SequenceNotFoundException();
1984 }
1985
1986 /**
1987 * Change:
1988 * Mark 26-1-2007: This function was added to allow access to the unique id.
1989 */
updateSequence(int index,const vector<int> * seq)1990 void Alignment::updateSequence(int index, const vector<int>* seq)
1991 {
1992 if(index >= 1 && index < (int)seqArray.size())
1993 {
1994 seqArray[index] = *seq;
1995 }
1996 }
1997
1998 /**
1999 * Change:
2000 * Mark 17-2-2007: This function was added to check if a res is a gap or not.
2001 */
isGap(int seq,int col) const2002 bool Alignment::isGap(int seq, int col) const
2003 {
2004 int res = seqArray[seq][col];
2005 if(res == gapPos1 || res == gapPos2)
2006 {
2007 return true;
2008 }
2009 else
2010 {
2011 return false;
2012 }
2013 }
2014 /**
2015 * This function will be used so that we can create an alignment object from a seqArray.
2016 * This will be used for the tree iteration.
2017 */
addSequences(SeqArray * seqVector)2018 void Alignment::addSequences(SeqArray* seqVector)
2019 {
2020 clearAlignment();
2021 numSeqs = seqVector->size() - 1;
2022 vector<int> emptyVec;
2023
2024 seqArray.push_back(emptyVec); // EMPTY sequence
2025 names.push_back(string(""));
2026 titles.push_back(string(""));
2027 sequenceIds.push_back(0);
2028 cout << "\nThere are " << numSeqs << " in the alignment obj\n";
2029 for(int i = 1; i <= numSeqs; i++)
2030 {
2031 ostringstream name;
2032 seqArray.push_back((*seqVector)[i]);
2033 titles.push_back(string(""));
2034 sequenceIds.push_back(utilityObject->getUniqueSequenceIdentifier());
2035 name << "name" << numSeqs;
2036 names.push_back(name.str());
2037 }
2038
2039 calculateMaxLengths();
2040 seqWeight.resize(numSeqs + 1, 100);
2041 }
2042
2043 }
2044
2045