1 /**
2 * Author: Mark Larkin
3 *
4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5 */
6 /**
7 * Changes:
8 *
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines (for dendrograms, not for help file). Remerged
11 * this change 13-04-07.
12 *
13 * Mark 10-5-2007: Bug fix # 42. call getWeightsForQtLowScore function in
14 * QTcalcWeightsForLowScoreSeg instead of getWeightsFromDistMat.
15 *
16 * Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
17 * Mark Change 20-6-07, added call to calculateMaxLengths()
18 * Mark, 3-7-07, Changed getHelp.
19 */
20 #ifdef HAVE_CONFIG_H
21 #include "config.h"
22 #endif
23
24 #include <iostream>
25 #include <fstream>
26 #include <cstdio>
27 #include "Clustal.h"
28 #include "pairwise/FullPairwiseAlign.h"
29 #include "pairwise/FastPairwiseAlign.h"
30 #include "multipleAlign/MSA.h"
31 #include "multipleAlign/LowScoreSegProfile.h"
32 #include "multipleAlign/Iteration.h"
33 #include "tree/TreeInterface.h"
34 #include "general/debuglogObject.h"
35 #include "general/statsObject.h"
36 #include "alignment/ObjectiveScore.h"
37 #include "general/ClustalWResources.h"
38 #include "Help.h"
39 #include <ctime>
40
41 using namespace std;
42
43 namespace clustalw
44 {
45
Clustal()46 Clustal::Clustal()
47 {
48 #ifdef WINDOWS
49 helpFileName = string("clustalw.hlp");
50 #else
51 helpFileName = string("clustalw_help");
52 #endif
53
54 checkTree = true;
55 newSeq = 0;
56 sequencesMsg = "\nUse the existing GUIDE TREE file, ";
57 profile1Msg = "\nUse the existing GUIDE TREE file for Profile 1, " ;
58 profile2Msg = "\nUse the existing GUIDE TREE file for Profile 2, ";
59
60 newProfile1TreePrompt = "\nEnter name for new GUIDE TREE file for profile 1 [";
61 newProfile2TreePrompt = "\nEnter name for new GUIDE TREE file for profile 2 [";
62
63 initInterface();
64 }
65
66 /** ***********************************************************************
67 * The function align is used to do a full multiple sequence alignment. *
68 **************************************************************************/
69 // FIXME: merge doAlignUseOldTree in here
align(string * phylipName,bool createOutput)70 void Clustal::align(string* phylipName, bool createOutput)
71 {
72 //time_t start, end;
73 //double dif;
74 //start = time (NULL);
75 //ObjectiveScore score;
76 //double _score = score.getSagaScore(&alignmentObj);
77 //cout << "SAGA score " << _score << "\n";
78
79 string path;
80 int count;
81 AlignmentOutput alignOutput;
82
83 if(userParameters->getEmpty() && userParameters->getMenuFlag())
84 {
85 utilityObject->error("No sequences in memory. Load sequences first.");
86 return;
87 }
88
89 userParameters->setStructPenalties1(NONE);
90 userParameters->setStructPenalties2(NONE);
91
92 alignmentObj.clearSecStruct1();
93 alignmentObj.clearSecStruct2();
94
95 utilityObject->getPath(userParameters->getSeqName(), &path);
96
97 if(createOutput && (userParameters->getMenuFlag() || !userParameters->getInteractive()))
98 {
99 if(!alignOutput.openAlignmentOutput(path))
100 {
101 return;
102 }
103 }
104 else if(createOutput)
105 {
106 // We are using clustalQT.
107 // Open all the files.
108 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
109 {
110 return; // could not open the files.
111 }
112 }
113
114 if (userParameters->getSaveParameters())
115 {
116 userParameters->createParameterOutput();
117 }
118
119 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
120 {
121 alignmentObj.resetAlign();
122 }
123 if(userParameters->getDisplayInfo())
124 {
125 cout << "Start of Pairwise alignments\n";
126 cout << "Aligning...\n";
127 }
128 if(userParameters->getDNAFlag())
129 {
130 userParameters->setDNAParams();
131 }
132 else
133 {
134 userParameters->setProtParams();
135 }
136
137 if (statsObject->isEnabled()) {
138 statsObject->logInputSeqStats(&alignmentObj);
139 }
140
141
142 /// STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX
143
144 SymMatrix distMat;
145 int _numSeqs = alignmentObj.getNumSeqs();
146 distMat.ResizeRect(_numSeqs + 1);
147
148 PairwiseAlignBase* pairwiseDist;
149 if (userParameters->getQuickPairAlign())
150 {
151 pairwiseDist = new FastPairwiseAlign();
152 }
153 else
154 {
155 pairwiseDist = new FullPairwiseAlign();
156 }
157 // Generate distance matrix!
158 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
159 delete pairwiseDist;
160
161 bool success = false;
162 auto_ptr<AlignmentSteps> progSteps;
163 vector<int> seqWeight(_numSeqs + 1);
164
165 #if DEBUGFULL
166 if(logObject && DEBUGLOG)
167 {
168 logObject->logMsg("Calling getWeightsAndStepsFromDistMat\n");
169 }
170 #endif
171
172 TreeInterface calcSteps;
173 progSteps = calcSteps.getWeightsAndStepsFromDistMat(&seqWeight, &distMat, &alignmentObj,
174 1, _numSeqs, phylipName, &success);
175 //cout << "weights and steps calculated!\n";
176 //end = time (NULL);
177 //dif = difftime(end, start);
178 //cout << "It took " << dif << " seconds so Far\n";
179
180 if(!success)
181 {
182 #if DEBUGFULL
183 if(logObject && DEBUGLOG)
184 {
185 logObject->logMsg("Unsuccessful!!!\n");
186 }
187 #endif
188 return;
189 }
190 #if DEBUGFULL
191 if(logObject && DEBUGLOG)
192 {
193 logObject->logMsg("doing multiSeqAlign\n");
194 }
195 #endif
196 MSA* msaObj = new MSA();
197 count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeight, progSteps.get(), 0);
198 delete msaObj;
199 //cout << "alignment finished!\n";
200
201 //end = time (NULL);
202 //dif = difftime(end, start);
203 //cout << "It took " << dif << " seconds so Far\n";
204
205 if (count <= 0)
206 {
207 return;
208 }
209
210 if (userParameters->getMenuFlag())
211 {
212 cout << "\n\n\n";
213 }
214
215 /// Do iteration to improve alignment!!!
216 if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
217 {
218 //userParameters->setNumIterations(_numSeqs * 2);
219 Iteration iterateObj;
220 iterateObj.removeFirstIterate(&alignmentObj);
221 alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
222 if(userParameters->getDisplayInfo())
223 cout << "Finished iteration\n";
224 }
225
226 if (statsObject->isEnabled()) {
227 statsObject->logAlignedSeqStats(&alignmentObj);
228 }
229
230
231
232 /// STEP 4: OUTPUT THE ALIGNMENT
233 if(createOutput)
234 {
235 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
236 }
237 (*phylipName) = "";
238
239 //end = time (NULL);
240 //dif = difftime(end, start);
241 //cout << "It took " << dif << " seconds\n";
242 }
243
244 /** ****************************************************************************
245 * The function sequencesAlignToProfile is used to align a set of sequences to *
246 * a profile *
247 *******************************************************************************/
sequencesAlignToProfile(string * phylipName)248 void Clustal::sequencesAlignToProfile(string* phylipName)
249 {
250 string path;
251 string treeName;
252 bool useTree;
253 int i, j, count;
254 float dscore;
255 bool saveSS2;
256 AlignmentOutput alignOutput;
257
258 if(userParameters->getProfile1Empty() && userParameters->getMenuFlag())
259 {
260 utilityObject->error("No profile in memory. Input 1st profile first.\n");
261 return;
262 }
263
264 if(userParameters->getProfile2Empty() && userParameters->getMenuFlag())
265 {
266 utilityObject->error("No sequences in memory. Input sequences first.\n");
267 return;
268 }
269
270 utilityObject->getPath(userParameters->getProfile2Name(), &path);
271
272 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
273 {
274 if(!alignOutput.openAlignmentOutput(path))
275 {
276 return;
277 }
278 }
279 else
280 {
281 // We are using clustalQT.
282 // Open all the files.
283 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
284 {
285 return; // could not open the files.
286 }
287 }
288
289 newSeq = alignmentObj.getProfile1NumSeqs() + 1;
290
291 // check for secondary structure information for list of sequences
292
293 saveSS2 = userParameters->getUseSS2();
294
295 if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true
296 && (alignmentObj.getNumSeqs() - alignmentObj.getProfile1NumSeqs() > 1))
297 {
298 if (userParameters->getStructPenalties2() == SECST)
299 {
300 utilityObject->warning("\n\nWARNING: ignoring secondary structure for a list of sequences\n\n");
301 }
302 else if (userParameters->getStructPenalties2() == GMASK)
303 {
304 utilityObject->warning("\n\nWARNING: ignoring gap penalty mask for a list of sequences\n\n");
305 }
306 userParameters->setUseSS2(false);
307 }
308
309 DistMatrix distMat;
310 int _numSeqs = alignmentObj.getNumSeqs();
311 distMat.ResizeRect(_numSeqs + 1);
312
313 //
314 // Convert to similarities!!!!!!!!
315 // This is calcualting the similarities of the sequences in the profile part
316 //
317 for (i = 1; i <= newSeq; i++)
318 {
319 for (j = i + 1; j <= newSeq; j++)
320 {
321 dscore = alignmentObj.countid(i,j);
322 distMat(i, j) = ((double)100.0 - (double)dscore)/(double)100.0;
323 distMat(j, i) = distMat(i, j);
324 }
325 }
326 //distMat.printArray();
327
328 InFileStream _treeFile; //nige
329
330 useTree = false;
331 //
332 // Note put this into a separate function!!!!!
333 //
334 if (_numSeqs >= 2)
335 {
336 useTree = useExistingGuideTree(Sequences, phylipName, path);
337 }
338
339 if (userParameters->getSaveParameters())
340 {
341 userParameters->createParameterOutput();
342 }
343
344 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
345 {
346 alignmentObj.resetProfile2();
347 }
348 else
349 {
350 alignmentObj.fixGaps();
351 }
352
353 int _length = 0;
354 if (userParameters->getStructPenalties1() == SECST)
355 {
356 _length = alignmentObj.getSeqLength(1);
357 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
358 alignmentObj.getGapPenaltyMask1());
359 }
360 if (userParameters->getStructPenalties2() == SECST)
361 {
362 _length = alignmentObj.getSeqLength(alignmentObj.getProfile1NumSeqs() + 1);
363 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
364 alignmentObj.getGapPenaltyMask2());
365 }
366
367 // PROGRESSIVE ALIGNMENT ALGORITHM //
368 vector<int> seqWeights(_numSeqs + 1);
369
370 bool success = false;
371
372 if (useTree == false) // create the new tree file, if necessary
373 {
374 if(userParameters->getDisplayInfo())
375 {
376 cout << "Start of Pairwise alignments\n";
377 cout << "Aligning...\n";
378 }
379
380 if(userParameters->getDNAFlag())
381 {
382 userParameters->setDNAParams();
383 }
384 else
385 {
386 userParameters->setProtParams();
387 }
388
389 // STEP 1: CALCULATE DISTANCE MATRIX USING PAIRWISE ALIGNMENT //
390 PairwiseAlignBase* pairwiseDist;
391 if (userParameters->getQuickPairAlign())
392 {
393 pairwiseDist = new FastPairwiseAlign();
394 }
395 else
396 {
397 pairwiseDist = new FullPairwiseAlign();
398 }
399
400 // Generate distance matrix!
401 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, newSeq-2, _numSeqs);
402 delete pairwiseDist;
403
404 if(userParameters->getDisplayInfo())
405 cout << "\n\n";
406
407 TreeInterface calcSeqWeights;
408 calcSeqWeights.getWeightsFromDistMat(&seqWeights, &distMat, &alignmentObj, 1,
409 _numSeqs, phylipName, &success);
410 }
411 else
412 {
413 TreeInterface calcSeqWeights;
414 calcSeqWeights.getWeightsFromGuideTree(&alignmentObj, &distMat, phylipName,
415 &seqWeights, 1, _numSeqs, &success);
416 }
417
418 if(!success)
419 {
420 return;
421 }
422 // If it is true, call the function here to get the seqweights etc from it.
423
424 // if users request only the guide tree, return
425 if (userParameters->getNewTreeFile())
426 {
427 return;
428 }
429
430
431 MSA* msaObj = new MSA();
432 count = msaObj->seqsAlignToProfile(&alignmentObj, &distMat, &seqWeights, newSeq - 2,
433 *phylipName);
434 delete msaObj;
435
436 userParameters->setUseSS2(saveSS2);
437
438 if (count <= 0)
439 {
440 return;
441 }
442
443 if (userParameters->getMenuFlag())
444 {
445 cout << "\n\n\n";
446 }
447
448 /// STEP 4: OUTPUT THE ALIGNMENT //
449 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
450
451 (*phylipName) = "";
452 }
453
454 /** ****************************************************************************
455 * The function profileAlign is used to align two profiles *
456 *******************************************************************************/
profileAlign(string * p1TreeName,string * p2TreeName)457 void Clustal::profileAlign(string* p1TreeName, string* p2TreeName)
458 {
459 string path;
460 //string treeName;
461 bool useTree1, useTree2;
462 int count, i, j, dscore;
463 int _profile1NumSeqs = alignmentObj.getProfile1NumSeqs();
464 AlignmentOutput alignOutput;
465
466 if(userParameters->getProfile1Empty() || userParameters->getProfile2Empty())
467 {
468 utilityObject->error("No sequences in memory. Load sequences first.\n\n");
469 return;
470 }
471
472 utilityObject->getPath(userParameters->getProfile1Name(), &path);
473
474 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
475 {
476 if(!alignOutput.openAlignmentOutput(path))
477 {
478 return;
479 }
480 }
481 else
482 {
483 // We are using clustalQT.
484 // Open all the files.
485 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
486 {
487 return; // could not open the files.
488 }
489 }
490
491 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
492 {
493 alignmentObj.resetProfile1();
494 alignmentObj.resetProfile2();
495 }
496 else
497 {
498 alignmentObj.fixGaps();
499 }
500
501 // Check if there exists a tree for profile1
502 useTree1 = false;
503
504 if (_profile1NumSeqs >= 2)
505 {
506 useTree1 = useExistingGuideTree(Profile1, p1TreeName, path);
507 }
508
509 // Check if there exists a tree for profile2
510 useTree2 = false;
511
512 utilityObject->getPath(userParameters->getProfile2Name(), &path);
513
514 if (alignmentObj.getNumSeqs() - _profile1NumSeqs >= 2)
515 {
516 useTree2 = useExistingGuideTree(Profile2, p2TreeName, path);
517 }
518
519 if (userParameters->getSaveParameters())
520 {
521 userParameters->createParameterOutput();
522 }
523 int _length = 0;
524 if (userParameters->getStructPenalties1() == SECST)
525 {
526 _length = alignmentObj.getSeqLength(1);
527 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
528 alignmentObj.getGapPenaltyMask1());
529 }
530
531 if (userParameters->getStructPenalties2() == SECST)
532 {
533 _length = alignmentObj.getSeqLength(_profile1NumSeqs + 1);
534 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
535 alignmentObj.getGapPenaltyMask2());
536 }
537
538 // Declare the distance matrix
539 DistMatrix distMat;
540 int _numSeqs = alignmentObj.getNumSeqs();
541 distMat.ResizeRect(_numSeqs + 1);
542
543 if (useTree1 == false)
544 {
545 if (_profile1NumSeqs >= 2)
546 {
547 for (i = 1; i <= _profile1NumSeqs; i++)
548 {
549 for (j = i + 1; j <= _profile1NumSeqs; j++)
550 {
551 dscore = static_cast<int>(alignmentObj.countid(i,j));
552 distMat(i, j) = (100.0 - dscore)/100.0;
553 distMat(j, i) = distMat(i, j);
554 }
555 }
556 utilityObject->getPath(userParameters->getProfile1Name(), &path);
557
558 // We need to get the name of the file first because the message is different!
559 if(userParameters->getMenuFlag())
560 {
561 promptForNewGuideTreeName(Profile1, p1TreeName, path);
562 }
563 else
564 {
565 string treeName;
566 treeName = path + "dnd";
567 p1TreeName = new string(treeName);
568 }
569 }
570
571 if (useTree2 == false)
572 {
573 if(_numSeqs - _profile1NumSeqs >= 2)
574 {
575 for (i = 1 + _profile1NumSeqs; i <= _numSeqs; i++)
576 {
577 for (j = i + 1; j <= _numSeqs; j++)
578 {
579 dscore = static_cast<int>(alignmentObj.countid(i,j));
580 distMat(i, j) = (100.0 - dscore) / 100.0;
581 distMat(j, i) = distMat(i, j);
582 }
583 }
584
585 utilityObject->getPath(userParameters->getProfile2Name(), &path);
586
587 if(userParameters->getMenuFlag())
588 {
589 promptForNewGuideTreeName(Profile2, p2TreeName, path);
590 }
591 else
592 {
593 string treeName;
594 treeName = path + "dnd";
595 p2TreeName = new string(treeName);
596 }
597
598 }
599 }
600 }
601
602 bool success = false;
603 vector<int> prof1Weight, prof2Weight;
604 prof1Weight.resize(_profile1NumSeqs);
605 prof2Weight.resize(_numSeqs);
606 TreeInterface tree;
607 tree.getWeightsForProfileAlign(&alignmentObj, &distMat, p1TreeName, &prof1Weight,
608 p2TreeName, &prof2Weight, _numSeqs, _profile1NumSeqs,
609 useTree1, useTree2, &success);
610 if(!success)
611 {
612 return;
613 }
614
615 // do an initial alignment to get the pairwise identities between the two
616 // profiles - used to set parameters for the final alignment
617 MSA* msaObj = new MSA();
618
619 alignmentObj.resetProfile1();
620 alignmentObj.resetProfile2();
621
622 count = msaObj->doProfileAlign(&alignmentObj, &distMat, &prof1Weight, &prof2Weight);
623 delete msaObj;
624
625 if (count == 0)
626 {
627 return;
628 }
629 if(userParameters->getMenuFlag())
630 {
631 cout << "\n\n\n";
632 }
633
634 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
635
636 (*p1TreeName) = "";
637 (*p2TreeName) = "";
638 }
639
doGuideTreeOnly(string * phylipName)640 void Clustal::doGuideTreeOnly(string* phylipName)
641 {
642 string path;
643 if(userParameters->getEmpty())
644 {
645 utilityObject->error("No sequences in memory. Load sequences first.\n");
646 return;
647 }
648
649 userParameters->setStructPenalties1(NONE);
650 userParameters->setStructPenalties2(NONE);
651
652 alignmentObj.clearSecStruct1();
653 alignmentObj.clearSecStruct2();
654
655 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
656 {
657 alignmentObj.resetAlign();
658 }
659
660 utilityObject->getPath(userParameters->getSeqName(), &path);
661 int _numSeqs = alignmentObj.getNumSeqs();
662
663 if (_numSeqs < 2)
664 {
665 utilityObject->error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.\n");
666 return;
667 }
668
669 if (userParameters->getSaveParameters())
670 {
671 userParameters->createParameterOutput();
672 }
673
674 if(userParameters->getDisplayInfo())
675 {
676 cout << "Start of Pairwise alignments\n";
677 cout << "Aligning...\n";
678 }
679
680 if(userParameters->getDNAFlag())
681 {
682 userParameters->setDNAParams();
683 }
684 else
685 {
686 userParameters->setProtParams();
687 }
688
689 ///STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX //
690
691 DistMatrix distMat;
692 distMat.ResizeRect(_numSeqs + 1);
693
694 PairwiseAlignBase* pairwiseDist;
695 if (userParameters->getQuickPairAlign())
696 {
697 pairwiseDist = new FastPairwiseAlign();
698 }
699 else
700 {
701 pairwiseDist = new FullPairwiseAlign();
702 }
703 // Generate distance matrix!
704 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
705 delete pairwiseDist;
706
707 /* AW DEBUG
708 fprintf(stdout, "\nDEBUG: distance matrix following...\n");
709 distMat.printArray();
710 */
711
712 bool success = false;
713 TreeInterface tree;
714 tree.generateTreeFromDistMat(&distMat, &alignmentObj, 1, _numSeqs, phylipName, &success);
715
716 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
717 {
718 alignmentObj.resetAlign();
719 }
720
721 (*phylipName) = "";
722 }
723
724
725 // FIXME this is to 90% identical with align(), please merge
doAlignUseOldTree(string * phylipName)726 void Clustal::doAlignUseOldTree(string* phylipName)
727 {
728 string path;
729 int count;
730 AlignmentOutput alignOutput;
731
732 if(userParameters->getEmpty())
733 {
734 utilityObject->error("No sequences in memory. Load sequences first.\n");
735 return;
736 }
737
738 userParameters->setStructPenalties1(NONE);
739 userParameters->setStructPenalties2(NONE);
740
741 alignmentObj.clearSecStruct1();
742 alignmentObj.clearSecStruct2();
743
744 utilityObject->getPath(userParameters->getSeqName(), &path);
745
746 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
747 {
748 if(!alignOutput.openAlignmentOutput(path))
749 {
750 return;
751 }
752 }
753 else
754 {
755 // We are using clustalQT.
756 // Open all the files.
757 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
758 {
759 return; // could not open the files.
760 }
761 }
762
763 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
764 {
765 alignmentObj.resetAlign();
766 }
767
768 int _numSeqs = alignmentObj.getNumSeqs();
769 DistMatrix distMat;
770 distMat.ResizeRect(_numSeqs + 1);
771 utilityObject->getPath(userParameters->getSeqName(), &path);
772
773 if (_numSeqs >= 2)
774 {
775
776 if(userParameters->getMenuFlag())
777 {
778 phylipName = new string(path);
779 *phylipName = *phylipName + "dnd";
780
781 string message, answer;
782 message = "\nEnter a name for the guide tree file [" + *phylipName + "]";
783 utilityObject->getStr(message, answer);
784
785 if(!answer.empty())
786 {
787 phylipName = new string(answer);
788 }
789 }
790
791 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
792 {
793 InFileStream _treeFile; //nige
794 _treeFile.open(phylipName->c_str());
795 if(!_treeFile.is_open())
796 {
797 utilityObject->error("Cannot open tree file [%s]\n", phylipName->c_str());
798 return;
799 }
800 _treeFile.close();
801 }
802 }
803 else
804 {
805 if(userParameters->getDisplayInfo())
806 {
807 cout << "Start of Pairwise alignments\n";
808 cout << "Aligning...\n";
809 }
810 if(userParameters->getDNAFlag())
811 {
812 userParameters->setDNAParams();
813 }
814 else
815 {
816 userParameters->setProtParams();
817 }
818
819 PairwiseAlignBase* pairwiseDist;
820 if (userParameters->getQuickPairAlign())
821 {
822 pairwiseDist = new FastPairwiseAlign();
823 }
824 else
825 {
826 pairwiseDist = new FullPairwiseAlign();
827 }
828 // Generate distance matrix!
829 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
830 delete pairwiseDist;
831 }
832
833 if (userParameters->getSaveParameters())
834 {
835 userParameters->createParameterOutput();
836 }
837 auto_ptr<AlignmentSteps> progSteps;
838 vector<int> seqWeights(_numSeqs + 1);
839 bool success = false;
840 TreeInterface tree;
841 progSteps = tree.getWeightsAndStepsFromTree(&alignmentObj, &distMat, phylipName,
842 &seqWeights, 1, _numSeqs, &success);
843
844 if(!success)
845 {
846 return;
847 }
848
849 MSA* msaObj = new MSA();
850 count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeights, progSteps.get(), 0);
851 delete msaObj;
852
853 if (count <= 0)
854 {
855 return;
856 }
857
858 if (userParameters->getMenuFlag())
859 {
860 cout << "\n\n\n";
861 }
862
863 // same as in align()
864 if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
865 {
866 //userParameters->setNumIterations(_numSeqs * 2);
867 Iteration iterateObj;
868 iterateObj.removeFirstIterate(&alignmentObj);
869 alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
870 if(userParameters->getDisplayInfo())
871 cout << "Finished iteration\n";
872 }
873
874
875 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
876
877 phylipName = new string("");
878 }
879
880
881
getFullHelp()882 void Clustal::getFullHelp()
883 {
884 vector<string> markers;
885 Help myhelp;
886 bool showtitle = true;
887
888 markers = myhelp.ListSectionMarkers();
889 for (unsigned int i=0; i<markers.size(); i++) {
890 string m = markers[i];
891 getHelp(m, showtitle);
892 }
893 }
894
getHelp(char helpPointer,bool printTitle)895 void Clustal::getHelp(char helpPointer, bool printTitle)
896 {
897 string s = "";
898 s += helpPointer;
899 Clustal::getHelp(s, printTitle);
900 }
901
902
903
904 /*
905 * Andreas Wilm (UCD): edited to support new help system (separate
906 * file now which is compiled in)
907 *
908 * Author?
909 * The clustal help file is called clustalw_help. Should be in the same
910 * directory. I have changed it to a C++ style implementation. I am taking
911 * out support for VMS until later. We will see if we are still supporting that platform.
912 */
getHelp(string helpPointer,bool printTitle)913 void Clustal::getHelp(string helpPointer, bool printTitle)
914 {
915 Help myhelp;
916 string helpString;
917
918 helpString = myhelp.GetSection(helpPointer);
919
920 if (printTitle) {
921 helpString = "\n\n>> HELP " +
922 helpPointer +
923 " << " +
924 myhelp.GetSectionTitle(helpPointer) +
925 "\n\n" +
926 helpString;
927 }
928
929
930 if(! userParameters->getMenuFlag())
931 {
932 cout << helpString;
933 }
934 else
935 {
936 string::size_type lastPos = 0;
937 string::size_type pos = helpString.find_first_of("\n", lastPos);
938 int nlines = 0;
939
940 while (pos != string::npos)
941 {
942 cout << helpString.substr(lastPos, pos - lastPos);
943 nlines++;
944
945 if(nlines >= PAGE_LEN)
946 {
947 char tempChar;
948 cout << "\nPress [RETURN] to continue or X to stop ";
949 cin.get(tempChar);
950 if(toupper(tempChar) == 'X')
951 {
952 return;
953 }
954 else
955 {
956 nlines = 0;
957 }
958 }
959
960 lastPos = pos; //helpString.find_first_not_of("\n", pos);
961 pos = helpString.find_first_of("\n", lastPos+1);
962 //cerr << "DEBUG: pos=" << pos << " lastPos=" << lastPos << "/" << helpString.length() << "\n";
963 }
964 }
965 }
966
967
968
969 /**
970 * The wrap functions will be used with interface classes to perform the tasks
971 * that were previously done there.
972 */
sequenceInput(bool append,string * offendingSeq)973 int Clustal::sequenceInput(bool append, string *offendingSeq)
974 {
975 int code;
976 // If we are not appending, we need to clear the Alignment object.
977 if(!append)
978 {
979 alignmentObj.clearAlignment();
980 }
981
982 FileReader readSeqFile;
983 code = readSeqFile.seqInput(&alignmentObj, append, offendingSeq);
984 return code;
985 }
986
987 /**
988 * profile1Input is a wrapper function for the profileInput. This is because the
989 * other classes dont have access to FileReader
990 */
profile1Input(string profile1Name)991 int Clustal::profile1Input(string profile1Name)
992 {
993 int code;
994 // I need to clear out the Alignment object.
995 alignmentObj.clearAlignment();
996 userParameters->setProfileNum(1);
997
998 userParameters->setSeqName(profile1Name);
999 userParameters->setProfile1Name(profile1Name);
1000
1001 FileReader readProfileFile;
1002 code = readProfileFile.profileInput(&alignmentObj);
1003
1004 // If we are using the commandline check if there are seqs!
1005 if(!userParameters->getInteractive())
1006 {
1007 // AW: FIXME code should be handled higher up the stack and check all codes
1008 // also, shouldnt we use utilityObject->error()?
1009 if(code != OK)
1010 {
1011 if (code==NOSEQUENCESINFILE)
1012 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1013 else if (code==ALLNAMESNOTDIFFERENT)
1014 cerr << "ERROR: Not all sequence names are different" << std::endl;
1015 else
1016 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1017 // AW: should we really exit here? What if called from clustalx?
1018 exit(2);
1019 }
1020 }
1021
1022 return code;
1023 }
1024
1025 /**
1026 * profile2Input is a wrapper function for the profileInput. This is because the
1027 * other classes dont have access to FileReader
1028 */
profile2Input(string profile2Name)1029 int Clustal::profile2Input(string profile2Name)
1030 {
1031 int code;
1032
1033 if(userParameters->getProfileNum() == 2)
1034 {
1035 // Remove the sequences from the previous one.
1036 int numSeqsProfile1 = alignmentObj.getProfile1NumSeqs();
1037 alignmentObj.resizeSeqArray(numSeqsProfile1 + 1);
1038 // Clear anything from profile2 in alignment.
1039 alignmentObj.clearSecStruct2();
1040 }
1041 userParameters->setProfileNum(2);
1042
1043 userParameters->setSeqName(profile2Name);
1044 userParameters->setProfile2Name(profile2Name);
1045
1046
1047 FileReader readProfileFile;
1048 cout << "before profileInput\n";
1049 code = readProfileFile.profileInput(&alignmentObj);
1050 cout << "after profileInput\n";
1051
1052 if(!userParameters->getInteractive())
1053 {
1054 // AW: FIXME code should be handled higher up the stack and check all codes
1055 // also, shouldnt we use utilityObject->error()?
1056 if(code != OK) {
1057 if (code==NOSEQUENCESINFILE)
1058 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1059 else if (code==ALLNAMESNOTDIFFERENT)
1060 cerr << "ERROR: Not all sequence names are different" << std::endl;
1061 else
1062 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1063 // AW: should we really exit here? What if called from clustalx?
1064 // DD: fixed
1065 if(!userParameters->getGui())
1066 exit(2);
1067 }
1068 }
1069 return code;
1070 }
1071
1072
1073 /**
1074 * The function commandline_readseq is called by the command line to
1075 * read in the sequences. It also prints out the names. This was
1076 * previously done by the command line parser.
1077 */
commandLineReadSeq(int firstSeq)1078 int Clustal::commandLineReadSeq(int firstSeq)
1079 {
1080 // Clear the alignment, although obviously there shouldnt be anything in it.
1081 alignmentObj.clearAlignment();
1082 userParameters->setProfileNum(0);
1083 int code = 0;
1084 string offendingSeq;
1085 FileReader readInputFile;
1086 code = readInputFile.readSeqs(&alignmentObj, firstSeq, &offendingSeq);
1087
1088 if(code != OK)
1089 {
1090 if(code == CANNOTOPENFILE)
1091 {
1092 utilityObject->error("Cannot open input file. No alignment!\n");
1093 }
1094 else if(code == NOSEQUENCESINFILE)
1095 {
1096 utilityObject->error("No sequences in file. No alignment!\n");
1097 }
1098 else if(code == ALLNAMESNOTDIFFERENT)
1099 {
1100 utilityObject->error("Multiple sequences found with same name (found %s at least twice)!", offendingSeq.c_str());
1101 }
1102 else if(code == EMPTYSEQUENCE)
1103 {
1104 utilityObject->error("Empty sequences found: %s\n", offendingSeq.c_str());
1105 }
1106 else if(code == SEQUENCETOOBIG)
1107 {
1108 utilityObject->error("Sequence(s) too big: %s\n", offendingSeq.c_str());
1109 }
1110 else if(code == BADFORMAT)
1111 {
1112 utilityObject->error("Sequences are badly formatted!\n");
1113 }
1114 else
1115 {
1116 utilityObject->error("\nThere was a problem reading in the file. No alignment!\n");
1117 }
1118 exit(-1);
1119 }
1120
1121 alignmentObj.printSequencesAddedInfo();
1122
1123 userParameters->setEmpty(false);
1124 return code;
1125 }
1126
1127 /*
1128 * The function outputNow is used to output the alignment. It can be called by the
1129 * menu or command line parser.
1130 */
outputNow()1131 void Clustal::outputNow()
1132 {
1133 if(alignmentObj.getNumSeqs() > 0)
1134 {
1135 string path = "";
1136 if(!userParameters->getMenuFlag())
1137 {
1138 string _seqName = userParameters->getSeqName();
1139 utilityObject->getPath(_seqName, &path);
1140 }
1141 AlignmentOutput alignmentOutput;
1142 alignmentOutput.openAlignmentOutput(path);
1143
1144 alignmentOutput.createAlignmentOutput(&alignmentObj, 1, alignmentObj.getNumSeqs());
1145 }
1146 else
1147 {
1148 utilityObject->error("No sequences have been loaded\n");
1149 }
1150 }
1151
phylogeneticTree(string * phylipName,string * clustalName,string * distName,string * nexusName,string pimName)1152 void Clustal::phylogeneticTree(string* phylipName, string* clustalName, string* distName,
1153 string* nexusName, string pimName)
1154 {
1155 TreeNames treeNames;
1156 treeNames.clustalName = *clustalName;
1157 treeNames.distName = *distName;
1158 treeNames.nexusName = *nexusName;
1159 treeNames.phylipName = *phylipName;
1160 treeNames.pimName = pimName;
1161 TreeInterface tree;
1162 tree.treeFromAlignment(&treeNames, &alignmentObj);
1163 }
1164
bootstrapTree(string * phylipName,string * clustalName,string * nexusName)1165 void Clustal::bootstrapTree(string* phylipName, string* clustalName, string* nexusName)
1166 {
1167 TreeNames treeNames;
1168 treeNames.clustalName = *clustalName;
1169 treeNames.nexusName = *nexusName;
1170 treeNames.phylipName = *phylipName;
1171 TreeInterface tree;
1172 tree.bootstrapTree(&treeNames, &alignmentObj);
1173 }
1174
initInterface()1175 void Clustal::initInterface()
1176 {
1177 userParameters->setEmpty(true);
1178 userParameters->setProfile1Empty(true);
1179 userParameters->setProfile2Empty(true);
1180 }
1181
1182 /**
1183 * The function calcGapPenaltyMask is used to calculate the gapMask from the secondary
1184 * structure information.
1185 */
calcGapPenaltyMask(int prfLength,vector<char> * mask,vector<char> * gapMask)1186 void Clustal::calcGapPenaltyMask(int prfLength, vector<char>* mask, vector<char>* gapMask)
1187 {
1188 int i,j;
1189
1190 vector<char> structMask;
1191 structMask.resize(prfLength + 1);
1192
1193 int _helixEndPlus = userParameters->getHelixEndPlus();
1194 int _helixEndMinus = userParameters->getHelixEndMinus();
1195 int _strandEndPlus = userParameters->getStrandEndPlus();
1196 int _strandEndMinus = userParameters->getStrandEndMinus();
1197
1198 i = 0;
1199 while (i < prfLength)
1200 {
1201 if (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1202 {
1203 for (j = -_helixEndPlus; j < 0; j++)
1204 {
1205 if(i + j < (int)structMask.size())
1206 {
1207 if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a') &&
1208 (tolower(structMask[i + j]) != 'b'))
1209 {
1210 structMask[i + j] = 'a';
1211 }
1212 }
1213 }
1214 for (j = 0; j < _helixEndMinus; j++)
1215 {
1216 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'a'
1217 && (*mask)[i + j] != '$'))
1218 {
1219 break;
1220 }
1221 structMask[i + j] = 'a';
1222 }
1223 i += j;
1224 while (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1225 {
1226 if (i >= prfLength)
1227 {
1228 break;
1229 }
1230 if ((*mask)[i] == '$')
1231 {
1232 structMask[i] = 'A';
1233 i++;
1234 break;
1235 }
1236 else
1237 {
1238 structMask[i] = (*mask)[i];
1239 }
1240 i++;
1241 }
1242 for (j = 0; j < _helixEndMinus; j++)
1243 {
1244 if ((i - j - 1 >= 0) && (tolower((*mask)[i - j - 1]) == 'a'
1245 || (*mask)[i - j - 1] == '$'))
1246 {
1247 structMask[i - j - 1] = 'a';
1248 }
1249 }
1250 for (j = 0; j < _helixEndPlus; j++)
1251 {
1252 if (i + j >= prfLength)
1253 {
1254 break;
1255 }
1256 structMask[i+j] = 'a';
1257 }
1258 }
1259 else if (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1260 {
1261 for (j = -_strandEndPlus; j < 0; j++)
1262 {
1263 if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a')
1264 && (tolower(structMask[i + j]) != 'b'))
1265 {
1266 structMask[i + j] = 'b';
1267 }
1268 }
1269 for (j = 0; j < _strandEndPlus; j++)
1270 {
1271 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'b'
1272 && (*mask)[i + j] != '%'))
1273 {
1274 break;
1275 }
1276 structMask[i+j] = 'b';
1277 }
1278 i += j;
1279 while (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1280 {
1281 if (i >= prfLength)
1282 {
1283 break;
1284 }
1285 if ((*mask)[i] == '%')
1286 {
1287 structMask[i] = 'B';
1288 i++;
1289 break;
1290 }
1291 else
1292 {
1293 structMask[i] = (*mask)[i];
1294 }
1295 i++;
1296 }
1297 for (j = 0; j < _strandEndMinus; j++)
1298 {
1299 if ((i-j-1>=0) && (tolower((*mask)[i-j-1]) == 'b' || (*mask)[i-j-1] == '%'))
1300 {
1301 structMask[i - j - 1] = 'b';
1302 }
1303 }
1304 for (j = 0; j < _strandEndPlus; j++)
1305 {
1306 if (i + j >= prfLength)
1307 {
1308 break;
1309 }
1310 structMask[i+j] = 'b';
1311 }
1312 }
1313 else
1314 {
1315 i++;
1316 }
1317 }
1318
1319 for(i = 0; i < prfLength;i++)
1320 {
1321 switch (structMask[i])
1322 {
1323 case 'A':
1324 (*gapMask)[i] = userParameters->getHelixPenalty() + '0';
1325 break;
1326 case 'a':
1327 (*gapMask)[i] = userParameters->getHelixEndPenalty() + '0';
1328 break;
1329 case 'B':
1330 (*gapMask)[i] = userParameters->getStrandPenalty() +'0';
1331 break;
1332 case 'b':
1333 (*gapMask)[i] = userParameters->getStrandEndPenalty() + '0';
1334 break;
1335 default:
1336 (*gapMask)[i] = userParameters->getLoopPenalty() + '0';
1337 break;
1338 }
1339 }
1340 }
1341
1342
1343 /**
1344 * The function QTcalcLowScoreSegments is used to calculate the residues in the sequences
1345 * that score badly.
1346 * @param firstSeq first seq in the alignment or profile
1347 * @param nSeqs the number of sequences
1348 * @param nCols the length of the longest seq
1349 * @param seqWeight a vector to hold the sequence weights
1350 * @param lowScoreRes
1351 * @param seqWeightCalculated
1352 */
QTcalcLowScoreSegments(LowScoreSegParams * params)1353 void Clustal::QTcalcLowScoreSegments(LowScoreSegParams* params)
1354 {
1355 int i, j;
1356 float sum, prevSum;
1357 float gscale;
1358 vector<int> weight;
1359 int sweight;
1360 vector<int> gaps;
1361 int matrix[NUMRES][NUMRES];
1362 vector<float> fsum;
1363 vector<float> bsum;
1364 vector<float> pscore;
1365 int _maxAA = userParameters->getMaxAA();
1366
1367 // STEP 1: Calculate the sequence weights
1368 QTcalcWeightsForLowScoreSeg(params);
1369
1370 subMatrix->getQTMatrixForLowScoreSeg(matrix);
1371
1372 const SeqArray* seqArray = alignmentObj.getSeqArray();
1373
1374 gaps.resize(params->nCols + 1);
1375
1376 for (j = 1; j <= params->nCols; j++)
1377 {
1378 gaps[j - 1] = 0;
1379 for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs; i++)
1380 {
1381 if (j < alignmentObj.getSeqLength(i))
1382 {
1383 if (((*seqArray)[i][j] < 0) || ((*seqArray)[i][j] > _maxAA))
1384 {
1385 gaps[j-1]++;
1386 }
1387 }
1388 }
1389 }
1390
1391 // STEP 2: Calculate the profile
1392 LowScoreSegProfile lowScoreProfile(params->nCols, params->firstSeq,
1393 params->firstSeq + params->nSeqs);
1394
1395 weight.resize(params->firstSeq + params->nSeqs + 1);
1396
1397 for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1398 {
1399 weight[i] = (*(params->seqWeight))[i - params->firstSeq];
1400 }
1401
1402 lowScoreProfile.calcLowScoreSegProfile(seqArray, matrix, &weight);
1403
1404 const SeqArray* profile = lowScoreProfile.getProfilePtr();
1405
1406 sweight = 0;
1407 for(i = params->firstSeq; i < params->firstSeq + params->nSeqs; i++)
1408 {
1409 sweight += weight[i];
1410 }
1411
1412 //Now, use the profile scores to mark segments of each sequence which score badly.
1413
1414 fsum.resize(params->nCols + 2);
1415 bsum.resize(params->nCols + 2);
1416 pscore.resize(params->nCols + 2);
1417
1418 for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs + 1; i++)
1419 {
1420 // In a forward phase, sum the profile scores. Mark negative sums as exceptions.
1421 //If the sum is positive, then it gets reset to 0.
1422 sum = 0.0;
1423 for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1424 {
1425 gscale = (float)(params->nSeqs - gaps[j - 1]) / (float)params->nSeqs;
1426 if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1427 {
1428 pscore[j - 1] = 0.0;
1429 sum = 0.0;
1430 }
1431 else
1432 {
1433 pscore[j-1]=((*profile)[j][(*seqArray)[i][j]]- weight[i - 1] *
1434 matrix[(*seqArray)[i][j]][(*seqArray)[i][j]]) * gscale / sweight;
1435 }
1436 sum += pscore[j - 1];
1437 if(sum > 0.0)
1438 {
1439 sum = 0.0;
1440 }
1441 fsum[j - 1] = sum;
1442 }
1443 // trim off any positive scoring residues from the end of the segments
1444 prevSum = 0;
1445 for(j = alignmentObj.getSeqLength(i) - 1; j >= 0; j--)
1446 {
1447 if(prevSum >= 0.0 && fsum[j] < 0.0 && pscore[j] >= 0.0)
1448 {
1449 fsum[j] = 0.0;
1450 }
1451 prevSum = fsum[j];
1452 }
1453
1454 // Now, in a backward phase, do the same summing process.
1455 sum = 0.0;
1456 for(j = alignmentObj.getSeqLength(i); j >= 1; j--)
1457 {
1458 if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1459 {
1460 sum = 0;
1461 }
1462 else
1463 {
1464 sum += pscore[j - 1];
1465 }
1466 if(sum > 0.0)
1467 {
1468 sum = 0.0;
1469 }
1470 bsum[j - 1] = sum;
1471 }
1472 // trim off any positive scoring residues from the start of the segments
1473 prevSum = 0;
1474 for(j = 0; j < alignmentObj.getSeqLength(i); j++)
1475 {
1476 if(prevSum >= 0.0 && bsum[j] < 0.0 && pscore[j] >= 0.0)
1477 {
1478 bsum[j] = 0.0;
1479 }
1480 prevSum = bsum[j];
1481 }
1482 //Mark residues as exceptions if they score negative in the forward AND backward directions.
1483 for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1484 {
1485 if(fsum[j - 1] < 0.0 && bsum[j - 1] < 0.0)
1486 {
1487 if((*seqArray)[i][j] >= 0 && (*seqArray)[i][j] < _maxAA)
1488 {
1489 (*(params->lowScoreRes))[i - params->firstSeq - 1][j - 1] = -1;
1490 }
1491 }
1492 }
1493 }
1494
1495 // Finally, apply the length cutoff to the segments - removing segments shorter
1496 //than the cutoff
1497
1498 QTremoveShortSegments(params);
1499
1500 }
1501
QTremoveShortSegments(LowScoreSegParams * params)1502 void Clustal::QTremoveShortSegments(LowScoreSegParams* params)
1503 {
1504 int i,j,k,start;
1505 //panel_data data;
1506
1507 //GetPanelExtra(p,&data);
1508 if(params->nSeqs <= 0)
1509 return;
1510
1511 // Reset all the exceptions - a value of 1 indicates an exception that
1512 // will be displayed. A value of -1 is used to remember exceptions that
1513 // are temporarily hidden in the display
1514
1515 for(i = 0; i < params->nSeqs; i++)
1516 {
1517 for(j = 0; j < params->nCols; j++)
1518 {
1519 if((*(params->lowScoreRes))[i][j] == -1)
1520 {
1521 (*(params->lowScoreRes))[i][j] = 1;
1522 }
1523 }
1524 }
1525
1526 for(i = 0; i < params->nSeqs; i++)
1527 {
1528 start = -1;
1529 for(j = 0; j <= params->nCols; j++)
1530 {
1531 if(start == -1)
1532 {
1533 if((*(params->lowScoreRes))[i][j] == 1)
1534 start = j;
1535 }
1536 else
1537 {
1538 if(j == params->nCols || (*(params->lowScoreRes))[i][j] == 0)
1539 {
1540 if(j - start < userParameters->getQTminLenLowScoreSegment())
1541 {
1542 for(k = start; k < j; k++)
1543 {
1544 (*(params->lowScoreRes))[i][k] = -1;
1545 }
1546 }
1547 start = -1;
1548 }
1549 }
1550 }
1551 }
1552 }
1553
1554 /**
1555 * Change: Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
1556 */
QTcalcWeightsForLowScoreSeg(LowScoreSegParams * params)1557 void Clustal::QTcalcWeightsForLowScoreSeg(LowScoreSegParams* params)
1558 {
1559 int i, j;
1560 vector<int> weight;
1561 float dscore;
1562 DistMatrix distMat(alignmentObj.getNumSeqs() + 1); // Mark: changed size
1563 // Aw potential trouble here: what if we don't have write
1564 // permission to current directory?
1565 #ifdef UNIX
1566 char treeName[FILENAMELEN]=".score.ph";
1567 #else
1568 char treeName[FILENAMELEN]="tmp.ph";
1569 #endif
1570
1571 if(params->nSeqs <= 0)
1572 {
1573 return;
1574 }
1575
1576 // if sequence weights have been calculated before - don't bother
1577 //doing it again (it takes too long). data.seqweight is set to NULL when
1578 // new sequences are loaded. /
1579 if(params->seqWeightCalculated == true)
1580 {
1581 return;
1582 }
1583
1584 utilityObject->info("Calculating sequence weights...");
1585
1586 // count pairwise percent identities to make a phylogenetic tree //
1587 if(params->nSeqs >= 2)
1588 { //i=firstSeq + 1; i <= firstSeq + nSeqs
1589 for (i = params->firstSeq + 1; i <= params->firstSeq + params->nSeqs; i++)
1590 { //j=i + 1; i <= firstSeq + nSeqs
1591 for (j = i + 1; j <= params->firstSeq + params->nSeqs; j++) // Mark 22-5-07
1592 {
1593 dscore = alignmentObj.countid(i, j); // Mark 22-5-07
1594 distMat(i, j) = (100.0 - dscore) / 100.0;
1595 distMat(j, i) = distMat(i, j);
1596 }
1597 }
1598 string name = string(treeName);
1599 bool success = false;
1600 weight.resize(params->firstSeq + params->nSeqs + 1);
1601 TreeInterface tree;
1602 tree.getWeightsForQtLowScore(&weight, &distMat, &alignmentObj, params->firstSeq + 1,
1603 params->nSeqs, &name, &success); // Mark change 10-5-07
1604 if(!success)
1605 {
1606 return;
1607 }
1608
1609 for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1610 {
1611 (*(params->seqWeight))[i - params->firstSeq] = weight[i];
1612 }
1613
1614 utilityObject->info("Done.");
1615 }
1616 }
1617
QTSetFileNamesForOutput(AlignmentFileNames fileNames)1618 void Clustal::QTSetFileNamesForOutput(AlignmentFileNames fileNames)
1619 {
1620 QTFileNames = fileNames;
1621 }
1622
QTRealignSelectedRange(AlignmentFileNames fileNames,int beginPos,int endPos,bool realignEndGapPen)1623 bool Clustal::QTRealignSelectedRange(AlignmentFileNames fileNames, int beginPos, int endPos, bool realignEndGapPen)
1624 {
1625 bool alignEndGapPen = userParameters->getEndGapPenalties();
1626
1627 Alignment saveOldAlign = alignmentObj; // Take a copy of it. Note provided copy
1628 // constructor is ok.
1629 bool ok;
1630 ok = alignmentObj.removeAllOutsideRange(beginPos, endPos);
1631
1632 if(!ok)
1633 {
1634 alignmentObj = saveOldAlign;
1635 return false;
1636 }
1637 // Temporarily set the alignment output to be input
1638 int saveOutOrder = userParameters->getOutputOrder();
1639 userParameters->setOutputOrder(INPUT);
1640
1641 //set end gap penalties to be realignEndGapPenalties
1642 userParameters->setEndGapPenalties(realignEndGapPen);
1643 //do the alignment
1644 if(alignmentObj.getNumSeqs() <= 0)
1645 {
1646 alignmentObj = saveOldAlign;
1647 return false;
1648 }
1649 QTSetFileNamesForOutput(fileNames);
1650 string phylipName = fileNames.treeFile;
1651 align(&phylipName, false);
1652
1653 userParameters->setOutputOrder(saveOutOrder);
1654
1655 // reset the end gap penalties
1656 userParameters->setEndGapPenalties(alignEndGapPen);
1657
1658 // remove postions that only contain gaps
1659 int nSeqs = alignmentObj.getNumSeqs();
1660 alignmentObj.removeAllGapOnlyColumns(1, nSeqs, 0);
1661
1662 // save it to a temporary area.
1663 SeqArray realignedArea = *(alignmentObj.getSeqArray());
1664
1665 // Paste it back into the original alignment.
1666 alignmentObj = saveOldAlign;
1667 bool result;
1668 result = alignmentObj.updateRealignedRange(realignedArea, beginPos, endPos);
1669 if(result == false)
1670 {
1671 utilityObject->error("something went wrong while updating the realigned range\n");
1672 }
1673
1674 // output the alignments
1675 AlignmentOutput alignOutput;
1676 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
1677 {
1678 return false; // could not open the files.
1679 }
1680
1681 alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1682
1683 return true;
1684 }
1685
test()1686 void Clustal::test()
1687 {
1688 cout << "RUNNING TEST\n";
1689 AlignmentOutput alignOutput;
1690 string path;
1691 utilityObject->getPath(userParameters->getSeqName(), &path);
1692
1693 if(!alignOutput.openAlignmentOutput(path))
1694 {
1695 cerr << "could not open the file\n";
1696 return;
1697 }
1698
1699 vector<int> selected;
1700 int nSeqs = alignmentObj.getNumSeqs();
1701 selected.resize(nSeqs + 1, 0);
1702 selected[9] = 1;
1703 selected[10] = 1;
1704 //selected[1] = 1;
1705 alignmentObj.removeGapOnlyColsFromSelectedSeqs(&selected);
1706 alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1707 }
1708
1709 /**
1710 * This function is used to ask the user if they would like to use an existing guide tree.
1711 * Note: It expects that phylipName actually points to a string that has been allocated.
1712 */
useExistingGuideTree(int type,string * phylipName,const string & path)1713 bool Clustal::useExistingGuideTree(int type, string* phylipName, const string& path)
1714 {
1715 bool useTree = false;
1716 string treeName;
1717 InFileStream _treeFile; //nige
1718 bool paramUseTree;
1719
1720 string* ptrToMsg;
1721 if(type == Sequences)
1722 {
1723 ptrToMsg = &sequencesMsg;
1724 paramUseTree = userParameters->getUseTreeFile();
1725 }
1726 else if(type == Profile1)
1727 {
1728 ptrToMsg = &profile1Msg;
1729 paramUseTree = userParameters->getUseTree1File();;
1730 }
1731 else if(type == Profile2)
1732 {
1733 ptrToMsg = &profile2Msg;
1734 paramUseTree = userParameters->getUseTree2File();;
1735 }
1736 else
1737 {
1738 ptrToMsg = &sequencesMsg;
1739 paramUseTree = userParameters->getUseTreeFile();
1740 }
1741
1742 if (checkTree && userParameters->getMenuFlag())
1743 {
1744 treeName = path + "dnd";
1745
1746 _treeFile.open(treeName.c_str());
1747 _treeFile.seekg(0, std::ios::beg);
1748
1749 if(_treeFile.is_open())
1750 {
1751 string message = *ptrToMsg + treeName + " (y/n) ? [y]";
1752 string answer;
1753 utilityObject->getStr(message, answer);
1754
1755 if(answer[0] != 'n' && answer[0] != 'N')
1756 {
1757 if(!phylipName)
1758 {
1759 phylipName = new string(treeName);
1760 }
1761 else
1762 {
1763 *phylipName = treeName;
1764 }
1765 useTree = true;
1766 }
1767 _treeFile.close();
1768 }
1769 }
1770 else if (!userParameters->getMenuFlag() && paramUseTree)
1771 {
1772 useTree = true;
1773 }
1774
1775 return useTree;
1776 }
1777
promptForNewGuideTreeName(int type,string * treeName,const string & path)1778 void Clustal::promptForNewGuideTreeName(int type, string* treeName, const string& path)
1779 {
1780 string* ptrToMsg;
1781 if(type == Profile1)
1782 {
1783 ptrToMsg = &newProfile1TreePrompt;
1784 }
1785 else if(type == Profile2)
1786 {
1787 ptrToMsg = &newProfile2TreePrompt;
1788 }
1789 else
1790 {
1791 ptrToMsg = &newProfile1TreePrompt;
1792 }
1793
1794 if(!treeName)
1795 {
1796 treeName = new string("");
1797 }
1798
1799 while(treeName->empty())
1800 {
1801 string message = *ptrToMsg + path + "dnd]";
1802 string answer;
1803 utilityObject->getStr(message, answer);
1804 if(answer.empty())
1805 {
1806 answer = path + "dnd";
1807 *treeName = answer;
1808 }
1809 else
1810 {
1811 *treeName = answer;
1812 }
1813 }
1814 }
1815
1816 }
1817