1 /**
2  * Author: Mark Larkin
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  */
6 /**
7  * Changes:
8  *
9  * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10  * cross-platform end-of-lines (for dendrograms, not for help file). Remerged
11  * this change 13-04-07.
12  *
13  * Mark 10-5-2007: Bug fix # 42. call getWeightsForQtLowScore function in
14  * QTcalcWeightsForLowScoreSeg instead of getWeightsFromDistMat.
15  *
16  * Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
17  * Mark Change 20-6-07, added call to calculateMaxLengths()
18  * Mark, 3-7-07, Changed getHelp.
19  */
20 #ifdef HAVE_CONFIG_H
21     #include "config.h"
22 #endif
23 
24 #include <iostream>
25 #include <fstream>
26 #include <cstdio>
27 #include "Clustal.h"
28 #include "pairwise/FullPairwiseAlign.h"
29 #include "pairwise/FastPairwiseAlign.h"
30 #include "multipleAlign/MSA.h"
31 #include "multipleAlign/LowScoreSegProfile.h"
32 #include "multipleAlign/Iteration.h"
33 #include "tree/TreeInterface.h"
34 #include "general/debuglogObject.h"
35 #include "general/statsObject.h"
36 #include "alignment/ObjectiveScore.h"
37 #include "general/ClustalWResources.h"
38 #include "Help.h"
39 #include <ctime>
40 
41 using namespace std;
42 
43 namespace clustalw
44 {
45 
Clustal()46 Clustal::Clustal()
47 {
48 #ifdef WINDOWS
49     helpFileName = string("clustalw.hlp");
50 #else
51     helpFileName = string("clustalw_help");
52 #endif
53 
54     checkTree = true;
55     newSeq = 0;
56     sequencesMsg = "\nUse the existing GUIDE TREE file,  ";
57     profile1Msg = "\nUse the existing GUIDE TREE file for Profile 1,  " ;
58     profile2Msg = "\nUse the existing GUIDE TREE file for Profile 2,  ";
59 
60     newProfile1TreePrompt = "\nEnter name for new GUIDE TREE file for profile 1 [";
61     newProfile2TreePrompt = "\nEnter name for new GUIDE TREE file for profile 2 [";
62 
63     initInterface();
64 }
65 
66 /** ***********************************************************************
67  * The function align is used to do a full multiple sequence alignment.   *
68  **************************************************************************/
69 // FIXME: merge doAlignUseOldTree in here
align(string * phylipName,bool createOutput)70 void Clustal::align(string* phylipName, bool createOutput)
71 {
72     //time_t start, end;
73     //double dif;
74     //start = time (NULL);
75     //ObjectiveScore score;
76     //double _score = score.getSagaScore(&alignmentObj);
77     //cout << "SAGA score " << _score << "\n";
78 
79     string path;
80     int count;
81     AlignmentOutput alignOutput;
82 
83     if(userParameters->getEmpty() && userParameters->getMenuFlag())
84     {
85         utilityObject->error("No sequences in memory. Load sequences first.");
86         return;
87     }
88 
89     userParameters->setStructPenalties1(NONE);
90     userParameters->setStructPenalties2(NONE);
91 
92     alignmentObj.clearSecStruct1();
93     alignmentObj.clearSecStruct2();
94 
95     utilityObject->getPath(userParameters->getSeqName(), &path);
96 
97     if(createOutput && (userParameters->getMenuFlag() || !userParameters->getInteractive()))
98     {
99         if(!alignOutput.openAlignmentOutput(path))
100         {
101             return;
102         }
103     }
104     else if(createOutput)
105     {
106         // We are using clustalQT.
107         // Open all the files.
108         if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
109         {
110             return; // could not open the files.
111         }
112     }
113 
114     if (userParameters->getSaveParameters())
115     {
116         userParameters->createParameterOutput();
117     }
118 
119     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
120     {
121         alignmentObj.resetAlign();
122     }
123     if(userParameters->getDisplayInfo())
124     {
125         cout << "Start of Pairwise alignments\n";
126         cout << "Aligning...\n";
127     }
128     if(userParameters->getDNAFlag())
129     {
130         userParameters->setDNAParams();
131     }
132     else
133     {
134         userParameters->setProtParams();
135     }
136 
137     if (statsObject->isEnabled()) {
138         statsObject->logInputSeqStats(&alignmentObj);
139     }
140 
141 
142     /// STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX
143 
144     SymMatrix distMat;
145     int _numSeqs = alignmentObj.getNumSeqs();
146     distMat.ResizeRect(_numSeqs + 1);
147 
148     PairwiseAlignBase* pairwiseDist;
149     if (userParameters->getQuickPairAlign())
150     {
151         pairwiseDist = new FastPairwiseAlign();
152     }
153     else
154     {
155         pairwiseDist = new FullPairwiseAlign();
156     }
157     // Generate distance matrix!
158     pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
159     delete pairwiseDist;
160 
161     bool success = false;
162     auto_ptr<AlignmentSteps> progSteps;
163     vector<int> seqWeight(_numSeqs + 1);
164 
165     #if DEBUGFULL
166         if(logObject && DEBUGLOG)
167         {
168             logObject->logMsg("Calling getWeightsAndStepsFromDistMat\n");
169         }
170     #endif
171 
172     TreeInterface calcSteps;
173     progSteps = calcSteps.getWeightsAndStepsFromDistMat(&seqWeight, &distMat, &alignmentObj,
174                                                         1, _numSeqs, phylipName, &success);
175     //cout << "weights and steps calculated!\n";
176     //end = time (NULL);
177     //dif = difftime(end, start);
178     //cout << "It took " << dif << " seconds so Far\n";
179 
180     if(!success)
181     {
182         #if DEBUGFULL
183             if(logObject && DEBUGLOG)
184             {
185                 logObject->logMsg("Unsuccessful!!!\n");
186             }
187         #endif
188         return;
189     }
190     #if DEBUGFULL
191         if(logObject && DEBUGLOG)
192         {
193             logObject->logMsg("doing multiSeqAlign\n");
194         }
195     #endif
196     MSA* msaObj = new MSA();
197     count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeight, progSteps.get(), 0);
198     delete msaObj;
199     //cout << "alignment finished!\n";
200 
201     //end = time (NULL);
202     //dif = difftime(end, start);
203     //cout << "It took " << dif << " seconds so Far\n";
204 
205     if (count <= 0)
206     {
207         return;
208     }
209 
210     if (userParameters->getMenuFlag())
211     {
212         cout << "\n\n\n";
213     }
214 
215     /// Do iteration to improve alignment!!!
216     if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
217     {
218         //userParameters->setNumIterations(_numSeqs * 2);
219         Iteration iterateObj;
220         iterateObj.removeFirstIterate(&alignmentObj);
221         alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
222         if(userParameters->getDisplayInfo())
223             cout << "Finished iteration\n";
224     }
225 
226     if (statsObject->isEnabled()) {
227         statsObject->logAlignedSeqStats(&alignmentObj);
228     }
229 
230 
231 
232     /// STEP 4: OUTPUT THE ALIGNMENT
233     if(createOutput)
234     {
235         alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
236     }
237     (*phylipName) = "";
238 
239     //end = time (NULL);
240     //dif = difftime(end, start);
241     //cout << "It took " << dif << " seconds\n";
242 }
243 
244 /** ****************************************************************************
245  * The function sequencesAlignToProfile is used to align a set of sequences to *
246  * a profile                                                                   *
247  *******************************************************************************/
sequencesAlignToProfile(string * phylipName)248 void Clustal::sequencesAlignToProfile(string* phylipName)
249 {
250     string path;
251     string treeName;
252     bool useTree;
253     int i, j, count;
254     float dscore;
255     bool saveSS2;
256     AlignmentOutput alignOutput;
257 
258     if(userParameters->getProfile1Empty() && userParameters->getMenuFlag())
259     {
260         utilityObject->error("No profile in memory. Input 1st profile first.\n");
261         return;
262     }
263 
264     if(userParameters->getProfile2Empty() && userParameters->getMenuFlag())
265     {
266         utilityObject->error("No sequences in memory. Input sequences first.\n");
267         return;
268     }
269 
270     utilityObject->getPath(userParameters->getProfile2Name(), &path);
271 
272     if(userParameters->getMenuFlag() || !userParameters->getInteractive())
273     {
274         if(!alignOutput.openAlignmentOutput(path))
275         {
276             return;
277         }
278     }
279     else
280     {
281         // We are using clustalQT.
282         // Open all the files.
283         if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
284         {
285             return; // could not open the files.
286         }
287     }
288 
289     newSeq = alignmentObj.getProfile1NumSeqs() + 1;
290 
291     // check for secondary structure information for list of sequences
292 
293     saveSS2 = userParameters->getUseSS2();
294 
295     if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true
296         && (alignmentObj.getNumSeqs() - alignmentObj.getProfile1NumSeqs() > 1))
297     {
298         if (userParameters->getStructPenalties2() == SECST)
299         {
300             utilityObject->warning("\n\nWARNING: ignoring secondary structure for a list of sequences\n\n");
301         }
302         else if (userParameters->getStructPenalties2() == GMASK)
303         {
304             utilityObject->warning("\n\nWARNING: ignoring gap penalty mask for a list of sequences\n\n");
305         }
306         userParameters->setUseSS2(false);
307     }
308 
309     DistMatrix distMat;
310     int _numSeqs = alignmentObj.getNumSeqs();
311     distMat.ResizeRect(_numSeqs + 1);
312 
313     //
314     // Convert to similarities!!!!!!!!
315     // This is calcualting the similarities of the sequences in the profile part
316     //
317     for (i = 1; i <= newSeq; i++)
318     {
319         for (j = i + 1; j <= newSeq; j++)
320         {
321            dscore = alignmentObj.countid(i,j);
322            distMat(i, j) = ((double)100.0 - (double)dscore)/(double)100.0;
323            distMat(j, i) = distMat(i, j);
324         }
325     }
326     //distMat.printArray();
327 
328     InFileStream _treeFile;  //nige
329 
330     useTree = false;
331     //
332     // Note put this into a separate function!!!!!
333    //
334     if (_numSeqs >= 2)
335     {
336         useTree = useExistingGuideTree(Sequences, phylipName, path);
337     }
338 
339     if (userParameters->getSaveParameters())
340     {
341         userParameters->createParameterOutput();
342     }
343 
344     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
345     {
346         alignmentObj.resetProfile2();
347     }
348     else
349     {
350         alignmentObj.fixGaps();
351     }
352 
353     int _length = 0;
354     if (userParameters->getStructPenalties1() == SECST)
355     {
356         _length = alignmentObj.getSeqLength(1);
357         calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
358                            alignmentObj.getGapPenaltyMask1());
359     }
360     if (userParameters->getStructPenalties2() == SECST)
361     {
362         _length = alignmentObj.getSeqLength(alignmentObj.getProfile1NumSeqs() + 1);
363         calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
364                            alignmentObj.getGapPenaltyMask2());
365     }
366 
367     // PROGRESSIVE ALIGNMENT ALGORITHM //
368     vector<int> seqWeights(_numSeqs + 1);
369 
370     bool success = false;
371 
372     if (useTree == false) // create the new tree file, if necessary
373     {
374         if(userParameters->getDisplayInfo())
375         {
376             cout << "Start of Pairwise alignments\n";
377             cout << "Aligning...\n";
378         }
379 
380         if(userParameters->getDNAFlag())
381         {
382             userParameters->setDNAParams();
383         }
384         else
385         {
386             userParameters->setProtParams();
387         }
388 
389         // STEP 1: CALCULATE DISTANCE MATRIX USING PAIRWISE ALIGNMENT //
390         PairwiseAlignBase* pairwiseDist;
391         if (userParameters->getQuickPairAlign())
392         {
393             pairwiseDist = new FastPairwiseAlign();
394         }
395         else
396         {
397             pairwiseDist = new FullPairwiseAlign();
398         }
399 
400         // Generate distance matrix!
401         pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, newSeq-2, _numSeqs);
402         delete pairwiseDist;
403 
404         if(userParameters->getDisplayInfo())
405             cout << "\n\n";
406 
407         TreeInterface calcSeqWeights;
408         calcSeqWeights.getWeightsFromDistMat(&seqWeights, &distMat, &alignmentObj, 1,
409                                              _numSeqs, phylipName, &success);
410     }
411     else
412     {
413         TreeInterface calcSeqWeights;
414         calcSeqWeights.getWeightsFromGuideTree(&alignmentObj, &distMat, phylipName,
415         &seqWeights, 1, _numSeqs, &success);
416     }
417 
418     if(!success)
419     {
420         return;
421     }
422     // If it is true, call the function here to get the seqweights etc from it.
423 
424     // if users request only the guide tree, return
425     if (userParameters->getNewTreeFile())
426     {
427         return;
428     }
429 
430 
431     MSA* msaObj = new MSA();
432     count = msaObj->seqsAlignToProfile(&alignmentObj, &distMat, &seqWeights, newSeq - 2,
433                                        *phylipName);
434     delete msaObj;
435 
436     userParameters->setUseSS2(saveSS2);
437 
438     if (count <= 0)
439     {
440         return;
441     }
442 
443     if (userParameters->getMenuFlag())
444     {
445         cout << "\n\n\n";
446     }
447 
448     /// STEP 4: OUTPUT THE ALIGNMENT  //
449     alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
450 
451     (*phylipName) = "";
452 }
453 
454 /** ****************************************************************************
455  * The function profileAlign is used to align two profiles                     *
456  *******************************************************************************/
profileAlign(string * p1TreeName,string * p2TreeName)457 void Clustal::profileAlign(string* p1TreeName, string* p2TreeName)
458 {
459     string path;
460     //string treeName;
461     bool useTree1, useTree2;
462     int count, i, j, dscore;
463     int _profile1NumSeqs = alignmentObj.getProfile1NumSeqs();
464     AlignmentOutput alignOutput;
465 
466     if(userParameters->getProfile1Empty() || userParameters->getProfile2Empty())
467     {
468         utilityObject->error("No sequences in memory. Load sequences first.\n\n");
469         return;
470     }
471 
472     utilityObject->getPath(userParameters->getProfile1Name(), &path);
473 
474     if(userParameters->getMenuFlag() || !userParameters->getInteractive())
475     {
476         if(!alignOutput.openAlignmentOutput(path))
477         {
478             return;
479         }
480     }
481     else
482     {
483         // We are using clustalQT.
484         // Open all the files.
485         if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
486         {
487             return; // could not open the files.
488         }
489     }
490 
491     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
492     {
493         alignmentObj.resetProfile1();
494         alignmentObj.resetProfile2();
495     }
496     else
497     {
498         alignmentObj.fixGaps();
499     }
500 
501     // Check if there exists a tree for profile1
502     useTree1 = false;
503 
504     if (_profile1NumSeqs >= 2)
505     {
506         useTree1 = useExistingGuideTree(Profile1, p1TreeName, path);
507     }
508 
509     // Check if there exists a tree for profile2
510     useTree2 = false;
511 
512     utilityObject->getPath(userParameters->getProfile2Name(), &path);
513 
514     if (alignmentObj.getNumSeqs() - _profile1NumSeqs >= 2)
515     {
516         useTree2 = useExistingGuideTree(Profile2, p2TreeName, path);
517     }
518 
519     if (userParameters->getSaveParameters())
520     {
521         userParameters->createParameterOutput();
522     }
523     int _length = 0;
524     if (userParameters->getStructPenalties1() == SECST)
525     {
526         _length = alignmentObj.getSeqLength(1);
527         calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
528                            alignmentObj.getGapPenaltyMask1());
529     }
530 
531     if (userParameters->getStructPenalties2() == SECST)
532     {
533         _length = alignmentObj.getSeqLength(_profile1NumSeqs + 1);
534         calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
535                            alignmentObj.getGapPenaltyMask2());
536     }
537 
538     // Declare the distance matrix
539     DistMatrix distMat;
540     int _numSeqs = alignmentObj.getNumSeqs();
541     distMat.ResizeRect(_numSeqs + 1);
542 
543     if (useTree1 == false)
544     {
545         if (_profile1NumSeqs >= 2)
546         {
547             for (i = 1; i <= _profile1NumSeqs; i++)
548             {
549                 for (j = i + 1; j <= _profile1NumSeqs; j++)
550                 {
551                     dscore = static_cast<int>(alignmentObj.countid(i,j));
552                     distMat(i, j) = (100.0 - dscore)/100.0;
553                     distMat(j, i) = distMat(i, j);
554                 }
555             }
556             utilityObject->getPath(userParameters->getProfile1Name(), &path);
557 
558             // We need to get the name of the file first because the message is different!
559             if(userParameters->getMenuFlag())
560             {
561                 promptForNewGuideTreeName(Profile1, p1TreeName, path);
562             }
563             else
564             {
565                 string treeName;
566                 treeName = path + "dnd";
567                 p1TreeName = new string(treeName);
568             }
569         }
570 
571         if (useTree2 == false)
572         {
573             if(_numSeqs - _profile1NumSeqs >= 2)
574             {
575                 for (i = 1 + _profile1NumSeqs; i <= _numSeqs; i++)
576                 {
577                     for (j = i + 1; j <= _numSeqs; j++)
578                     {
579                         dscore = static_cast<int>(alignmentObj.countid(i,j));
580                         distMat(i, j) = (100.0 - dscore) / 100.0;
581                         distMat(j, i) = distMat(i, j);
582                     }
583                 }
584 
585                 utilityObject->getPath(userParameters->getProfile2Name(), &path);
586 
587                 if(userParameters->getMenuFlag())
588                 {
589                     promptForNewGuideTreeName(Profile2, p2TreeName, path);
590                 }
591                 else
592                 {
593                     string treeName;
594                     treeName = path + "dnd";
595                     p2TreeName = new string(treeName);
596                 }
597 
598             }
599         }
600     }
601 
602     bool success = false;
603     vector<int> prof1Weight, prof2Weight;
604     prof1Weight.resize(_profile1NumSeqs);
605     prof2Weight.resize(_numSeqs);
606     TreeInterface tree;
607     tree.getWeightsForProfileAlign(&alignmentObj, &distMat, p1TreeName, &prof1Weight,
608                                    p2TreeName, &prof2Weight, _numSeqs, _profile1NumSeqs,
609                                    useTree1, useTree2, &success);
610     if(!success)
611     {
612         return;
613     }
614 
615     // do an initial alignment to get the pairwise identities between the two
616     // profiles - used to set parameters for the final alignment
617     MSA* msaObj = new MSA();
618 
619     alignmentObj.resetProfile1();
620     alignmentObj.resetProfile2();
621 
622     count = msaObj->doProfileAlign(&alignmentObj, &distMat, &prof1Weight, &prof2Weight);
623     delete msaObj;
624 
625     if (count == 0)
626     {
627         return;
628     }
629     if(userParameters->getMenuFlag())
630     {
631         cout << "\n\n\n";
632     }
633 
634     alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
635 
636     (*p1TreeName) = "";
637     (*p2TreeName) = "";
638 }
639 
doGuideTreeOnly(string * phylipName)640 void Clustal::doGuideTreeOnly(string* phylipName)
641 {
642     string path;
643     if(userParameters->getEmpty())
644     {
645         utilityObject->error("No sequences in memory. Load sequences first.\n");
646         return;
647     }
648 
649     userParameters->setStructPenalties1(NONE);
650     userParameters->setStructPenalties2(NONE);
651 
652     alignmentObj.clearSecStruct1();
653     alignmentObj.clearSecStruct2();
654 
655     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
656     {
657         alignmentObj.resetAlign();
658     }
659 
660     utilityObject->getPath(userParameters->getSeqName(), &path);
661     int _numSeqs = alignmentObj.getNumSeqs();
662 
663     if (_numSeqs < 2)
664     {
665         utilityObject->error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.\n");
666         return;
667     }
668 
669     if (userParameters->getSaveParameters())
670     {
671         userParameters->createParameterOutput();
672     }
673 
674     if(userParameters->getDisplayInfo())
675     {
676         cout << "Start of Pairwise alignments\n";
677         cout << "Aligning...\n";
678     }
679 
680     if(userParameters->getDNAFlag())
681     {
682         userParameters->setDNAParams();
683     }
684     else
685     {
686         userParameters->setProtParams();
687     }
688 
689     ///STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX //
690 
691     DistMatrix distMat;
692     distMat.ResizeRect(_numSeqs + 1);
693 
694     PairwiseAlignBase* pairwiseDist;
695     if (userParameters->getQuickPairAlign())
696     {
697         pairwiseDist = new FastPairwiseAlign();
698     }
699     else
700     {
701         pairwiseDist = new FullPairwiseAlign();
702     }
703     // Generate distance matrix!
704     pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
705     delete pairwiseDist;
706 
707     /* AW DEBUG
708        fprintf(stdout, "\nDEBUG: distance matrix following...\n");
709        distMat.printArray();
710     */
711 
712     bool success = false;
713     TreeInterface tree;
714     tree.generateTreeFromDistMat(&distMat, &alignmentObj, 1, _numSeqs, phylipName, &success);
715 
716     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
717     {
718         alignmentObj.resetAlign();
719     }
720 
721     (*phylipName) = "";
722 }
723 
724 
725 // FIXME this is to  90% identical with align(), please merge
doAlignUseOldTree(string * phylipName)726 void Clustal::doAlignUseOldTree(string* phylipName)
727 {
728     string path;
729     int count;
730     AlignmentOutput alignOutput;
731 
732     if(userParameters->getEmpty())
733     {
734         utilityObject->error("No sequences in memory. Load sequences first.\n");
735         return;
736     }
737 
738     userParameters->setStructPenalties1(NONE);
739     userParameters->setStructPenalties2(NONE);
740 
741     alignmentObj.clearSecStruct1();
742     alignmentObj.clearSecStruct2();
743 
744     utilityObject->getPath(userParameters->getSeqName(), &path);
745 
746     if(userParameters->getMenuFlag() || !userParameters->getInteractive())
747     {
748         if(!alignOutput.openAlignmentOutput(path))
749         {
750             return;
751         }
752     }
753     else
754     {
755         // We are using clustalQT.
756         // Open all the files.
757         if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
758         {
759             return; // could not open the files.
760         }
761     }
762 
763     if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
764     {
765         alignmentObj.resetAlign();
766     }
767 
768     int _numSeqs = alignmentObj.getNumSeqs();
769     DistMatrix distMat;
770     distMat.ResizeRect(_numSeqs + 1);
771     utilityObject->getPath(userParameters->getSeqName(), &path);
772 
773     if (_numSeqs >= 2)
774     {
775 
776         if(userParameters->getMenuFlag())
777         {
778             phylipName = new string(path);
779             *phylipName = *phylipName + "dnd";
780 
781             string message, answer;
782             message = "\nEnter a name for the guide tree file [" + *phylipName + "]";
783             utilityObject->getStr(message, answer);
784 
785             if(!answer.empty())
786             {
787                 phylipName = new string(answer);
788             }
789         }
790 
791         if(userParameters->getMenuFlag() || !userParameters->getInteractive())
792         {
793             InFileStream _treeFile;  //nige
794             _treeFile.open(phylipName->c_str());
795             if(!_treeFile.is_open())
796             {
797                 utilityObject->error("Cannot open tree file [%s]\n", phylipName->c_str());
798                 return;
799             }
800             _treeFile.close();
801         }
802     }
803     else
804     {
805         if(userParameters->getDisplayInfo())
806         {
807             cout << "Start of Pairwise alignments\n";
808             cout << "Aligning...\n";
809         }
810         if(userParameters->getDNAFlag())
811         {
812             userParameters->setDNAParams();
813         }
814         else
815         {
816             userParameters->setProtParams();
817         }
818 
819         PairwiseAlignBase* pairwiseDist;
820         if (userParameters->getQuickPairAlign())
821         {
822             pairwiseDist = new FastPairwiseAlign();
823         }
824         else
825         {
826             pairwiseDist = new FullPairwiseAlign();
827         }
828         // Generate distance matrix!
829         pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
830         delete pairwiseDist;
831     }
832 
833     if (userParameters->getSaveParameters())
834     {
835         userParameters->createParameterOutput();
836     }
837     auto_ptr<AlignmentSteps> progSteps;
838     vector<int> seqWeights(_numSeqs + 1);
839     bool success = false;
840     TreeInterface tree;
841     progSteps = tree.getWeightsAndStepsFromTree(&alignmentObj, &distMat, phylipName,
842                                                 &seqWeights, 1, _numSeqs, &success);
843 
844     if(!success)
845     {
846         return;
847     }
848 
849     MSA* msaObj = new MSA();
850     count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeights, progSteps.get(), 0);
851     delete msaObj;
852 
853     if (count <= 0)
854     {
855         return;
856     }
857 
858     if (userParameters->getMenuFlag())
859     {
860         cout << "\n\n\n";
861     }
862 
863     // same as in align()
864     if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
865     {
866         //userParameters->setNumIterations(_numSeqs * 2);
867         Iteration iterateObj;
868         iterateObj.removeFirstIterate(&alignmentObj);
869         alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
870         if(userParameters->getDisplayInfo())
871             cout << "Finished iteration\n";
872     }
873 
874 
875     alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
876 
877     phylipName = new string("");
878 }
879 
880 
881 
getFullHelp()882 void Clustal::getFullHelp()
883 {
884     vector<string> markers;
885     Help myhelp;
886     bool showtitle = true;
887 
888     markers = myhelp.ListSectionMarkers();
889     for (unsigned int i=0; i<markers.size(); i++) {
890         string m =  markers[i];
891         getHelp(m, showtitle);
892     }
893 }
894 
getHelp(char helpPointer,bool printTitle)895 void Clustal::getHelp(char helpPointer, bool printTitle)
896 {
897     string s = "";
898     s += helpPointer;
899     Clustal::getHelp(s, printTitle);
900 }
901 
902 
903 
904 /*
905  * Andreas Wilm (UCD): edited to support new help system (separate
906  * file now which is compiled in)
907  *
908  * Author?
909  * The clustal help file is called clustalw_help. Should be in the same
910  * directory. I have changed it to a C++ style implementation. I am taking
911  * out support for VMS until later. We will see if we are still supporting that platform.
912  */
getHelp(string helpPointer,bool printTitle)913 void Clustal::getHelp(string helpPointer, bool printTitle)
914 {
915     Help myhelp;
916     string helpString;
917 
918     helpString =  myhelp.GetSection(helpPointer);
919 
920     if (printTitle) {
921         helpString = "\n\n>> HELP " +
922             helpPointer +
923             " <<             " +
924             myhelp.GetSectionTitle(helpPointer) +
925             "\n\n" +
926             helpString;
927     }
928 
929 
930     if(! userParameters->getMenuFlag())
931     {
932         cout << helpString;
933     }
934     else
935     {
936         string::size_type lastPos = 0;
937         string::size_type pos = helpString.find_first_of("\n", lastPos);
938         int nlines = 0;
939 
940         while (pos != string::npos)
941         {
942             cout << helpString.substr(lastPos, pos - lastPos);
943             nlines++;
944 
945             if(nlines >= PAGE_LEN)
946             {
947                 char tempChar;
948                 cout << "\nPress [RETURN] to continue or  X  to stop ";
949                 cin.get(tempChar);
950                 if(toupper(tempChar) == 'X')
951                 {
952                     return;
953                 }
954                 else
955                 {
956                     nlines = 0;
957                 }
958             }
959 
960             lastPos = pos; //helpString.find_first_not_of("\n", pos);
961             pos = helpString.find_first_of("\n", lastPos+1);
962             //cerr << "DEBUG: pos=" << pos << " lastPos=" << lastPos << "/" << helpString.length() << "\n";
963         }
964     }
965 }
966 
967 
968 
969 /**
970  * The wrap functions will be used with interface classes to perform the tasks
971  * that were previously done there.
972  */
sequenceInput(bool append,string * offendingSeq)973 int Clustal::sequenceInput(bool append, string *offendingSeq)
974 {
975     int code;
976     // If we are not appending, we need to clear the Alignment object.
977     if(!append)
978     {
979         alignmentObj.clearAlignment();
980     }
981 
982     FileReader readSeqFile;
983     code = readSeqFile.seqInput(&alignmentObj, append, offendingSeq);
984     return code;
985 }
986 
987 /**
988  * profile1Input is a wrapper function for the profileInput. This is because the
989  * other classes dont have access to FileReader
990  */
profile1Input(string profile1Name)991 int Clustal::profile1Input(string profile1Name)
992 {
993     int code;
994     // I need to clear out the Alignment object.
995     alignmentObj.clearAlignment();
996     userParameters->setProfileNum(1);
997 
998     userParameters->setSeqName(profile1Name);
999     userParameters->setProfile1Name(profile1Name);
1000 
1001     FileReader readProfileFile;
1002     code = readProfileFile.profileInput(&alignmentObj);
1003 
1004     // If we are using the commandline check if there are seqs!
1005     if(!userParameters->getInteractive())
1006     {
1007         // AW: FIXME code should be handled higher up the stack and check all codes
1008         // also, shouldnt we use  utilityObject->error()?
1009         if(code != OK)
1010         {
1011             if (code==NOSEQUENCESINFILE)
1012                 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1013             else if (code==ALLNAMESNOTDIFFERENT)
1014                 cerr << "ERROR: Not all sequence names are different" << std::endl;
1015             else
1016                 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1017             // AW: should we really exit here? What if called from clustalx?
1018             exit(2);
1019         }
1020     }
1021 
1022     return code;
1023 }
1024 
1025 /**
1026  * profile2Input is a wrapper function for the profileInput. This is because the
1027  * other classes dont have access to FileReader
1028  */
profile2Input(string profile2Name)1029 int Clustal::profile2Input(string profile2Name)
1030 {
1031     int code;
1032 
1033     if(userParameters->getProfileNum() == 2)
1034     {
1035         // Remove the sequences from the previous one.
1036         int numSeqsProfile1 = alignmentObj.getProfile1NumSeqs();
1037         alignmentObj.resizeSeqArray(numSeqsProfile1 + 1);
1038         // Clear anything from profile2 in alignment.
1039         alignmentObj.clearSecStruct2();
1040     }
1041     userParameters->setProfileNum(2);
1042 
1043     userParameters->setSeqName(profile2Name);
1044     userParameters->setProfile2Name(profile2Name);
1045 
1046 
1047     FileReader readProfileFile;
1048     cout << "before profileInput\n";
1049     code = readProfileFile.profileInput(&alignmentObj);
1050     cout << "after profileInput\n";
1051 
1052     if(!userParameters->getInteractive())
1053     {
1054         // AW: FIXME code should be handled higher up the stack and check all codes
1055         // also, shouldnt we use  utilityObject->error()?
1056         if(code != OK) {
1057             if (code==NOSEQUENCESINFILE)
1058                 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1059             else if (code==ALLNAMESNOTDIFFERENT)
1060                 cerr << "ERROR: Not all sequence names are different" << std::endl;
1061             else
1062                 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1063             // AW: should we really exit here? What if called from clustalx?
1064             // DD: fixed
1065             if(!userParameters->getGui())
1066                 exit(2);
1067         }
1068     }
1069     return code;
1070 }
1071 
1072 
1073 /**
1074  * The function commandline_readseq is called by the command line to
1075  * read in the sequences. It also prints out the names. This was
1076  * previously done by the command line parser.
1077  */
commandLineReadSeq(int firstSeq)1078 int Clustal::commandLineReadSeq(int firstSeq)
1079 {
1080     // Clear the alignment, although obviously there shouldnt be anything in it.
1081     alignmentObj.clearAlignment();
1082     userParameters->setProfileNum(0);
1083     int code = 0;
1084     string offendingSeq;
1085     FileReader readInputFile;
1086     code = readInputFile.readSeqs(&alignmentObj, firstSeq, &offendingSeq);
1087 
1088     if(code != OK)
1089     {
1090         if(code == CANNOTOPENFILE)
1091         {
1092             utilityObject->error("Cannot open input file. No alignment!\n");
1093         }
1094         else if(code == NOSEQUENCESINFILE)
1095         {
1096             utilityObject->error("No sequences in file. No alignment!\n");
1097         }
1098         else if(code == ALLNAMESNOTDIFFERENT)
1099         {
1100             utilityObject->error("Multiple sequences found with same name (found %s at least twice)!", offendingSeq.c_str());
1101         }
1102         else if(code == EMPTYSEQUENCE)
1103         {
1104             utilityObject->error("Empty sequences found: %s\n", offendingSeq.c_str());
1105         }
1106         else if(code == SEQUENCETOOBIG)
1107         {
1108             utilityObject->error("Sequence(s) too big: %s\n", offendingSeq.c_str());
1109         }
1110         else if(code == BADFORMAT)
1111         {
1112             utilityObject->error("Sequences are badly formatted!\n");
1113         }
1114         else
1115         {
1116             utilityObject->error("\nThere was a problem reading in the file. No alignment!\n");
1117         }
1118         exit(-1);
1119     }
1120 
1121     alignmentObj.printSequencesAddedInfo();
1122 
1123     userParameters->setEmpty(false);
1124     return code;
1125 }
1126 
1127 /*
1128  * The function outputNow is used to output the alignment. It can be called by the
1129  * menu or command line parser.
1130  */
outputNow()1131 void Clustal::outputNow()
1132 {
1133     if(alignmentObj.getNumSeqs() > 0)
1134     {
1135         string path = "";
1136         if(!userParameters->getMenuFlag())
1137         {
1138             string _seqName = userParameters->getSeqName();
1139             utilityObject->getPath(_seqName, &path);
1140         }
1141         AlignmentOutput alignmentOutput;
1142         alignmentOutput.openAlignmentOutput(path);
1143 
1144         alignmentOutput.createAlignmentOutput(&alignmentObj, 1, alignmentObj.getNumSeqs());
1145     }
1146     else
1147     {
1148         utilityObject->error("No sequences have been loaded\n");
1149     }
1150 }
1151 
phylogeneticTree(string * phylipName,string * clustalName,string * distName,string * nexusName,string pimName)1152 void Clustal::phylogeneticTree(string* phylipName, string* clustalName, string* distName,
1153                                string* nexusName, string pimName)
1154 {
1155     TreeNames treeNames;
1156     treeNames.clustalName = *clustalName;
1157     treeNames.distName = *distName;
1158     treeNames.nexusName = *nexusName;
1159     treeNames.phylipName = *phylipName;
1160     treeNames.pimName = pimName;
1161     TreeInterface tree;
1162     tree.treeFromAlignment(&treeNames, &alignmentObj);
1163 }
1164 
bootstrapTree(string * phylipName,string * clustalName,string * nexusName)1165 void Clustal::bootstrapTree(string* phylipName, string* clustalName, string* nexusName)
1166 {
1167     TreeNames treeNames;
1168     treeNames.clustalName = *clustalName;
1169     treeNames.nexusName = *nexusName;
1170     treeNames.phylipName = *phylipName;
1171     TreeInterface tree;
1172     tree.bootstrapTree(&treeNames, &alignmentObj);
1173 }
1174 
initInterface()1175 void Clustal::initInterface()
1176 {
1177     userParameters->setEmpty(true);
1178     userParameters->setProfile1Empty(true);
1179     userParameters->setProfile2Empty(true);
1180 }
1181 
1182 /**
1183  * The function calcGapPenaltyMask is used to calculate the gapMask from the secondary
1184  * structure information.
1185  */
calcGapPenaltyMask(int prfLength,vector<char> * mask,vector<char> * gapMask)1186 void Clustal::calcGapPenaltyMask(int prfLength, vector<char>* mask, vector<char>* gapMask)
1187 {
1188     int i,j;
1189 
1190     vector<char> structMask;
1191     structMask.resize(prfLength + 1);
1192 
1193     int _helixEndPlus = userParameters->getHelixEndPlus();
1194     int _helixEndMinus = userParameters->getHelixEndMinus();
1195     int _strandEndPlus = userParameters->getStrandEndPlus();
1196     int _strandEndMinus = userParameters->getStrandEndMinus();
1197 
1198     i = 0;
1199     while (i < prfLength)
1200     {
1201         if (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1202         {
1203             for (j = -_helixEndPlus; j < 0; j++)
1204             {
1205                 if(i + j < (int)structMask.size())
1206                 {
1207                     if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a') &&
1208                         (tolower(structMask[i + j]) != 'b'))
1209                     {
1210                         structMask[i + j] = 'a';
1211                     }
1212                 }
1213             }
1214             for (j = 0; j < _helixEndMinus; j++)
1215             {
1216                 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'a'
1217                                     && (*mask)[i + j] != '$'))
1218                 {
1219                     break;
1220                 }
1221                 structMask[i + j] = 'a';
1222             }
1223             i += j;
1224             while (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1225             {
1226                 if (i >= prfLength)
1227                 {
1228                     break;
1229                 }
1230                 if ((*mask)[i] == '$')
1231                 {
1232                     structMask[i] = 'A';
1233                     i++;
1234                     break;
1235                 }
1236                 else
1237                 {
1238                     structMask[i] = (*mask)[i];
1239                 }
1240                 i++;
1241             }
1242             for (j = 0; j < _helixEndMinus; j++)
1243             {
1244                 if ((i - j - 1 >= 0) && (tolower((*mask)[i - j - 1]) == 'a'
1245                                          || (*mask)[i - j - 1] == '$'))
1246                 {
1247                     structMask[i - j - 1] = 'a';
1248                 }
1249             }
1250             for (j = 0; j < _helixEndPlus; j++)
1251             {
1252                 if (i + j >= prfLength)
1253                 {
1254                     break;
1255                 }
1256                 structMask[i+j] = 'a';
1257             }
1258         }
1259         else if (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1260         {
1261             for (j = -_strandEndPlus; j < 0; j++)
1262             {
1263                 if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a')
1264                                  && (tolower(structMask[i + j]) != 'b'))
1265                 {
1266                     structMask[i + j] = 'b';
1267                 }
1268             }
1269             for (j = 0; j < _strandEndPlus; j++)
1270             {
1271                 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'b'
1272                                           && (*mask)[i + j] != '%'))
1273                 {
1274                     break;
1275                 }
1276                 structMask[i+j] = 'b';
1277             }
1278             i += j;
1279             while (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1280             {
1281                 if (i >= prfLength)
1282                 {
1283                     break;
1284                 }
1285                 if ((*mask)[i] == '%')
1286                 {
1287                     structMask[i] = 'B';
1288                     i++;
1289                     break;
1290                 }
1291                 else
1292                 {
1293                     structMask[i] = (*mask)[i];
1294                 }
1295                 i++;
1296             }
1297             for (j = 0; j < _strandEndMinus; j++)
1298             {
1299                 if ((i-j-1>=0) && (tolower((*mask)[i-j-1]) == 'b' || (*mask)[i-j-1] == '%'))
1300                 {
1301                     structMask[i - j - 1] = 'b';
1302                 }
1303             }
1304             for (j = 0; j < _strandEndPlus; j++)
1305             {
1306                 if (i + j >= prfLength)
1307                 {
1308                     break;
1309                 }
1310                 structMask[i+j] = 'b';
1311             }
1312         }
1313         else
1314         {
1315             i++;
1316         }
1317     }
1318 
1319     for(i = 0; i < prfLength;i++)
1320     {
1321         switch (structMask[i])
1322         {
1323             case 'A':
1324                 (*gapMask)[i] = userParameters->getHelixPenalty() + '0';
1325                 break;
1326             case 'a':
1327                 (*gapMask)[i] = userParameters->getHelixEndPenalty() + '0';
1328                 break;
1329             case 'B':
1330                 (*gapMask)[i] = userParameters->getStrandPenalty() +'0';
1331                 break;
1332             case 'b':
1333                 (*gapMask)[i] = userParameters->getStrandEndPenalty() + '0';
1334                 break;
1335             default:
1336                 (*gapMask)[i] = userParameters->getLoopPenalty() + '0';
1337                 break;
1338         }
1339     }
1340 }
1341 
1342 
1343 /**
1344  * The function QTcalcLowScoreSegments is used to calculate the residues in the sequences
1345  * that score badly.
1346  * @param firstSeq first seq in the alignment or profile
1347  * @param nSeqs the number of sequences
1348  * @param nCols the length of the longest seq
1349  * @param seqWeight a vector to hold the sequence weights
1350  * @param lowScoreRes
1351  * @param seqWeightCalculated
1352  */
QTcalcLowScoreSegments(LowScoreSegParams * params)1353 void Clustal::QTcalcLowScoreSegments(LowScoreSegParams* params)
1354 {
1355     int i, j;
1356     float sum, prevSum;
1357     float gscale;
1358     vector<int> weight;
1359     int sweight;
1360     vector<int> gaps;
1361     int matrix[NUMRES][NUMRES];
1362     vector<float> fsum;
1363     vector<float> bsum;
1364     vector<float> pscore;
1365     int _maxAA = userParameters->getMaxAA();
1366 
1367     // STEP 1: Calculate the sequence weights
1368     QTcalcWeightsForLowScoreSeg(params);
1369 
1370     subMatrix->getQTMatrixForLowScoreSeg(matrix);
1371 
1372     const SeqArray* seqArray = alignmentObj.getSeqArray();
1373 
1374     gaps.resize(params->nCols + 1);
1375 
1376     for (j = 1; j <= params->nCols; j++)
1377     {
1378         gaps[j - 1] = 0;
1379         for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs; i++)
1380         {
1381             if (j < alignmentObj.getSeqLength(i))
1382             {
1383                 if (((*seqArray)[i][j] < 0) || ((*seqArray)[i][j] > _maxAA))
1384                 {
1385                     gaps[j-1]++;
1386                 }
1387             }
1388         }
1389     }
1390 
1391     // STEP 2: Calculate the profile
1392     LowScoreSegProfile lowScoreProfile(params->nCols, params->firstSeq,
1393                                        params->firstSeq + params->nSeqs);
1394 
1395     weight.resize(params->firstSeq + params->nSeqs + 1);
1396 
1397     for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1398     {
1399         weight[i] = (*(params->seqWeight))[i - params->firstSeq];
1400     }
1401 
1402     lowScoreProfile.calcLowScoreSegProfile(seqArray, matrix, &weight);
1403 
1404     const SeqArray* profile = lowScoreProfile.getProfilePtr();
1405 
1406     sweight = 0;
1407     for(i = params->firstSeq; i < params->firstSeq + params->nSeqs; i++)
1408     {
1409         sweight += weight[i];
1410     }
1411 
1412     //Now, use the profile scores to mark segments of each sequence which score badly.
1413 
1414     fsum.resize(params->nCols + 2);
1415     bsum.resize(params->nCols + 2);
1416     pscore.resize(params->nCols + 2);
1417 
1418     for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs + 1; i++)
1419     {
1420     // In a forward phase, sum the profile scores. Mark negative sums as exceptions.
1421     //If the sum is positive, then it gets reset to 0.
1422         sum = 0.0;
1423         for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1424         {
1425             gscale = (float)(params->nSeqs - gaps[j - 1]) / (float)params->nSeqs;
1426             if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1427             {
1428                 pscore[j - 1] = 0.0;
1429                 sum = 0.0;
1430             }
1431             else
1432             {
1433                 pscore[j-1]=((*profile)[j][(*seqArray)[i][j]]- weight[i - 1] *
1434                              matrix[(*seqArray)[i][j]][(*seqArray)[i][j]]) * gscale / sweight;
1435             }
1436             sum += pscore[j - 1];
1437             if(sum > 0.0)
1438             {
1439                 sum = 0.0;
1440             }
1441             fsum[j - 1] = sum;
1442         }
1443 // trim off any positive scoring residues from the end of the segments
1444         prevSum = 0;
1445         for(j = alignmentObj.getSeqLength(i) - 1; j >= 0; j--)
1446         {
1447             if(prevSum >= 0.0 && fsum[j] < 0.0 && pscore[j] >= 0.0)
1448             {
1449                 fsum[j] = 0.0;
1450             }
1451             prevSum = fsum[j];
1452         }
1453 
1454 // Now, in a backward phase, do the same summing process.
1455         sum = 0.0;
1456         for(j = alignmentObj.getSeqLength(i); j >= 1; j--)
1457         {
1458             if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1459             {
1460                 sum = 0;
1461             }
1462             else
1463             {
1464                 sum += pscore[j - 1];
1465             }
1466             if(sum > 0.0)
1467             {
1468                 sum = 0.0;
1469             }
1470             bsum[j - 1] = sum;
1471         }
1472 // trim off any positive scoring residues from the start of the segments
1473         prevSum = 0;
1474         for(j = 0; j < alignmentObj.getSeqLength(i); j++)
1475         {
1476             if(prevSum >= 0.0 && bsum[j] < 0.0 && pscore[j] >= 0.0)
1477             {
1478                 bsum[j] = 0.0;
1479             }
1480             prevSum = bsum[j];
1481         }
1482 //Mark residues as exceptions if they score negative in the forward AND backward directions.
1483         for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1484         {
1485             if(fsum[j - 1] < 0.0 && bsum[j - 1] < 0.0)
1486             {
1487                 if((*seqArray)[i][j] >= 0 && (*seqArray)[i][j] < _maxAA)
1488                 {
1489                     (*(params->lowScoreRes))[i - params->firstSeq - 1][j - 1] = -1;
1490                 }
1491             }
1492         }
1493     }
1494 
1495 // Finally, apply the length cutoff to the segments - removing segments shorter
1496 //than the cutoff
1497 
1498     QTremoveShortSegments(params);
1499 
1500 }
1501 
QTremoveShortSegments(LowScoreSegParams * params)1502 void Clustal::QTremoveShortSegments(LowScoreSegParams* params)
1503 {
1504     int i,j,k,start;
1505     //panel_data data;
1506 
1507     //GetPanelExtra(p,&data);
1508     if(params->nSeqs <= 0)
1509         return;
1510 
1511 // Reset all the exceptions - a value of 1 indicates an exception that
1512 // will be displayed. A value of -1 is used to remember exceptions that
1513 // are temporarily hidden in the display
1514 
1515     for(i = 0; i < params->nSeqs; i++)
1516     {
1517         for(j = 0; j < params->nCols; j++)
1518         {
1519             if((*(params->lowScoreRes))[i][j] == -1)
1520             {
1521                 (*(params->lowScoreRes))[i][j] = 1;
1522             }
1523         }
1524     }
1525 
1526     for(i = 0; i < params->nSeqs; i++)
1527     {
1528         start = -1;
1529         for(j = 0; j <= params->nCols; j++)
1530         {
1531             if(start == -1)
1532             {
1533                 if((*(params->lowScoreRes))[i][j] == 1)
1534                     start = j;
1535             }
1536             else
1537             {
1538                 if(j == params->nCols || (*(params->lowScoreRes))[i][j] == 0)
1539                 {
1540                     if(j - start < userParameters->getQTminLenLowScoreSegment())
1541                     {
1542                         for(k = start; k < j; k++)
1543                         {
1544                             (*(params->lowScoreRes))[i][k] = -1;
1545                         }
1546                     }
1547                     start = -1;
1548                 }
1549             }
1550         }
1551     }
1552 }
1553 
1554 /**
1555  * Change: Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
1556  */
QTcalcWeightsForLowScoreSeg(LowScoreSegParams * params)1557 void Clustal::QTcalcWeightsForLowScoreSeg(LowScoreSegParams* params)
1558 {
1559     int i, j;
1560     vector<int> weight;
1561     float dscore;
1562     DistMatrix distMat(alignmentObj.getNumSeqs() + 1); // Mark: changed size
1563     // Aw potential trouble here: what if we don't have write
1564     // permission to current directory?
1565 #ifdef UNIX
1566     char treeName[FILENAMELEN]=".score.ph";
1567 #else
1568     char treeName[FILENAMELEN]="tmp.ph";
1569 #endif
1570 
1571     if(params->nSeqs <= 0)
1572     {
1573         return;
1574     }
1575 
1576 // if sequence weights have been calculated before - don't bother
1577 //doing it again (it takes too long). data.seqweight is set to NULL when
1578 // new sequences are loaded. /
1579     if(params->seqWeightCalculated == true)
1580     {
1581         return;
1582     }
1583 
1584     utilityObject->info("Calculating sequence weights...");
1585 
1586     // count pairwise percent identities to make a phylogenetic tree //
1587     if(params->nSeqs >= 2)
1588     {       //i=firstSeq + 1; i <= firstSeq + nSeqs
1589         for (i = params->firstSeq + 1; i <= params->firstSeq + params->nSeqs; i++)
1590         {   //j=i + 1; i <= firstSeq + nSeqs
1591             for (j = i + 1; j <= params->firstSeq + params->nSeqs; j++) // Mark 22-5-07
1592             {
1593                 dscore = alignmentObj.countid(i, j); // Mark 22-5-07
1594                 distMat(i, j) = (100.0 - dscore) / 100.0;
1595                 distMat(j, i) = distMat(i, j);
1596             }
1597         }
1598         string name = string(treeName);
1599         bool success = false;
1600         weight.resize(params->firstSeq + params->nSeqs + 1);
1601         TreeInterface tree;
1602         tree.getWeightsForQtLowScore(&weight, &distMat, &alignmentObj, params->firstSeq + 1,
1603                                    params->nSeqs, &name, &success); // Mark change 10-5-07
1604         if(!success)
1605         {
1606             return;
1607         }
1608 
1609         for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1610         {
1611             (*(params->seqWeight))[i - params->firstSeq] = weight[i];
1612         }
1613 
1614         utilityObject->info("Done.");
1615     }
1616 }
1617 
QTSetFileNamesForOutput(AlignmentFileNames fileNames)1618 void Clustal::QTSetFileNamesForOutput(AlignmentFileNames fileNames)
1619 {
1620     QTFileNames = fileNames;
1621 }
1622 
QTRealignSelectedRange(AlignmentFileNames fileNames,int beginPos,int endPos,bool realignEndGapPen)1623 bool Clustal::QTRealignSelectedRange(AlignmentFileNames fileNames, int beginPos, int endPos, bool realignEndGapPen)
1624 {
1625     bool alignEndGapPen = userParameters->getEndGapPenalties();
1626 
1627     Alignment saveOldAlign = alignmentObj; // Take a copy of it. Note provided copy
1628                                            // constructor is ok.
1629     bool ok;
1630     ok = alignmentObj.removeAllOutsideRange(beginPos, endPos);
1631 
1632     if(!ok)
1633     {
1634         alignmentObj = saveOldAlign;
1635         return false;
1636     }
1637     // Temporarily set the alignment output to be input
1638     int saveOutOrder = userParameters->getOutputOrder();
1639     userParameters->setOutputOrder(INPUT);
1640 
1641     //set end gap penalties to be realignEndGapPenalties
1642     userParameters->setEndGapPenalties(realignEndGapPen);
1643     //do the alignment
1644     if(alignmentObj.getNumSeqs() <= 0)
1645     {
1646         alignmentObj = saveOldAlign;
1647         return false;
1648     }
1649     QTSetFileNamesForOutput(fileNames);
1650     string phylipName = fileNames.treeFile;
1651     align(&phylipName, false);
1652 
1653     userParameters->setOutputOrder(saveOutOrder);
1654 
1655     // reset the end gap penalties
1656     userParameters->setEndGapPenalties(alignEndGapPen);
1657 
1658     // remove postions that only contain gaps
1659     int nSeqs = alignmentObj.getNumSeqs();
1660     alignmentObj.removeAllGapOnlyColumns(1, nSeqs, 0);
1661 
1662     // save it to a temporary area.
1663     SeqArray realignedArea = *(alignmentObj.getSeqArray());
1664 
1665     // Paste it back into the original alignment.
1666     alignmentObj = saveOldAlign;
1667     bool result;
1668     result = alignmentObj.updateRealignedRange(realignedArea, beginPos, endPos);
1669     if(result == false)
1670     {
1671         utilityObject->error("something went wrong while updating the realigned range\n");
1672     }
1673 
1674     // output the alignments
1675     AlignmentOutput alignOutput;
1676     if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
1677     {
1678         return false; // could not open the files.
1679     }
1680 
1681     alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1682 
1683     return true;
1684 }
1685 
test()1686 void Clustal::test()
1687 {
1688     cout << "RUNNING TEST\n";
1689     AlignmentOutput alignOutput;
1690     string path;
1691     utilityObject->getPath(userParameters->getSeqName(), &path);
1692 
1693     if(!alignOutput.openAlignmentOutput(path))
1694     {
1695         cerr << "could not open the file\n";
1696         return;
1697     }
1698 
1699     vector<int> selected;
1700     int nSeqs = alignmentObj.getNumSeqs();
1701     selected.resize(nSeqs + 1, 0);
1702     selected[9] = 1;
1703     selected[10] = 1;
1704     //selected[1] = 1;
1705     alignmentObj.removeGapOnlyColsFromSelectedSeqs(&selected);
1706     alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1707 }
1708 
1709 /**
1710  * This function is used to ask the user if they would like to use an existing guide tree.
1711  * Note: It expects that phylipName actually points to a string that has been allocated.
1712  */
useExistingGuideTree(int type,string * phylipName,const string & path)1713 bool Clustal::useExistingGuideTree(int type, string* phylipName, const string& path)
1714 {
1715     bool useTree = false;
1716     string treeName;
1717     InFileStream _treeFile;  //nige
1718     bool paramUseTree;
1719 
1720     string* ptrToMsg;
1721     if(type == Sequences)
1722     {
1723         ptrToMsg = &sequencesMsg;
1724         paramUseTree = userParameters->getUseTreeFile();
1725     }
1726     else if(type == Profile1)
1727     {
1728         ptrToMsg = &profile1Msg;
1729         paramUseTree = userParameters->getUseTree1File();;
1730     }
1731     else if(type == Profile2)
1732     {
1733         ptrToMsg = &profile2Msg;
1734         paramUseTree = userParameters->getUseTree2File();;
1735     }
1736     else
1737     {
1738         ptrToMsg = &sequencesMsg;
1739         paramUseTree = userParameters->getUseTreeFile();
1740     }
1741 
1742     if (checkTree && userParameters->getMenuFlag())
1743     {
1744         treeName = path + "dnd";
1745 
1746         _treeFile.open(treeName.c_str());
1747         _treeFile.seekg(0, std::ios::beg);
1748 
1749         if(_treeFile.is_open())
1750         {
1751             string message = *ptrToMsg + treeName + "  (y/n) ? [y]";
1752             string answer;
1753             utilityObject->getStr(message, answer);
1754 
1755             if(answer[0] != 'n' && answer[0] != 'N')
1756             {
1757                 if(!phylipName)
1758                 {
1759                     phylipName = new string(treeName);
1760                 }
1761                 else
1762                 {
1763                     *phylipName = treeName;
1764                 }
1765                 useTree = true;
1766             }
1767             _treeFile.close();
1768         }
1769     }
1770     else if (!userParameters->getMenuFlag() && paramUseTree)
1771     {
1772         useTree = true;
1773     }
1774 
1775     return useTree;
1776 }
1777 
promptForNewGuideTreeName(int type,string * treeName,const string & path)1778 void Clustal::promptForNewGuideTreeName(int type, string* treeName, const string& path)
1779 {
1780     string* ptrToMsg;
1781     if(type == Profile1)
1782     {
1783         ptrToMsg = &newProfile1TreePrompt;
1784     }
1785     else if(type == Profile2)
1786     {
1787         ptrToMsg = &newProfile2TreePrompt;
1788     }
1789     else
1790     {
1791         ptrToMsg = &newProfile1TreePrompt;
1792     }
1793 
1794     if(!treeName)
1795     {
1796         treeName = new string("");
1797     }
1798 
1799     while(treeName->empty())
1800     {
1801         string message = *ptrToMsg + path + "dnd]";
1802         string answer;
1803         utilityObject->getStr(message, answer);
1804         if(answer.empty())
1805         {
1806             answer = path + "dnd";
1807             *treeName = answer;
1808         }
1809         else
1810         {
1811             *treeName = answer;
1812         }
1813     }
1814 }
1815 
1816 }
1817