1 /**
2  * Author: Mark Larkin
3  *
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5  */
6 #ifdef HAVE_CONFIG_H
7     #include "config.h"
8 #endif
9 #include <stdio.h>
10 #include <string>
11 #include <cstdlib>
12 #include <exception>
13 #include <iostream>
14 #include <sstream>
15 #include "SubMatrix.h"
16 #include "matrices.h"
17 #include "../general/InvalidCombination.cpp"
18 #include "../general/debuglogObject.h"
19 
20 namespace clustalw
21 {
22 using namespace std;
23 
24 /**
25  *
26  * @param log
27  */
SubMatrix()28 SubMatrix::SubMatrix()
29 : sizenAAMatrix(276),
30   sizeDNAMatrix(153),
31   matrixAvgScore(0),
32   QTDNAHistMatNum(DNAIUB),
33   QTAAHistMatNum(AAHISTGONNETPAM250),
34   QTsegmentDNAMatNum(DNAIUB),
35   QTsegmentAAMatNum(QTAASEGGONNETPAM250)
36 {
37     userSeries = false;
38 
39     setUpCrossReferences();
40 
41     #if DEBUGFULL
42         if(logObject && DEBUGLOG)
43         {
44             logObject->logMsg("Creating the SubMatrix object\n");
45         }
46     #endif
47 
48     try
49     {
50         /* Set up the vectors with the matrices defined in matrices.h
51          * The matrices are intially defined as a short array, as there is no way
52          * to intiailize a vector with a {....} list.
53          */
54         blosum30mtVec = new Matrix(blosum30mt, blosum30mt + sizenAAMatrix);
55         blosum40mtVec = new Matrix(blosum40mt, blosum40mt + sizenAAMatrix);
56         blosum45mtVec = new Matrix(blosum45mt, blosum45mt + sizenAAMatrix);
57         blosum62mt2Vec = new Matrix(blosum62mt2, blosum62mt2 + sizenAAMatrix);
58         blosum80mtVec = new Matrix(blosum80mt, blosum80mt + sizenAAMatrix);
59         pam20mtVec = new Matrix(pam20mt, pam20mt + sizenAAMatrix);
60         pam60mtVec = new Matrix(pam60mt, pam60mt + sizenAAMatrix);
61         pam120mtVec = new Matrix(pam120mt, pam120mt + sizenAAMatrix);
62         pam350mtVec = new Matrix(pam350mt, pam350mt + sizenAAMatrix);
63         idmatVec = new Matrix(idmat, idmat + sizenAAMatrix);
64         gon40mtVec = new Matrix(gon40mt, gon40mt + sizenAAMatrix);
65         gon80mtVec = new Matrix(gon80mt, gon80mt + sizenAAMatrix);
66         gon120mtVec = new Matrix(gon120mt, gon120mt + sizenAAMatrix);
67         gon160mtVec = new Matrix(gon160mt, gon160mt + sizenAAMatrix);
68         gon250mtVec = new Matrix(gon250mt, gon250mt + sizenAAMatrix);
69         gon350mtVec = new Matrix(gon350mt, gon350mt + sizenAAMatrix);
70         clustalvdnamtVec = new Matrix(clustalvdnamt, clustalvdnamt + sizeDNAMatrix);
71         swgapdnamtVec = new Matrix(swgapdnamt, swgapdnamt + sizeDNAMatrix);
72 
73         /*
74          * Set up the vectors for user defined types.
75          * Probably dont need to do this, as they may not be used. It would be better
76          * to initialise their size if thye are used.
77          */
78         userMat.resize(NUMRES * NUMRES);
79         pwUserMat.resize(NUMRES * NUMRES);
80         userDNAMat.resize(NUMRES * NUMRES);
81         pwUserDNAMat.resize(NUMRES * NUMRES);
82         QTscoreUserMatrix.resize(NUMRES * NUMRES);
83         QTscoreUserDNAMatrix.resize(NUMRES * NUMRES);
84         QTsegmentDNAMatrix.resize(NUMRES * NUMRES);
85         QTsegmentAAMatrix.resize(NUMRES * NUMRES);
86 
87         userMatSeries.resize(MAXMAT); // Maximum number of matrices
88         vector<Matrix>::iterator firstM = userMatSeries.begin();
89         vector<Matrix>::iterator lastM = userMatSeries.end();
90 
91         while(firstM != lastM)
92         {
93             firstM->resize(NUMRES * NUMRES);
94             firstM++;
95         }
96 
97         // Maybe I should put this in with the other xref intialisations!
98         AAXrefseries.resize(MAXMAT);
99         vector<Xref>::iterator firstX = AAXrefseries.begin();
100         vector<Xref>::iterator lastX = AAXrefseries.end();
101 
102         while(firstX != lastX)
103         {
104             firstX->resize(NUMRES + 1);
105             firstX++;
106         }
107 
108         // Set the defaults.
109         matrixNum = 3;
110         matrixName = new string("gonnet");
111         DNAMatrixNum = 1;
112         DNAMatrixName = new string("iub");
113         pwMatrixNum = 3;
114         pwMatrixName = new string("gonnet");
115         pwDNAMatrixNum = 1;
116         pwDNAMatrixName = new string("iub");
117     }
118     catch(const exception &ex)
119     {
120         cerr << ex.what() << endl;
121         cerr << "Terminating program. Cannot continue\n";
122         exit(1);
123     }
124 }
125 
setValuesToDefault()126 void SubMatrix::setValuesToDefault()
127 {
128     matrixAvgScore = 0;
129     QTDNAHistMatNum = DNAIUB;
130     QTAAHistMatNum = AAHISTGONNETPAM250;
131     QTsegmentDNAMatNum = DNAIUB;
132     QTsegmentAAMatNum = QTAASEGGONNETPAM250;
133     userSeries = false;
134     matrixNum = 3;
135     DNAMatrixNum = 1;
136     pwMatrixNum = 3;
137     pwDNAMatrixNum = 1;
138 }
139 
140 /**
141  * The destructor frees up any dynamically allocated memory.
142  */
~SubMatrix()143 SubMatrix::~SubMatrix()
144 {
145     delete blosum30mtVec;
146     delete blosum40mtVec;
147     delete blosum45mtVec;
148     delete blosum62mt2Vec;
149     delete blosum80mtVec;
150     delete pam20mtVec;
151     delete pam60mtVec;
152     delete pam120mtVec;
153     delete pam350mtVec;
154     delete idmatVec;
155     delete gon40mtVec;
156     delete gon80mtVec;
157     delete gon120mtVec;
158     delete gon160mtVec;
159     delete gon250mtVec;
160     delete gon350mtVec;
161     delete clustalvdnamtVec;
162     delete swgapdnamtVec;
163     delete matrixName;
164     delete DNAMatrixName;
165     delete pwMatrixName;
166     delete pwDNAMatrixName;
167 }
168 
169 /**
170  * This function sets up the initial cross references.
171  */
setUpCrossReferences()172 void SubMatrix::setUpCrossReferences()
173 {
174     char c1, c2;
175     short i, j, maxRes;
176     defaultAAXref.resize(NUMRES + 1);
177     defaultDNAXref.resize(NUMRES + 1);
178 
179     string aminoAcidOrder = "ABCDEFGHIKLMNPQRSTVWXYZ";
180     string nucleicAcidOrder = "ABCDGHKMNRSTUVWXY";
181     /*
182      * I also need to resize the user defined xrefs.
183      */
184     DNAXref.resize(NUMRES + 1);
185     AAXref.resize(NUMRES + 1);
186     pwAAXref.resize(NUMRES + 1);
187     pwDNAXref.resize(NUMRES + 1);
188     QTscoreXref.resize(NUMRES + 1);
189     QTscoreDNAXref.resize(NUMRES + 1);
190     QTsegmentDNAXref.resize(NUMRES + 1);
191     QTsegmentAAXref.resize(NUMRES + 1);
192     /*
193      * set up cross-reference for default matrices hard-coded in matrices.h
194      */
195     for (i = 0; i < NUMRES; i++)
196     {
197         defaultAAXref[i] = -1;
198     }
199     for (i = 0; i < NUMRES; i++)
200     {
201         defaultDNAXref[i] = -1;
202     }
203 
204     maxRes = 0;
205 
206     for (i = 0; (c1 = aminoAcidOrder[i]); i++)
207     {
208         for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
209         {
210             if (c1 == c2)
211             {
212                 defaultAAXref[i] = j;
213 
214                 maxRes++;
215                 break;
216             }
217         }
218         if ((defaultAAXref[i] ==  - 1) && (aminoAcidOrder[i] != '*'))
219         {
220             utilityObject->error("residue %c in matrices.h is not recognised",
221                 aminoAcidOrder[i]);
222         }
223     }
224 
225     maxRes = 0;
226     for (i = 0; (c1 = nucleicAcidOrder[i]); i++)
227     {
228         for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
229         {
230             if (c1 == c2)
231             {
232                 defaultDNAXref[i] = j;
233                 maxRes++;
234                 break;
235             }
236         }
237         if ((defaultDNAXref[i] ==  - 1) && (nucleicAcidOrder[i] != '*'))
238         {
239             utilityObject->error("nucleic acid %c in matrices.h is not recognised",
240                 nucleicAcidOrder[i]);
241         }
242     }
243 
244 }
245 
246 /**
247  * The function getPairwiseMatrix is called by the user to get the correct sub matrix for the
248  * pairwise alignment stage. It calls getMatrix to actually calculate the matrix.
249  * This function provides an interface for the user. It allows the user to get the matrix
250  * they wish to use for the pairwise alignment part.
251  * @param matrix[][]
252  * @param scale
253  * @param matAvg
254  * @return
255  */
getPairwiseMatrix(int matrix[NUMRES][NUMRES],PairScaleValues & scale,int & matAvg)256 int SubMatrix::getPairwiseMatrix(int matrix[NUMRES][NUMRES], PairScaleValues& scale,
257                                  int& matAvg)
258 {
259     int _maxRes; // Local copy.
260     /* Pointers to Matrix and xref to use in calculation */
261     Matrix* _matPtr;
262     Xref* _matXref;
263 
264     #if DEBUGFULL
265         if(logObject && DEBUGLOG)
266         {
267             logObject->logMsg("In the function getPairwiseMatrix: \n");
268         }
269     #endif
270 
271     string matrixPointer;
272     string xrefPointer;
273 
274     #ifdef OS_MAC
275         scale.intScale = 10;
276     #else
277         scale.intScale = 100;
278     #endif
279     scale.gapOpenScale = scale.gapExtendScale = 1.0;
280 
281     if (userParameters->getDNAFlag())
282     {
283         #if DEBUGFULL
284             if(logObject && DEBUGLOG)
285             {
286                 string msg = "    (DNA AND Pairwise) " + *pwDNAMatrixName + "\n";
287                 logObject->logMsg(msg);
288             }
289         #endif
290         if (pwDNAMatrixName->compare("iub") == 0)
291         {
292             matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
293             _matPtr = swgapdnamtVec;
294             _matXref = &defaultDNAXref;
295         }
296         else if (pwDNAMatrixName->compare("clustalw") == 0)
297         {
298             matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
299             _matPtr = clustalvdnamtVec;
300             _matXref = &defaultDNAXref;
301             scale.gapOpenScale = 0.6667;
302             scale.gapExtendScale = 0.751;
303         }
304         else
305         {
306             matrixPointer = "pwUserDNAMat"; xrefPointer = "pwDNAXref";
307             _matPtr = &pwUserDNAMat;
308             _matXref = &pwDNAXref;
309         }
310         _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
311 
312         if (_maxRes == 0)
313         {
314             return ((int) -1);
315         }
316         float _transitionWeight = userParameters->getTransitionWeight();
317         // Mark change 17-5-07
318         matrix[0][4] = static_cast<int>(_transitionWeight * matrix[0][0]);
319         matrix[4][0] = static_cast<int>(_transitionWeight * matrix[0][0]);
320         matrix[2][11] = static_cast<int>(_transitionWeight * matrix[0][0]);
321         matrix[11][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
322         matrix[2][12] = static_cast<int>(_transitionWeight * matrix[0][0]);
323         matrix[12][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
324 
325     }
326     else
327     {
328         #if DEBUGFULL
329             if(logObject && DEBUGLOG)
330             {
331                 string msg = "    (Protein AND Pairwise) " + *pwMatrixName + "\n";
332                 logObject->logMsg(msg);
333             }
334         #endif
335 
336         if (pwMatrixName->compare("blosum") == 0)
337         {
338             matrixPointer = "blosum30mtVec"; xrefPointer = "defaultAAXref";
339             _matPtr = blosum30mtVec;
340             _matXref = &defaultAAXref;
341         }
342         else if (pwMatrixName->compare("pam") == 0)
343         {
344             matrixPointer = "pam350mtVec"; xrefPointer = "defaultAAXref";
345             _matPtr = pam350mtVec;
346             _matXref = &defaultAAXref;
347         }
348         else if (pwMatrixName->compare("gonnet") == 0)
349         {
350             matrixPointer = "gon250mtVec"; xrefPointer = "defaultAAXref";
351             _matPtr = gon250mtVec;
352             scale.intScale /= 10;
353             _matXref = &defaultAAXref;
354         }
355         else if (pwMatrixName->compare("id") == 0)
356         {
357             matrixPointer = "idmatVec"; xrefPointer = "defaultAAXref";
358             _matPtr = idmatVec;
359             _matXref = &defaultAAXref;
360         }
361         else
362         {
363             matrixPointer = "pwUserMat"; xrefPointer = "pwAAXref";
364             _matPtr = &pwUserMat;
365             _matXref = &pwAAXref;
366         }
367 
368         _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
369 
370         if (_maxRes == 0)
371         {
372             return ((int) -1);
373         }
374     }
375 
376     #if DEBUGFULL
377         if(logObject && DEBUGLOG)
378         {
379             ostringstream outs;
380             outs << "    Called getMatrix with "
381                  << matrixPointer << " and " << xrefPointer << ".\n"
382                  << "    intScale = " << scale.intScale << ", gapOpenScale = "
383                  << scale.gapOpenScale << ", gapExtendScale = " << scale.gapExtendScale
384                  << "\n\n";
385             logObject->logMsg(outs.str());
386         }
387     #endif
388     matAvg = matrixAvgScore;
389     return _maxRes;
390 }
391 
392 /**
393  * The function getProfileAlignMatrix provides an interface for the user to get
394  * the matrix to be used in the profile alignment. This depends on the matrix series
395  * that was chosen, and also on the percent identity.
396  * @param matrix[][]
397  * @param pcid
398  * @param minLen
399  * @param scaleParam
400  * @param matAvg
401  * @return
402  */
getProfileAlignMatrix(int matrix[NUMRES][NUMRES],double pcid,int minLen,PrfScaleValues & scaleParam,int & matAvg)403 int SubMatrix::getProfileAlignMatrix(int matrix[NUMRES][NUMRES], double pcid, int minLen,
404                                   PrfScaleValues& scaleParam, int& matAvg)
405 {
406     bool found = false;
407     bool errorGiven = false;
408     bool _negMatrix = userParameters->getUseNegMatrix();
409     int i = 0, j = 0;
410     int _maxRes = 0;
411     scaleParam.intScale = 100;
412     string matrixPointer;
413     string xrefPointer;
414 
415     #if DEBUGFULL
416         if(logObject && DEBUGLOG)
417         {
418             logObject->logMsg("In the function getProfileAlignMatrix: \n");
419         }
420     #endif
421     if (userParameters->getDNAFlag())
422     {
423         #if DEBUGFULL
424             if(logObject && DEBUGLOG)
425             {
426                 ostringstream outs;
427                 outs << "    (DNA AND Multiple align) "<< DNAMatrixName->c_str() << "\n";
428                 logObject->logMsg(outs.str());
429             }
430         #endif
431         scaleParam.scale = 1.0;
432         if (DNAMatrixName->compare("iub") == 0)
433         {
434             _matPtr = swgapdnamtVec;
435             _matXref = &defaultDNAXref;
436             matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
437         }
438         else if (DNAMatrixName->compare("clustalw") == 0)
439         {
440             _matPtr = clustalvdnamtVec;
441             _matXref = &defaultDNAXref;
442             scaleParam.scale = 0.66;
443             matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
444         }
445         else
446         {
447             _matPtr = &userDNAMat;
448             _matXref = &DNAXref;
449             matrixPointer = "userDNAMat"; xrefPointer = "DNAXref";
450         }
451 
452         _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
453                             static_cast<int>(scaleParam.intScale)); // Mark change 17-5-07
454         if (_maxRes == 0)
455         {
456             return ((int) - 1);
457         }
458 
459         float _transitionWeight = userParameters->getTransitionWeight();
460         // fix suggested by Chanan Rubin at Compugen
461         matrix[(*_matXref)[0]][(*_matXref)[4]] =
462                             static_cast<int>(_transitionWeight * matrix[0][0]);
463         matrix[(*_matXref)[4]][(*_matXref)[0]] =
464                             static_cast<int>(_transitionWeight * matrix[0][0]);
465         matrix[(*_matXref)[2]][(*_matXref)[11]] =
466                             static_cast<int>(_transitionWeight * matrix[0][0]);
467         matrix[(*_matXref)[11]][(*_matXref)[2]] =
468                             static_cast<int>(_transitionWeight * matrix[0][0]);
469         matrix[(*_matXref)[2]][(*_matXref)[12]] =
470                             static_cast<int>(_transitionWeight * matrix[0][0]);
471         matrix[(*_matXref)[12]][(*_matXref)[2]] =
472                             static_cast<int>(_transitionWeight * matrix[0][0]);
473 
474     }
475     else // Amino acid alignment!!!!
476     {
477         #if DEBUGFULL
478             if(logObject && DEBUGLOG)
479             {
480                 ostringstream outs;
481                 outs << "    (Protein AND Multiple align) "<< matrixName->c_str() << "\n";
482                 logObject->logMsg(outs.str());
483             }
484         #endif
485 
486         scaleParam.scale = 0.75;
487         if (matrixName->compare("blosum") == 0)
488         {
489             scaleParam.scale = 0.75;
490             if (_negMatrix || userParameters->getDistanceTree() == false)
491             {
492                 _matPtr = blosum40mtVec;
493                 matrixPointer = "blosum40mtVec";
494             }
495             else if (pcid > 80.0)
496             {
497                 _matPtr = blosum80mtVec;
498                 matrixPointer = "blosum80mtVec";
499             }
500             else if (pcid > 60.0)
501             {
502                 _matPtr = blosum62mt2Vec;
503                 matrixPointer = "blosum62mt2Vec";
504             }
505             else if (pcid > 40.0)
506             {
507                 _matPtr = blosum45mtVec;
508                 matrixPointer = "blosum45mtVec";
509             }
510             else if (pcid > 30.0)
511             {
512                 scaleParam.scale = 0.5;
513                 _matPtr = blosum45mtVec;
514                 matrixPointer = "blosum45mtVec";
515             }
516             else if (pcid > 20.0)
517             {
518                 scaleParam.scale = 0.6;
519                 _matPtr = blosum45mtVec;
520                 matrixPointer = "blosum45mtVec";
521             }
522             else
523             {
524                 scaleParam.scale = 0.6;
525                 _matPtr = blosum30mtVec;
526                 matrixPointer = "blosum30mtVec";
527             }
528             _matXref = &defaultAAXref;
529             xrefPointer = "defaultAAXref";
530         }
531         else if (matrixName->compare("pam") == 0)
532         {
533             scaleParam.scale = 0.75;
534             if (_negMatrix || userParameters->getDistanceTree() == false)
535             {
536                 _matPtr = pam120mtVec;
537                 matrixPointer = "pam120mtVec";
538             }
539             else if (pcid > 80.0)
540             {
541                 _matPtr = pam20mtVec;
542                 matrixPointer = "pam20mtVec";
543             }
544             else if (pcid > 60.0)
545             {
546                 _matPtr = pam60mtVec;
547                 matrixPointer = "pam60mtVec";
548             }
549             else if (pcid > 40.0)
550             {
551                 _matPtr = pam120mtVec;
552                 matrixPointer = "pam120mtVec";
553             }
554             else
555             {
556                 _matPtr = pam350mtVec;
557                 matrixPointer = "pam350mtVec";
558             }
559             _matXref = &defaultAAXref;
560             xrefPointer = "defaultAAXref";
561         }
562         else if (matrixName->compare("gonnet") == 0)
563         {
564             scaleParam.scale /= 2.0;
565             if (_negMatrix || userParameters->getDistanceTree() == false)
566             {
567                 _matPtr = gon250mtVec;
568                 matrixPointer = "gon250mtVec";
569             }
570             else if (pcid > 35.0)
571             {
572                 _matPtr = gon80mtVec;
573                 scaleParam.scale /= 2.0;
574                 matrixPointer = "gon80mtVec";
575             }
576             else if (pcid > 25.0)
577             {
578                 if (minLen < 100)
579                 {
580                     _matPtr = gon250mtVec;
581                     matrixPointer = "gon250mtVec";
582                 }
583                 else
584                 {
585                     _matPtr = gon120mtVec;
586                     matrixPointer = "gon120mtVec";
587                 }
588             }
589             else
590             {
591                 if (minLen < 100)
592                 {
593                     _matPtr = gon350mtVec;
594                     matrixPointer = "gon350mtVec";
595                 }
596                 else
597                 {
598                     _matPtr = gon160mtVec;
599                     matrixPointer = "gon160mtVec";
600                 }
601             }
602             _matXref = &defaultAAXref;
603             xrefPointer = "defaultAAXref";
604             scaleParam.intScale /= 10;
605         }
606         else if (matrixName->compare("id") == 0)
607         {
608             _matPtr = idmatVec;
609             _matXref = &defaultAAXref;
610             xrefPointer = "defaultAAXref";
611             matrixPointer = "idmatVec";
612         }
613         else if (userSeries)
614         {
615             _matPtr = NULL;
616             found = false;
617             for (i = 0; i < matSeries.nmat; i++)
618             {
619                 if (pcid >= matSeries.mat[i].llimit && pcid <=
620                     matSeries.mat[i].ulimit)
621                 {
622                     j = i;
623                     found = true;
624                     break;
625                 }
626             }
627             if (found == false)
628             {
629                 if (!errorGiven)
630                 {
631                     utilityObject->warning(
632                         "\nSeries matrix not found for sequence percent identity = %d.\n""(Using first matrix in series as a default.)\n""This alignment may not be optimal!\n""SUGGESTION: Check your matrix series input file and try again.", (int)pcid);
633                 }
634                 errorGiven = true;
635                 j = 0;
636             }
637 
638             _matPtr = matSeries.mat[j].matptr;
639             _matXref = matSeries.mat[j].AAXref;
640             // this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit
641             scaleParam.scale = 0.5 + (pcid - matSeries.mat[j].llimit) / (
642                 (matSeries.mat[j].ulimit - matSeries.mat[j].llimit) *2.0);
643             xrefPointer = "matSeries.mat[j].AAXref";
644             matrixPointer = "matSeries.mat[j].matptr";
645         }
646         else
647         {
648             _matPtr = &userMat;
649             _matXref = &AAXref;
650             xrefPointer = "AAXref";
651             matrixPointer = "userMat";
652         }
653 
654         _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
655                             static_cast<int>(scaleParam.intScale));
656         if (_maxRes == 0)
657         {
658             cerr << "Error: matrix " << matrixName << " not found\n";
659             return ( - 1);
660         }
661     }
662 
663     #if DEBUGFULL
664         if(logObject && DEBUGLOG)
665         {
666             ostringstream outs;
667             outs << "    Called getMatrix with "
668                  << matrixPointer << " and " << xrefPointer << ".\n"
669                  << "    intScale = " << scaleParam.intScale << ", scale = "
670                  << scaleParam.scale << ", pcid = " << pcid << "\n\n";
671             logObject->logMsg(outs.str());
672         }
673     #endif
674 
675     matAvg = matrixAvgScore;
676     return _maxRes;
677 }
678 
679 /**
680  * The function getMatrix is what the other parts of the code call to get a useable
681  * substitution matrix. This is stored in matrix[NUMRES][NUMRES].
682  * @param matptr
683  * @param xref
684  * @param matrix[][]
685  * @param negFlag
686  * @param scale
687  * @return
688  */
getMatrix(Matrix * matptr,Xref * xref,int matrix[NUMRES][NUMRES],bool negFlag,int scale,bool minimise)689 int SubMatrix::getMatrix(Matrix* matptr, Xref* xref, int matrix[NUMRES][NUMRES],
690                          bool negFlag, int scale, bool minimise)
691 {
692     int ggScore = 0;
693     int grScore = 0;
694     int i, j, k, ix = 0;
695     int ti, tj;
696     int maxRes;
697     int av1, av2, av3, min, max;
698 
699     for (i = 0; i < NUMRES; i++)
700     {
701         for (j = 0; j < NUMRES; j++)
702         {
703             matrix[i][j] = 0;
704         }
705     }
706 
707     ix = 0;
708     maxRes = 0;
709     for (i = 0; i <= userParameters->getMaxAA(); i++)
710     {
711         ti = (*xref)[i];
712         for (j = 0; j <= i; j++)
713         {
714             tj = (*xref)[j];
715             if ((ti !=  - 1) && (tj !=  - 1))
716             {
717                 k = (*matptr)[ix];
718                 if (ti == tj)
719                 {
720                     matrix[ti][ti] = k * scale;
721                     maxRes++;
722                 }
723                 else
724                 {
725                     matrix[ti][tj] = k * scale;
726                     matrix[tj][ti] = k * scale;
727                 }
728                 ix++;
729             }
730         }
731     }
732 
733     --maxRes;
734 
735     av1 = av2 = av3 = 0;
736     for (i = 0; i <= userParameters->getMaxAA(); i++)
737     {
738         for (j = 0; j <= i; j++)
739         {
740             av1 += matrix[i][j];
741             if (i == j)
742             {
743                 av2 += matrix[i][j];
744             }
745             else
746             {
747                 av3 += matrix[i][j];
748             }
749         }
750     }
751 
752     av1 /= (maxRes * maxRes) / 2;
753     av2 /= maxRes;
754     av3 = static_cast<int>(av3 / (((float)(maxRes * maxRes - maxRes)) / 2));
755     matrixAvgScore =  - av3;
756 
757     min = max = matrix[0][0];
758     for (i = 0; i <= userParameters->getMaxAA(); i++)
759     for (j = 1; j <= i; j++)
760     {
761         if (matrix[i][j] < min)
762         {
763             min = matrix[i][j];
764         }
765         if (matrix[i][j] > max)
766         {
767             max = matrix[i][j];
768         }
769     }
770     //cout << "MAX = " << max << "\n";
771     if(!minimise)
772     {
773         /*
774             if requested, make a positive matrix - add -(lowest score) to every entry
775         */
776         if (negFlag == false)
777         {
778             if (min < 0)
779             {
780                 for (i = 0; i <= userParameters->getMaxAA(); i++)
781                 {
782                     ti = (*xref)[i];
783                     if (ti !=  - 1)
784                     {
785                         for (j = 0; j <= userParameters->getMaxAA(); j++)
786                         {
787                             tj = (*xref)[j];
788 
789                             if (tj !=  - 1)
790                             {
791                                 matrix[ti][tj] -= min;
792                             }
793                         }
794                     }
795                 }
796             }
797         }
798 
799         // local copies of the gap positions
800         int _gapPos1 = userParameters->getGapPos1();
801         int _gapPos2 = userParameters->getGapPos2();
802 
803         for (i = 0; i < userParameters->getGapPos1(); i++)
804         {
805             matrix[i][_gapPos1] = grScore;
806             matrix[_gapPos1][i] = grScore;
807             matrix[i][_gapPos2] = grScore;
808             matrix[_gapPos2][i] = grScore;
809         }
810         matrix[_gapPos1][_gapPos1] = ggScore;
811         matrix[_gapPos2][_gapPos2] = ggScore;
812         matrix[_gapPos2][_gapPos1] = ggScore;
813         matrix[_gapPos1][_gapPos2] = ggScore;
814     }
815     else
816     {
817         // DO THE SAGA MATRIX
818         for (i = 0; i <= userParameters->getMaxAA(); i++)
819         {
820             for (j = 0; j <= userParameters->getMaxAA(); j++)
821             {
822                 matrix[i][j] = max - matrix[i][j];
823             }
824         }
825     }
826     maxRes += 2;
827 
828     return (maxRes);
829 }
830 
831 /**
832  * The function getUserMatFromFile is used to read in a user defined matrix.
833  * @param str
834  * @param alignResidueType
835  * @param alignType
836  * @return
837  */
getUserMatFromFile(char * str,int alignResidueType,int alignType)838 bool SubMatrix::getUserMatFromFile(char *str, int alignResidueType, int alignType)
839 {
840     int maxRes;
841 
842     FILE *infile;
843     // Need to check if the values are a valid combination!
844     checkResidueAndAlignType(alignResidueType, alignType);
845 
846     if(userParameters->getMenuFlag())
847     {
848         utilityObject->getStr(string("Enter name of the matrix file"), line2);
849     }
850     else
851     {
852         line2 = string(str);
853     }
854 
855     if(line2.size() == 0)
856     {
857         return false;
858     }
859 
860     if((infile = fopen(line2.c_str(), "r")) == NULL)
861     {
862         utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
863         return false;
864     }
865 
866     strcpy(str, line2.c_str());
867     // Find out which part of the code we are reading the matrix in for.
868     mat = getUserMatAddress(alignResidueType, alignType);
869     xref = getUserXrefAddress(alignResidueType, alignType);
870 
871     if ((alignResidueType == Protein) && (alignType == MultipleAlign))
872     {
873         // Try read in a matrix series.
874         maxRes = readMatrixSeries(str, userMat, AAXref);
875     }
876     else // Read in a single matrix!!!
877     {
878         maxRes = readUserMatrix(str, *mat, *xref);
879     }
880 
881     if (maxRes <= 0) return false;
882 
883     return true;
884 }
885 
886 /**
887  * The function compareMatrices is used to compare 2 matrices that have been read in.
888  * It will compare them in a given region. It will not compare all of them, as some of it
889  * will be random memory.
890  * @param mat1[][]
891  * @param mat2[][]
892  */
compareMatrices(int mat1[NUMRES][NUMRES],int mat2[NUMRES][NUMRES])893 void SubMatrix::compareMatrices(int mat1[NUMRES][NUMRES], int mat2[NUMRES][NUMRES])
894 {
895     int same = 1;
896     for(int row = 0; row < NUMRES; row++)
897     {
898         for(int col = 0; col < NUMRES; col++)
899         {
900             if(mat1[row][col] != mat2[row][col])
901             {
902                 same = 0;
903                 cout << "The row is " << row << ". The column is " << col << endl;
904                 break; // It is not the same. End the loop.
905             }
906         }
907     }
908 
909     if(same == 0)
910     {
911         cout << "It was not the same\n";
912     }
913     else
914     {
915         cout << "It is the same\n";
916     }
917 }
918 
919 /**
920  * This function is simply to display the results of the getMatrix function.
921  * This is so that I can compare it to the original clustal version of it.
922  * @param mat[][]
923  */
printGetMatrixResults(int mat[NUMRES][NUMRES])924 void SubMatrix::printGetMatrixResults(int mat[NUMRES][NUMRES])
925 {
926     ofstream outfile("getmatrix.out");
927 
928     if(!outfile)
929         cerr<<"oops failed to open !!!\n";
930 
931     for(int row = 0; row < NUMRES; row++)
932     {
933         for(int col = 0; col < NUMRES; col++)
934         {
935             if((mat[row][col] > 9) || (mat[row][col] < 0))
936             {
937                 outfile <<" "<< mat[row][col]<<",";
938             }
939             else
940             {
941                 outfile <<"  "<< mat[row][col]<<",";
942             }
943         }
944         outfile<<"\n";
945     }
946 }
947 
948 /**
949  * This function is from the old interface. It simply calls the readMatrixSeries
950  * function. It seems to only be called from the commandline part.
951  * @param str
952  * @return
953  */
getUserMatSeriesFromFile(char * str)954 bool SubMatrix::getUserMatSeriesFromFile(char *str)
955 {
956     int maxRes;
957 
958     FILE *infile;
959 
960     if(userParameters->getMenuFlag()) // Check if we are using menu!
961     {
962         utilityObject->getStr(string("Enter name of the matrix file"), line2);
963     }
964     else
965     {
966         //strcpy(lin2,str);
967         line2 = string(str);
968     }
969 
970     if(line2.size() == 0) return false;
971 
972     if((infile = fopen(line2.c_str(), "r"))==NULL)
973     {
974         utilityObject->error("Cannot find matrix file [%s]",line2.c_str());
975         return false;
976     }
977 
978     strcpy(str, line2.c_str());
979 
980     maxRes = readMatrixSeries(str, userMat, AAXref);
981     if (maxRes <= 0) return false;
982 
983     return true;
984 }
985 
986 /*
987  * The function readMatrixSeries is used to read in a series of matrices from a file.
988  * It calls readUserMatrix to read in the individual matrices. The matrices are stored
989  * in userMatSeries.
990  */
readMatrixSeries(const char * fileName,Matrix & userMat,Xref & xref)991 int SubMatrix::readMatrixSeries(const char *fileName, Matrix& userMat, Xref& xref)
992 {
993     FILE *fd = NULL;
994     char mat_fileName[FILENAMELEN];
995     char inline1[1024];
996     int maxRes = 0;
997     int nmat;
998     int n, llimit, ulimit;
999 
1000     if (fileName[0] == '\0')
1001     {
1002         utilityObject->error("comparison matrix not specified");
1003         return ((int)0);
1004     }
1005     if ((fd = fopen(fileName, "r")) == NULL)
1006     {
1007         utilityObject->error("cannot open %s", fileName);
1008         return ((int)0);
1009     }
1010 
1011     /* check the first line to see if it's a series or a single matrix */
1012     while (fgets(inline1, 1024, fd) != NULL)
1013     {
1014         if (commentline(inline1))
1015         {
1016             continue;
1017         }
1018         if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1019         {
1020             userSeries = true;
1021         }
1022         else
1023         {
1024             userSeries = false;
1025         }
1026         break;
1027     }
1028 
1029     /* it's a single matrix */
1030     if (userSeries == false)
1031     {
1032         fclose(fd);
1033         maxRes = readUserMatrix(fileName, userMat, xref);
1034         return (maxRes);
1035     }
1036 
1037     /* it's a series of matrices, find the next MATRIX line */
1038     nmat = 0;
1039     matSeries.nmat = 0;
1040     while (fgets(inline1, 1024, fd) != NULL)
1041     {
1042         if (commentline(inline1))
1043         {
1044             continue;
1045         }
1046         if (utilityObject->lineType(inline1, "MATRIX")) // Have found a matrix
1047         {
1048             if (sscanf(inline1 + 6, "%d %d %s", &llimit, &ulimit, mat_fileName)
1049                 != 3)
1050             {
1051                 utilityObject->error("Bad format in file %s\n", fileName);
1052                 fclose(fd);
1053                 return ((int)0);
1054             }
1055             if (llimit < 0 || llimit > 100 || ulimit < 0 || ulimit > 100)
1056             {
1057                 utilityObject->error("Bad format in file %s\n", fileName);
1058                 fclose(fd);
1059                 return ((int)0);
1060             }
1061             if (ulimit <= llimit)
1062             {
1063                 utilityObject->error("in file %s: lower limit is greater than upper (%d-%d)\n",
1064                     fileName, llimit, ulimit);
1065                 fclose(fd);
1066                 return ((int)0);
1067             }
1068 
1069             n = readUserMatrix(mat_fileName, userMatSeries[nmat],
1070                 AAXrefseries[nmat]);
1071 
1072             //cout << "Read in matrix number " << nmat << "\n"; // NOTE Testing!!!!!
1073             char nameOfFile[] = "matrix";
1074             printInFormat(userMatSeries[nmat], nameOfFile);
1075             if (n <= 0)
1076             {
1077                 utilityObject->error("Bad format in matrix file %s\n", mat_fileName);
1078                 fclose(fd);
1079                 return ((int)0);
1080             }
1081             matSeries.mat[nmat].llimit = llimit;
1082             matSeries.mat[nmat].ulimit = ulimit;
1083             matSeries.mat[nmat].matptr = &userMatSeries[nmat];
1084             matSeries.mat[nmat].AAXref = &AAXrefseries[nmat];
1085             nmat++;
1086 
1087             if(nmat >= MAXMAT)
1088             {
1089                // We have read in all the matrices that we can read into this vector
1090                // Write a message to the screen, and break out of loop.
1091                cerr << "The matrix series file has more entries than allowed in \n"
1092                     << "a user defined series. The most that are allowed is "
1093                     << MAXMAT << ".\n"
1094                     << "The first " << MAXMAT << " have been read in and will be used.\n";
1095                break; // Get out of the loop!
1096             }
1097         }
1098     }
1099     fclose(fd);
1100     matSeries.nmat = nmat;
1101 
1102     maxRes = n;
1103     return (maxRes);
1104 }
1105 
1106 /*
1107  * This function is used to read a single user matrix from a file.
1108  * It can be called repeatedly if there are multiple matrices in the file.
1109  */
readUserMatrix(const char * fileName,Matrix & userMat,Xref & xref)1110 int SubMatrix::readUserMatrix(const char *fileName, Matrix& userMat, Xref& xref)
1111 {
1112     double f;
1113     FILE *fd;
1114     int numargs, farg;
1115     int i, j, k = 0;
1116     char codes[NUMRES];
1117     char inline1[1024];
1118     char *args[NUMRES + 4];
1119     char c1, c2;
1120     int ix1, ix = 0;
1121     int maxRes = 0;
1122     float scale;
1123 
1124     if (fileName[0] == '\0')
1125     {
1126         utilityObject->error("comparison matrix not specified");
1127         return ((int)0);
1128     }
1129 
1130     if ((fd = fopen(fileName, "r")) == NULL)
1131     {
1132         utilityObject->error("cannot open %s", fileName);
1133         return ((int)0);
1134     }
1135     maxRes = 0;
1136     while (fgets(inline1, 1024, fd) != NULL)
1137     {
1138         if (commentline(inline1))
1139         {
1140             continue;
1141         }
1142         if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1143         {
1144             utilityObject->error("in %s - single matrix expected.", fileName);
1145             fclose(fd);
1146             return ((int)0);
1147         }
1148         /*
1149         read residue characters.
1150          */
1151         k = 0;
1152         for (j = 0; j < (int)strlen(inline1); j++)
1153         {
1154             if (isalpha((int)inline1[j]))
1155             {
1156                 codes[k++] = inline1[j];
1157             }
1158             if (k > NUMRES)
1159             {
1160                 utilityObject->error("too many entries in matrix %s", fileName);
1161                 fclose(fd);
1162                 return ((int)0);
1163             }
1164         }
1165         codes[k] = '\0';
1166         break;
1167     }
1168 
1169     if (k == 0)
1170     {
1171         utilityObject->error("wrong format in matrix %s", fileName);
1172         fclose(fd);
1173         return ((int)0);
1174     }
1175 
1176     /*
1177     cross-reference the residues
1178      */
1179     for (i = 0; i < NUMRES; i++)
1180     {
1181         xref[i] =  - 1;
1182     }
1183 
1184     maxRes = 0;
1185     for (i = 0; (c1 = codes[i]); i++)
1186     {
1187         for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
1188         if (c1 == c2)
1189         {
1190             xref[i] = j;
1191             maxRes++;
1192             break;
1193         }
1194         if ((xref[i] ==  - 1) && (codes[i] != '*'))
1195         {
1196             utilityObject->warning("residue %c in matrix %s not recognised", codes[i],
1197                 fileName);
1198         }
1199     }
1200 
1201 
1202     /*
1203     get the weights
1204      */
1205 
1206     ix = ix1 = 0;
1207     while (fgets(inline1, 1024, fd) != NULL)
1208     {
1209         if (inline1[0] == '\n')
1210         {
1211             continue;
1212         }
1213         if (inline1[0] == '#' || inline1[0] == '!')
1214         {
1215             break;
1216         }
1217         numargs = getArgs(inline1, args, (int)(k + 1));
1218         if (numargs < maxRes)
1219         {
1220             utilityObject->error("wrong format in matrix %s", fileName);
1221             fclose(fd);
1222             return ((int)0);
1223         }
1224         if (isalpha(args[0][0]))
1225         {
1226             farg = 1;
1227         }
1228         else
1229         {
1230             farg = 0;
1231         }
1232 
1233         /* decide whether the matrix values are float or decimal */
1234         scale = 1.0;
1235         for (i = 0; i < (int)strlen(args[farg]); i++)
1236         if (args[farg][i] == '.')
1237         {
1238             /* we've found a float value */
1239             scale = 10.0;
1240             break;
1241         }
1242 
1243         for (i = 0; i <= ix; i++)
1244         {
1245             if (xref[i] !=  - 1)
1246             {
1247                 f = atof(args[i + farg]);
1248                 userMat[ix1++] = (short)(f *scale);
1249             }
1250         }
1251         ix++;
1252     }
1253     if (ix != k + 1)
1254     {
1255         utilityObject->error("wrong format in matrix %s", fileName);
1256         fclose(fd);
1257         return ((int)0);
1258     }
1259 
1260     userMat.resize(ix1 + 1);
1261 
1262     maxRes += 2;
1263     fclose(fd);
1264 
1265     return (maxRes);
1266 }
1267 
1268 /**
1269  *
1270  * @param inline1
1271  * @param args[]
1272  * @param max
1273  * @return
1274  */
getArgs(char * inline1,char * args[],int max)1275 int SubMatrix::getArgs(char *inline1,char *args[],int max)
1276 {
1277     char *inptr;
1278     int i;
1279 
1280     inptr = inline1;
1281     for (i = 0; i <= max; i++)
1282     {
1283         if ((args[i] = strtok(inptr, " \t\n")) == NULL)
1284         {
1285             break;
1286         }
1287         inptr = NULL;
1288     }
1289 
1290     return (i);
1291 }
1292 
1293 
1294 /**
1295  *
1296  * @return
1297  */
getMatrixNum()1298 int SubMatrix::getMatrixNum()
1299 {
1300     return matrixNum;
1301 }
1302 
1303 /**
1304  *
1305  * @return
1306  */
getDNAMatrixNum()1307 int SubMatrix::getDNAMatrixNum()
1308 {
1309     return DNAMatrixNum;
1310 }
1311 
1312 /**
1313  *
1314  * @return
1315  */
getPWMatrixNum()1316 int SubMatrix::getPWMatrixNum()
1317 {
1318     return pwMatrixNum;
1319 }
1320 
1321 /**
1322  *
1323  * @return
1324  */
getPWDNAMatrixNum()1325 int SubMatrix::getPWDNAMatrixNum()
1326 {
1327     return pwDNAMatrixNum;
1328 }
1329 
1330 /**
1331  * The function setCurrentNameAndNum is used to select a matrix series.
1332  * This will then be used for the alignment. The matrices will change, but the
1333  * series remains the same. We can set the series for pairwise/full for both
1334  * protein and DNA. NOTE: The default can be set to user defined matrices.
1335  * @param _matrixName
1336  * @param _matrixNum
1337  * @param alignResidueType
1338  * @param alignType
1339  */
setCurrentNameAndNum(string _matrixName,int _matrixNum,int alignResidueType,int alignType)1340 void SubMatrix::setCurrentNameAndNum(string _matrixName, int _matrixNum,
1341                                           int alignResidueType,int alignType)
1342 {
1343     // Check if the values are valid.
1344     checkResidueAndAlignType(alignResidueType, alignType);
1345 
1346     string residue;
1347     string align;
1348     if((alignResidueType == Protein) && (alignType == Pairwise))
1349     {
1350         residue = "Protein"; align = "Pairwise";
1351         pwMatrixNum = _matrixNum;
1352         pwMatrixName = new string(_matrixName);
1353     }
1354     else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1355     {
1356         residue = "Protein"; align = "MultipleAlign";
1357         matrixNum = _matrixNum;
1358         matrixName = new string(_matrixName);
1359     }
1360     else if((alignResidueType == DNA) && (alignType == Pairwise))
1361     {
1362         residue = "DNA"; align = "Pairwise";
1363         pwDNAMatrixNum = _matrixNum;
1364         pwDNAMatrixName = new string(_matrixName);
1365     }
1366     else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1367     {
1368         residue = "DNA"; align = "MultipleAlign";
1369         DNAMatrixNum = _matrixNum;
1370         DNAMatrixName = new string(_matrixName);
1371 
1372     }
1373 
1374     #if DEBUGFULL
1375         if(logObject && DEBUGLOG)
1376         {
1377             ostringstream outs;
1378             outs << "The matrix/matrix series has been changed for "
1379                  << "(" << residue << " AND " << align << ")."
1380                  << " New value: " << _matrixName << "\n\n";
1381             logObject->logMsg(outs.str());
1382         }
1383     #endif
1384 }
1385 
1386 /**
1387  *
1388  * @param alignResidueType
1389  * @param alignType
1390  * @return
1391  */
getMatrixNumForMenu(int alignResidueType,int alignType)1392 int SubMatrix::getMatrixNumForMenu(int alignResidueType, int alignType)
1393 {
1394     checkResidueAndAlignType(alignResidueType, alignType);
1395 
1396     if((alignResidueType == Protein) && (alignType == Pairwise))
1397     {
1398         return pwMatrixNum;
1399     }
1400     else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1401     {
1402         return matrixNum;
1403     }
1404     else if((alignResidueType == DNA) && (alignType == Pairwise))
1405     {
1406         return pwDNAMatrixNum;
1407     }
1408     else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1409     {
1410         return DNAMatrixNum;
1411     }
1412     else
1413         return -100; // NOTE NONE of these. I need to put in better error checking
1414 }
1415 
1416 /**
1417  *
1418  * @param line
1419  * @return
1420  */
commentline(char * line)1421 bool SubMatrix::commentline(char* line)
1422 {
1423     int i;
1424 
1425     if (line[0] == '#')
1426     {
1427         return true;
1428     }
1429     for (i = 0; line[i] != '\n' && line[i] != EOS; i++)
1430     {
1431         if (!isspace(line[i]))
1432         {
1433             return false;
1434         }
1435     }
1436     return true;
1437 }
1438 
1439 /**
1440  * This function prints out the vector to the file specified by name.
1441  * The vector is printed out in a triangular format, the same as the way
1442  * the arrays are displayed in matrices.h. This function is for testing purposes.
1443  * @param temp
1444  * @param name
1445  */
printInFormat(vector<short> & temp,char * name)1446 void SubMatrix::printInFormat(vector<short>& temp, char* name)
1447 {
1448     char nameOfFile[30];
1449     strcpy(nameOfFile, name);
1450     strcat(nameOfFile, ".out");
1451 
1452     ofstream outfile(nameOfFile);
1453 
1454     if(!outfile)
1455         cerr<<"oops failed to open !!!\n";
1456 
1457     outfile<<"short "<<name<<"[]{\n";
1458 
1459     int numOnCurrentLine = 1;
1460     int soFar = 0;
1461     for(int i = 0; i < (int)temp.size(); i++)
1462     {
1463         if(soFar == numOnCurrentLine)
1464         {
1465             outfile<<"\n";
1466             soFar = 0;
1467             numOnCurrentLine++;
1468         }
1469         if((temp[i] > 9) || (temp[i] < 0))
1470         {
1471             outfile <<" "<< temp[i]<<",";
1472         }
1473         else
1474         {
1475             outfile <<"  "<< temp[i]<<",";
1476         }
1477 
1478         // Now increment so far
1479         soFar++;
1480         // Check to see if the next element is the last element
1481         if((i + 1) == (int)temp.size() - 1)
1482         {
1483             // Print out the last element. Then a curly brace, and break.
1484             if((temp[i+1] > 9) || (temp[i+1] < 0))
1485             {
1486                 outfile <<" "<< temp[i + 1]<<"};\n";
1487             }
1488             else
1489             {
1490                 outfile <<"  "<< temp[i + 1]<<"};\n";
1491             }
1492             break;
1493         }
1494     }
1495 
1496     ofstream outfile2("temp.out");
1497     for(int i = 0; i < (int)temp.size(); i++)
1498     {
1499         outfile2 << temp[i] << " ";
1500     }
1501 }
1502 
1503 
1504 /**
1505  *
1506  * @param temp
1507  * @param name
1508  */
printVectorToFile(vector<short> & temp,char * name)1509 void SubMatrix::printVectorToFile(vector<short>& temp, char* name)
1510 {
1511     char nameOfFile[30];
1512     strcpy(nameOfFile, name);
1513     strcat(nameOfFile, ".out");
1514 
1515     ofstream outfile(nameOfFile);
1516 
1517     if(!outfile)
1518         cerr<<"oops failed to open !!!\n";
1519 
1520     for(int i = 0; i < (int)temp.size(); i++)
1521     {
1522         if((temp[i] > 9) || (temp[i] < 0))
1523         {
1524             outfile <<" "<< temp[i]<<",";
1525         }
1526         else
1527         {
1528             outfile <<"  "<< temp[i]<<",";
1529         }
1530     }
1531     outfile.close();
1532 }
1533 
1534 
1535 /**
1536  *
1537  * @param alignResidueType
1538  * @param alignType
1539  * @return
1540  */
getUserMatAddress(int alignResidueType,int alignType)1541 Matrix* SubMatrix::getUserMatAddress(int alignResidueType, int alignType)
1542 {
1543     if((alignResidueType == Protein) && (alignType == Pairwise))
1544     {
1545         return &pwUserMat;
1546     }
1547     else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1548     {
1549         return &userMat;
1550     }
1551     else if((alignResidueType == DNA) && (alignType == Pairwise))
1552     {
1553         return &pwUserDNAMat;
1554     }
1555     else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1556     {
1557         return &userDNAMat;
1558     }
1559     return NULL;
1560 }
1561 
1562 
1563 /**
1564  *
1565  * @param alignResidueType
1566  * @param alignType
1567  * @return
1568  */
getUserXrefAddress(int alignResidueType,int alignType)1569 Xref* SubMatrix::getUserXrefAddress(int alignResidueType, int alignType)
1570 {
1571     if((alignResidueType == Protein) && (alignType == Pairwise))
1572     {
1573         return &pwAAXref;
1574     }
1575     else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1576     {
1577         return &AAXref;
1578     }
1579     else if((alignResidueType == DNA) && (alignType == Pairwise))
1580     {
1581         return &pwDNAXref;
1582     }
1583     else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1584     {
1585         return &DNAXref;
1586     }
1587     return NULL;
1588 }
1589 
1590 /**
1591  * This is an error handling routine. If an incorrect combination of values
1592  * is given, it will terminate the program.
1593  * @param alignResidueType
1594  * @param alignType
1595  */
checkResidueAndAlignType(int alignResidueType,int alignType)1596 void SubMatrix::checkResidueAndAlignType(int alignResidueType, int alignType)
1597 {
1598     if(((alignResidueType != 0) && (alignResidueType != 1))
1599            || ((alignType != 0) && (alignType != 1)))
1600     {
1601         InvalidCombination ex(alignResidueType, alignType);
1602         ex.whatHappened();
1603         exit(1);
1604     }
1605 }
1606 
1607 /**
1608  * The function tempInterface is used to call the SubMatrix in the way it is
1609  * supposed to be used. It is for testing purposes.
1610  * @param alignResidueType
1611  * @param alignType
1612  */
tempInterface(int alignResidueType,int alignType)1613 void SubMatrix::tempInterface(int alignResidueType, int alignType)
1614 {
1615     char userFile[FILENAMELEN + 1];
1616 
1617     userParameters->setDNAFlag(true);
1618     strcpy(userFile, "mat1");
1619     userParameters->setMenuFlag(false);
1620     getUserMatFromFile(userFile, DNA, Pairwise);
1621     setCurrentNameAndNum(userFile, 4, 3, Pairwise);
1622 
1623     setCurrentNameAndNum("gonnet", 4, Protein, Pairwise);
1624 }
1625 
1626 /**
1627  * A single matrix is used in scoring the alignment. This is Blosum45. This is the
1628  * function to get it.
1629  * @param matrix[][]
1630  * @return
1631  */
getAlnScoreMatrix(int matrix[NUMRES][NUMRES])1632 int SubMatrix::getAlnScoreMatrix(int matrix[NUMRES][NUMRES])
1633 {
1634     int _maxNumRes;
1635     /*
1636        //_maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1637        _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, false, 1, true);
1638        //_maxNumRes = getMatrix(blosum62mt2Vec, &defaultAAXref, matrix, true, 100);
1639     */
1640     // 1.83 style
1641     _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1642 
1643     return _maxNumRes;
1644 }
1645 
1646 /**
1647  * This function is used to get the matrix that will be used for the calculation of
1648  * the histogram. The histogram values are used in ClustalQt.
1649  * @param matrix[][]
1650  * @param matNum
1651  * @param dnaMatNum
1652  */
getQTMatrixForHistogram(int matrix[NUMRES][NUMRES])1653 void SubMatrix::getQTMatrixForHistogram(int matrix[NUMRES][NUMRES])
1654 {
1655     Matrix* _matPtrLocal;
1656     Xref* _matXrefLocal;
1657     int maxRes;
1658     if(userParameters->getDNAFlag())
1659     {
1660         if (QTDNAHistMatNum == DNAUSERDEFINED)
1661         {
1662             _matPtrLocal = &QTscoreUserDNAMatrix;
1663             _matXrefLocal = &QTscoreDNAXref;
1664         }
1665         else if (QTDNAHistMatNum == DNACLUSTALW)
1666         {
1667             _matPtrLocal = clustalvdnamtVec;
1668             _matXrefLocal = &defaultDNAXref;
1669         }
1670         else
1671         {
1672             _matPtrLocal = swgapdnamtVec;
1673             _matXrefLocal = &defaultDNAXref;
1674         }
1675     }
1676     else
1677     {
1678         if (QTAAHistMatNum == AAHISTIDENTITY)
1679         {
1680             _matPtrLocal = idmatVec;
1681             _matXrefLocal = &defaultAAXref;
1682         }
1683         else if (QTAAHistMatNum == AAHISTGONNETPAM80)
1684         {
1685             _matPtrLocal = gon80mtVec;
1686             _matXrefLocal = &defaultAAXref;
1687         }
1688         else if (QTAAHistMatNum == AAHISTGONNETPAM120)
1689         {
1690             _matPtrLocal = gon120mtVec;
1691             _matXrefLocal = &defaultAAXref;
1692         }
1693         else if (QTAAHistMatNum == AAHISTUSER)
1694         {
1695             _matPtrLocal = &QTscoreUserMatrix;
1696             _matXrefLocal = &QTscoreXref;
1697         }
1698         else if (QTAAHistMatNum == AAHISTGONNETPAM350)
1699         {
1700             _matPtrLocal = gon350mtVec;
1701             _matXrefLocal = &defaultAAXref;
1702         }
1703         else // Default
1704         {
1705             _matPtrLocal = gon250mtVec;
1706             _matXrefLocal = &defaultAAXref;
1707         }
1708     }
1709     maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1710 
1711 }
1712 
getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES])1713 void SubMatrix::getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES])
1714 {
1715     Matrix* _matPtrLocal;
1716     Xref* _matXrefLocal;
1717     int maxRes;
1718     int _maxAA = userParameters->getMaxAA();
1719     int max = 0;
1720     int offset;
1721 
1722     if(userParameters->getDNAFlag())
1723     {
1724         if (QTsegmentDNAMatNum == DNAUSERDEFINED)
1725         {
1726             _matPtrLocal = &QTsegmentDNAMatrix;
1727             _matXrefLocal = &QTsegmentDNAXref;
1728         }
1729         else if (QTsegmentDNAMatNum == DNACLUSTALW)
1730         {
1731             _matPtrLocal = clustalvdnamtVec;
1732             _matXrefLocal = &defaultDNAXref;
1733         }
1734         else
1735         {
1736             _matPtrLocal = swgapdnamtVec;
1737             _matXrefLocal = &defaultDNAXref;
1738         }
1739         /* get a positive matrix - then adjust it according to scale */
1740         maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1741         /* find the maximum value */
1742         for(int i = 0; i <= _maxAA; i++)
1743         {
1744             for(int j = 0; j <= _maxAA; j++)
1745             {
1746                 if(matrix[i][j] > max)
1747                 {
1748                     max = matrix[i][j];
1749                 }
1750             }
1751         }
1752         /* subtract max * scale / 2 from each matrix value */
1753         offset = static_cast<int>(static_cast<float>(max *
1754                                   userParameters->getQTlowScoreDNAMarkingScale()) / 20.0);
1755 
1756         for(int i = 0; i <= _maxAA; i++)
1757         {
1758             for(int j = 0; j <= _maxAA; j++)
1759             {
1760                 matrix[i][j] -= offset;
1761             }
1762         }
1763     }
1764     else
1765     {
1766         if (QTsegmentAAMatNum == QTAASEGGONNETPAM80)
1767         {
1768             _matPtrLocal = gon80mtVec;
1769             _matXrefLocal = &defaultAAXref;
1770         }
1771         else if (QTsegmentAAMatNum == QTAASEGGONNETPAM120)
1772         {
1773             _matPtrLocal = gon120mtVec;
1774             _matXrefLocal = &defaultAAXref;
1775         }
1776         else if (QTsegmentAAMatNum == QTAASEGUSER)
1777         {
1778             _matPtrLocal = &QTsegmentAAMatrix;
1779             _matXrefLocal = &QTsegmentAAXref;
1780         }
1781         else if (QTsegmentAAMatNum == QTAASEGGONNETPAM350)
1782         {
1783             _matPtrLocal = gon350mtVec;
1784             _matXrefLocal = &defaultAAXref;
1785         }
1786         else
1787         {
1788             _matPtrLocal = gon250mtVec;
1789             _matXrefLocal = &defaultAAXref;
1790         }
1791         /* get a negative matrix */
1792         maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, true, 100);
1793     }
1794 }
1795 
getQTLowScoreMatFromFile(char * fileName,bool dna)1796 bool SubMatrix::getQTLowScoreMatFromFile(char* fileName, bool dna)
1797 {
1798     int maxRes;
1799 
1800     FILE *infile;
1801 
1802     line2 = string(fileName);
1803 
1804     if(line2.size() == 0)
1805     {
1806         return false;
1807     }
1808 
1809     if((infile = fopen(line2.c_str(), "r")) == NULL)
1810     {
1811         utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1812         return false;
1813     }
1814 
1815     strcpy(fileName, line2.c_str());
1816 
1817     if(dna)
1818     {
1819         maxRes = readUserMatrix(fileName, QTsegmentDNAMatrix, QTsegmentDNAXref);
1820     }
1821     else
1822     {
1823         maxRes = readUserMatrix(fileName, QTsegmentAAMatrix, QTsegmentAAXref);
1824     }
1825 
1826     if (maxRes <= 0)
1827     {
1828         return false;
1829     }
1830 
1831     return true;
1832 }
1833 
getAAScoreMatFromFile(char * str)1834 bool SubMatrix::getAAScoreMatFromFile(char *str)
1835 {
1836     int maxRes;
1837 
1838     FILE *infile;
1839 
1840     line2 = string(str);
1841 
1842     if(line2.size() == 0)
1843     {
1844         return false;
1845     }
1846 
1847     if((infile = fopen(line2.c_str(), "r")) == NULL)
1848     {
1849         utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1850         return false;
1851     }
1852 
1853     strcpy(str, line2.c_str());
1854 
1855     maxRes = readUserMatrix(str, QTscoreUserMatrix, QTscoreXref);
1856 
1857     if (maxRes <= 0)
1858     {
1859         return false;
1860     }
1861 
1862     return true;
1863 }
1864 
getDNAScoreMatFromFile(char * str)1865 bool SubMatrix::getDNAScoreMatFromFile(char *str)
1866 {
1867     int maxRes;
1868 
1869     FILE *infile;
1870 
1871     line2 = string(str);
1872 
1873     if(line2.size() == 0)
1874     {
1875         return false;
1876     }
1877 
1878     if((infile = fopen(line2.c_str(), "r")) == NULL)
1879     {
1880         utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1881         return false;
1882     }
1883 
1884     strcpy(str, line2.c_str());
1885 
1886     maxRes = readUserMatrix(str, QTscoreUserDNAMatrix, QTscoreDNAXref);
1887 
1888     if (maxRes <= 0)
1889     {
1890         return false;
1891     }
1892 
1893     return true;
1894 }
1895 
1896 }
1897