1 /**
2 * Author: Mark Larkin
3 *
4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
5 */
6 #ifdef HAVE_CONFIG_H
7 #include "config.h"
8 #endif
9 #include <stdio.h>
10 #include <string>
11 #include <cstdlib>
12 #include <exception>
13 #include <iostream>
14 #include <sstream>
15 #include "SubMatrix.h"
16 #include "matrices.h"
17 #include "../general/InvalidCombination.cpp"
18 #include "../general/debuglogObject.h"
19
20 namespace clustalw
21 {
22 using namespace std;
23
24 /**
25 *
26 * @param log
27 */
SubMatrix()28 SubMatrix::SubMatrix()
29 : sizenAAMatrix(276),
30 sizeDNAMatrix(153),
31 matrixAvgScore(0),
32 QTDNAHistMatNum(DNAIUB),
33 QTAAHistMatNum(AAHISTGONNETPAM250),
34 QTsegmentDNAMatNum(DNAIUB),
35 QTsegmentAAMatNum(QTAASEGGONNETPAM250)
36 {
37 userSeries = false;
38
39 setUpCrossReferences();
40
41 #if DEBUGFULL
42 if(logObject && DEBUGLOG)
43 {
44 logObject->logMsg("Creating the SubMatrix object\n");
45 }
46 #endif
47
48 try
49 {
50 /* Set up the vectors with the matrices defined in matrices.h
51 * The matrices are intially defined as a short array, as there is no way
52 * to intiailize a vector with a {....} list.
53 */
54 blosum30mtVec = new Matrix(blosum30mt, blosum30mt + sizenAAMatrix);
55 blosum40mtVec = new Matrix(blosum40mt, blosum40mt + sizenAAMatrix);
56 blosum45mtVec = new Matrix(blosum45mt, blosum45mt + sizenAAMatrix);
57 blosum62mt2Vec = new Matrix(blosum62mt2, blosum62mt2 + sizenAAMatrix);
58 blosum80mtVec = new Matrix(blosum80mt, blosum80mt + sizenAAMatrix);
59 pam20mtVec = new Matrix(pam20mt, pam20mt + sizenAAMatrix);
60 pam60mtVec = new Matrix(pam60mt, pam60mt + sizenAAMatrix);
61 pam120mtVec = new Matrix(pam120mt, pam120mt + sizenAAMatrix);
62 pam350mtVec = new Matrix(pam350mt, pam350mt + sizenAAMatrix);
63 idmatVec = new Matrix(idmat, idmat + sizenAAMatrix);
64 gon40mtVec = new Matrix(gon40mt, gon40mt + sizenAAMatrix);
65 gon80mtVec = new Matrix(gon80mt, gon80mt + sizenAAMatrix);
66 gon120mtVec = new Matrix(gon120mt, gon120mt + sizenAAMatrix);
67 gon160mtVec = new Matrix(gon160mt, gon160mt + sizenAAMatrix);
68 gon250mtVec = new Matrix(gon250mt, gon250mt + sizenAAMatrix);
69 gon350mtVec = new Matrix(gon350mt, gon350mt + sizenAAMatrix);
70 clustalvdnamtVec = new Matrix(clustalvdnamt, clustalvdnamt + sizeDNAMatrix);
71 swgapdnamtVec = new Matrix(swgapdnamt, swgapdnamt + sizeDNAMatrix);
72
73 /*
74 * Set up the vectors for user defined types.
75 * Probably dont need to do this, as they may not be used. It would be better
76 * to initialise their size if thye are used.
77 */
78 userMat.resize(NUMRES * NUMRES);
79 pwUserMat.resize(NUMRES * NUMRES);
80 userDNAMat.resize(NUMRES * NUMRES);
81 pwUserDNAMat.resize(NUMRES * NUMRES);
82 QTscoreUserMatrix.resize(NUMRES * NUMRES);
83 QTscoreUserDNAMatrix.resize(NUMRES * NUMRES);
84 QTsegmentDNAMatrix.resize(NUMRES * NUMRES);
85 QTsegmentAAMatrix.resize(NUMRES * NUMRES);
86
87 userMatSeries.resize(MAXMAT); // Maximum number of matrices
88 vector<Matrix>::iterator firstM = userMatSeries.begin();
89 vector<Matrix>::iterator lastM = userMatSeries.end();
90
91 while(firstM != lastM)
92 {
93 firstM->resize(NUMRES * NUMRES);
94 firstM++;
95 }
96
97 // Maybe I should put this in with the other xref intialisations!
98 AAXrefseries.resize(MAXMAT);
99 vector<Xref>::iterator firstX = AAXrefseries.begin();
100 vector<Xref>::iterator lastX = AAXrefseries.end();
101
102 while(firstX != lastX)
103 {
104 firstX->resize(NUMRES + 1);
105 firstX++;
106 }
107
108 // Set the defaults.
109 matrixNum = 3;
110 matrixName = new string("gonnet");
111 DNAMatrixNum = 1;
112 DNAMatrixName = new string("iub");
113 pwMatrixNum = 3;
114 pwMatrixName = new string("gonnet");
115 pwDNAMatrixNum = 1;
116 pwDNAMatrixName = new string("iub");
117 }
118 catch(const exception &ex)
119 {
120 cerr << ex.what() << endl;
121 cerr << "Terminating program. Cannot continue\n";
122 exit(1);
123 }
124 }
125
setValuesToDefault()126 void SubMatrix::setValuesToDefault()
127 {
128 matrixAvgScore = 0;
129 QTDNAHistMatNum = DNAIUB;
130 QTAAHistMatNum = AAHISTGONNETPAM250;
131 QTsegmentDNAMatNum = DNAIUB;
132 QTsegmentAAMatNum = QTAASEGGONNETPAM250;
133 userSeries = false;
134 matrixNum = 3;
135 DNAMatrixNum = 1;
136 pwMatrixNum = 3;
137 pwDNAMatrixNum = 1;
138 }
139
140 /**
141 * The destructor frees up any dynamically allocated memory.
142 */
~SubMatrix()143 SubMatrix::~SubMatrix()
144 {
145 delete blosum30mtVec;
146 delete blosum40mtVec;
147 delete blosum45mtVec;
148 delete blosum62mt2Vec;
149 delete blosum80mtVec;
150 delete pam20mtVec;
151 delete pam60mtVec;
152 delete pam120mtVec;
153 delete pam350mtVec;
154 delete idmatVec;
155 delete gon40mtVec;
156 delete gon80mtVec;
157 delete gon120mtVec;
158 delete gon160mtVec;
159 delete gon250mtVec;
160 delete gon350mtVec;
161 delete clustalvdnamtVec;
162 delete swgapdnamtVec;
163 delete matrixName;
164 delete DNAMatrixName;
165 delete pwMatrixName;
166 delete pwDNAMatrixName;
167 }
168
169 /**
170 * This function sets up the initial cross references.
171 */
setUpCrossReferences()172 void SubMatrix::setUpCrossReferences()
173 {
174 char c1, c2;
175 short i, j, maxRes;
176 defaultAAXref.resize(NUMRES + 1);
177 defaultDNAXref.resize(NUMRES + 1);
178
179 string aminoAcidOrder = "ABCDEFGHIKLMNPQRSTVWXYZ";
180 string nucleicAcidOrder = "ABCDGHKMNRSTUVWXY";
181 /*
182 * I also need to resize the user defined xrefs.
183 */
184 DNAXref.resize(NUMRES + 1);
185 AAXref.resize(NUMRES + 1);
186 pwAAXref.resize(NUMRES + 1);
187 pwDNAXref.resize(NUMRES + 1);
188 QTscoreXref.resize(NUMRES + 1);
189 QTscoreDNAXref.resize(NUMRES + 1);
190 QTsegmentDNAXref.resize(NUMRES + 1);
191 QTsegmentAAXref.resize(NUMRES + 1);
192 /*
193 * set up cross-reference for default matrices hard-coded in matrices.h
194 */
195 for (i = 0; i < NUMRES; i++)
196 {
197 defaultAAXref[i] = -1;
198 }
199 for (i = 0; i < NUMRES; i++)
200 {
201 defaultDNAXref[i] = -1;
202 }
203
204 maxRes = 0;
205
206 for (i = 0; (c1 = aminoAcidOrder[i]); i++)
207 {
208 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
209 {
210 if (c1 == c2)
211 {
212 defaultAAXref[i] = j;
213
214 maxRes++;
215 break;
216 }
217 }
218 if ((defaultAAXref[i] == - 1) && (aminoAcidOrder[i] != '*'))
219 {
220 utilityObject->error("residue %c in matrices.h is not recognised",
221 aminoAcidOrder[i]);
222 }
223 }
224
225 maxRes = 0;
226 for (i = 0; (c1 = nucleicAcidOrder[i]); i++)
227 {
228 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
229 {
230 if (c1 == c2)
231 {
232 defaultDNAXref[i] = j;
233 maxRes++;
234 break;
235 }
236 }
237 if ((defaultDNAXref[i] == - 1) && (nucleicAcidOrder[i] != '*'))
238 {
239 utilityObject->error("nucleic acid %c in matrices.h is not recognised",
240 nucleicAcidOrder[i]);
241 }
242 }
243
244 }
245
246 /**
247 * The function getPairwiseMatrix is called by the user to get the correct sub matrix for the
248 * pairwise alignment stage. It calls getMatrix to actually calculate the matrix.
249 * This function provides an interface for the user. It allows the user to get the matrix
250 * they wish to use for the pairwise alignment part.
251 * @param matrix[][]
252 * @param scale
253 * @param matAvg
254 * @return
255 */
getPairwiseMatrix(int matrix[NUMRES][NUMRES],PairScaleValues & scale,int & matAvg)256 int SubMatrix::getPairwiseMatrix(int matrix[NUMRES][NUMRES], PairScaleValues& scale,
257 int& matAvg)
258 {
259 int _maxRes; // Local copy.
260 /* Pointers to Matrix and xref to use in calculation */
261 Matrix* _matPtr;
262 Xref* _matXref;
263
264 #if DEBUGFULL
265 if(logObject && DEBUGLOG)
266 {
267 logObject->logMsg("In the function getPairwiseMatrix: \n");
268 }
269 #endif
270
271 string matrixPointer;
272 string xrefPointer;
273
274 #ifdef OS_MAC
275 scale.intScale = 10;
276 #else
277 scale.intScale = 100;
278 #endif
279 scale.gapOpenScale = scale.gapExtendScale = 1.0;
280
281 if (userParameters->getDNAFlag())
282 {
283 #if DEBUGFULL
284 if(logObject && DEBUGLOG)
285 {
286 string msg = " (DNA AND Pairwise) " + *pwDNAMatrixName + "\n";
287 logObject->logMsg(msg);
288 }
289 #endif
290 if (pwDNAMatrixName->compare("iub") == 0)
291 {
292 matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
293 _matPtr = swgapdnamtVec;
294 _matXref = &defaultDNAXref;
295 }
296 else if (pwDNAMatrixName->compare("clustalw") == 0)
297 {
298 matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
299 _matPtr = clustalvdnamtVec;
300 _matXref = &defaultDNAXref;
301 scale.gapOpenScale = 0.6667;
302 scale.gapExtendScale = 0.751;
303 }
304 else
305 {
306 matrixPointer = "pwUserDNAMat"; xrefPointer = "pwDNAXref";
307 _matPtr = &pwUserDNAMat;
308 _matXref = &pwDNAXref;
309 }
310 _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
311
312 if (_maxRes == 0)
313 {
314 return ((int) -1);
315 }
316 float _transitionWeight = userParameters->getTransitionWeight();
317 // Mark change 17-5-07
318 matrix[0][4] = static_cast<int>(_transitionWeight * matrix[0][0]);
319 matrix[4][0] = static_cast<int>(_transitionWeight * matrix[0][0]);
320 matrix[2][11] = static_cast<int>(_transitionWeight * matrix[0][0]);
321 matrix[11][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
322 matrix[2][12] = static_cast<int>(_transitionWeight * matrix[0][0]);
323 matrix[12][2] = static_cast<int>(_transitionWeight * matrix[0][0]);
324
325 }
326 else
327 {
328 #if DEBUGFULL
329 if(logObject && DEBUGLOG)
330 {
331 string msg = " (Protein AND Pairwise) " + *pwMatrixName + "\n";
332 logObject->logMsg(msg);
333 }
334 #endif
335
336 if (pwMatrixName->compare("blosum") == 0)
337 {
338 matrixPointer = "blosum30mtVec"; xrefPointer = "defaultAAXref";
339 _matPtr = blosum30mtVec;
340 _matXref = &defaultAAXref;
341 }
342 else if (pwMatrixName->compare("pam") == 0)
343 {
344 matrixPointer = "pam350mtVec"; xrefPointer = "defaultAAXref";
345 _matPtr = pam350mtVec;
346 _matXref = &defaultAAXref;
347 }
348 else if (pwMatrixName->compare("gonnet") == 0)
349 {
350 matrixPointer = "gon250mtVec"; xrefPointer = "defaultAAXref";
351 _matPtr = gon250mtVec;
352 scale.intScale /= 10;
353 _matXref = &defaultAAXref;
354 }
355 else if (pwMatrixName->compare("id") == 0)
356 {
357 matrixPointer = "idmatVec"; xrefPointer = "defaultAAXref";
358 _matPtr = idmatVec;
359 _matXref = &defaultAAXref;
360 }
361 else
362 {
363 matrixPointer = "pwUserMat"; xrefPointer = "pwAAXref";
364 _matPtr = &pwUserMat;
365 _matXref = &pwAAXref;
366 }
367
368 _maxRes = getMatrix(_matPtr, _matXref, matrix, true, scale.intScale);
369
370 if (_maxRes == 0)
371 {
372 return ((int) -1);
373 }
374 }
375
376 #if DEBUGFULL
377 if(logObject && DEBUGLOG)
378 {
379 ostringstream outs;
380 outs << " Called getMatrix with "
381 << matrixPointer << " and " << xrefPointer << ".\n"
382 << " intScale = " << scale.intScale << ", gapOpenScale = "
383 << scale.gapOpenScale << ", gapExtendScale = " << scale.gapExtendScale
384 << "\n\n";
385 logObject->logMsg(outs.str());
386 }
387 #endif
388 matAvg = matrixAvgScore;
389 return _maxRes;
390 }
391
392 /**
393 * The function getProfileAlignMatrix provides an interface for the user to get
394 * the matrix to be used in the profile alignment. This depends on the matrix series
395 * that was chosen, and also on the percent identity.
396 * @param matrix[][]
397 * @param pcid
398 * @param minLen
399 * @param scaleParam
400 * @param matAvg
401 * @return
402 */
getProfileAlignMatrix(int matrix[NUMRES][NUMRES],double pcid,int minLen,PrfScaleValues & scaleParam,int & matAvg)403 int SubMatrix::getProfileAlignMatrix(int matrix[NUMRES][NUMRES], double pcid, int minLen,
404 PrfScaleValues& scaleParam, int& matAvg)
405 {
406 bool found = false;
407 bool errorGiven = false;
408 bool _negMatrix = userParameters->getUseNegMatrix();
409 int i = 0, j = 0;
410 int _maxRes = 0;
411 scaleParam.intScale = 100;
412 string matrixPointer;
413 string xrefPointer;
414
415 #if DEBUGFULL
416 if(logObject && DEBUGLOG)
417 {
418 logObject->logMsg("In the function getProfileAlignMatrix: \n");
419 }
420 #endif
421 if (userParameters->getDNAFlag())
422 {
423 #if DEBUGFULL
424 if(logObject && DEBUGLOG)
425 {
426 ostringstream outs;
427 outs << " (DNA AND Multiple align) "<< DNAMatrixName->c_str() << "\n";
428 logObject->logMsg(outs.str());
429 }
430 #endif
431 scaleParam.scale = 1.0;
432 if (DNAMatrixName->compare("iub") == 0)
433 {
434 _matPtr = swgapdnamtVec;
435 _matXref = &defaultDNAXref;
436 matrixPointer = "swgapdnamtVec"; xrefPointer = "defaultDNAXref";
437 }
438 else if (DNAMatrixName->compare("clustalw") == 0)
439 {
440 _matPtr = clustalvdnamtVec;
441 _matXref = &defaultDNAXref;
442 scaleParam.scale = 0.66;
443 matrixPointer = "clustalvdnamtVec"; xrefPointer = "defaultDNAXref";
444 }
445 else
446 {
447 _matPtr = &userDNAMat;
448 _matXref = &DNAXref;
449 matrixPointer = "userDNAMat"; xrefPointer = "DNAXref";
450 }
451
452 _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
453 static_cast<int>(scaleParam.intScale)); // Mark change 17-5-07
454 if (_maxRes == 0)
455 {
456 return ((int) - 1);
457 }
458
459 float _transitionWeight = userParameters->getTransitionWeight();
460 // fix suggested by Chanan Rubin at Compugen
461 matrix[(*_matXref)[0]][(*_matXref)[4]] =
462 static_cast<int>(_transitionWeight * matrix[0][0]);
463 matrix[(*_matXref)[4]][(*_matXref)[0]] =
464 static_cast<int>(_transitionWeight * matrix[0][0]);
465 matrix[(*_matXref)[2]][(*_matXref)[11]] =
466 static_cast<int>(_transitionWeight * matrix[0][0]);
467 matrix[(*_matXref)[11]][(*_matXref)[2]] =
468 static_cast<int>(_transitionWeight * matrix[0][0]);
469 matrix[(*_matXref)[2]][(*_matXref)[12]] =
470 static_cast<int>(_transitionWeight * matrix[0][0]);
471 matrix[(*_matXref)[12]][(*_matXref)[2]] =
472 static_cast<int>(_transitionWeight * matrix[0][0]);
473
474 }
475 else // Amino acid alignment!!!!
476 {
477 #if DEBUGFULL
478 if(logObject && DEBUGLOG)
479 {
480 ostringstream outs;
481 outs << " (Protein AND Multiple align) "<< matrixName->c_str() << "\n";
482 logObject->logMsg(outs.str());
483 }
484 #endif
485
486 scaleParam.scale = 0.75;
487 if (matrixName->compare("blosum") == 0)
488 {
489 scaleParam.scale = 0.75;
490 if (_negMatrix || userParameters->getDistanceTree() == false)
491 {
492 _matPtr = blosum40mtVec;
493 matrixPointer = "blosum40mtVec";
494 }
495 else if (pcid > 80.0)
496 {
497 _matPtr = blosum80mtVec;
498 matrixPointer = "blosum80mtVec";
499 }
500 else if (pcid > 60.0)
501 {
502 _matPtr = blosum62mt2Vec;
503 matrixPointer = "blosum62mt2Vec";
504 }
505 else if (pcid > 40.0)
506 {
507 _matPtr = blosum45mtVec;
508 matrixPointer = "blosum45mtVec";
509 }
510 else if (pcid > 30.0)
511 {
512 scaleParam.scale = 0.5;
513 _matPtr = blosum45mtVec;
514 matrixPointer = "blosum45mtVec";
515 }
516 else if (pcid > 20.0)
517 {
518 scaleParam.scale = 0.6;
519 _matPtr = blosum45mtVec;
520 matrixPointer = "blosum45mtVec";
521 }
522 else
523 {
524 scaleParam.scale = 0.6;
525 _matPtr = blosum30mtVec;
526 matrixPointer = "blosum30mtVec";
527 }
528 _matXref = &defaultAAXref;
529 xrefPointer = "defaultAAXref";
530 }
531 else if (matrixName->compare("pam") == 0)
532 {
533 scaleParam.scale = 0.75;
534 if (_negMatrix || userParameters->getDistanceTree() == false)
535 {
536 _matPtr = pam120mtVec;
537 matrixPointer = "pam120mtVec";
538 }
539 else if (pcid > 80.0)
540 {
541 _matPtr = pam20mtVec;
542 matrixPointer = "pam20mtVec";
543 }
544 else if (pcid > 60.0)
545 {
546 _matPtr = pam60mtVec;
547 matrixPointer = "pam60mtVec";
548 }
549 else if (pcid > 40.0)
550 {
551 _matPtr = pam120mtVec;
552 matrixPointer = "pam120mtVec";
553 }
554 else
555 {
556 _matPtr = pam350mtVec;
557 matrixPointer = "pam350mtVec";
558 }
559 _matXref = &defaultAAXref;
560 xrefPointer = "defaultAAXref";
561 }
562 else if (matrixName->compare("gonnet") == 0)
563 {
564 scaleParam.scale /= 2.0;
565 if (_negMatrix || userParameters->getDistanceTree() == false)
566 {
567 _matPtr = gon250mtVec;
568 matrixPointer = "gon250mtVec";
569 }
570 else if (pcid > 35.0)
571 {
572 _matPtr = gon80mtVec;
573 scaleParam.scale /= 2.0;
574 matrixPointer = "gon80mtVec";
575 }
576 else if (pcid > 25.0)
577 {
578 if (minLen < 100)
579 {
580 _matPtr = gon250mtVec;
581 matrixPointer = "gon250mtVec";
582 }
583 else
584 {
585 _matPtr = gon120mtVec;
586 matrixPointer = "gon120mtVec";
587 }
588 }
589 else
590 {
591 if (minLen < 100)
592 {
593 _matPtr = gon350mtVec;
594 matrixPointer = "gon350mtVec";
595 }
596 else
597 {
598 _matPtr = gon160mtVec;
599 matrixPointer = "gon160mtVec";
600 }
601 }
602 _matXref = &defaultAAXref;
603 xrefPointer = "defaultAAXref";
604 scaleParam.intScale /= 10;
605 }
606 else if (matrixName->compare("id") == 0)
607 {
608 _matPtr = idmatVec;
609 _matXref = &defaultAAXref;
610 xrefPointer = "defaultAAXref";
611 matrixPointer = "idmatVec";
612 }
613 else if (userSeries)
614 {
615 _matPtr = NULL;
616 found = false;
617 for (i = 0; i < matSeries.nmat; i++)
618 {
619 if (pcid >= matSeries.mat[i].llimit && pcid <=
620 matSeries.mat[i].ulimit)
621 {
622 j = i;
623 found = true;
624 break;
625 }
626 }
627 if (found == false)
628 {
629 if (!errorGiven)
630 {
631 utilityObject->warning(
632 "\nSeries matrix not found for sequence percent identity = %d.\n""(Using first matrix in series as a default.)\n""This alignment may not be optimal!\n""SUGGESTION: Check your matrix series input file and try again.", (int)pcid);
633 }
634 errorGiven = true;
635 j = 0;
636 }
637
638 _matPtr = matSeries.mat[j].matptr;
639 _matXref = matSeries.mat[j].AAXref;
640 // this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit
641 scaleParam.scale = 0.5 + (pcid - matSeries.mat[j].llimit) / (
642 (matSeries.mat[j].ulimit - matSeries.mat[j].llimit) *2.0);
643 xrefPointer = "matSeries.mat[j].AAXref";
644 matrixPointer = "matSeries.mat[j].matptr";
645 }
646 else
647 {
648 _matPtr = &userMat;
649 _matXref = &AAXref;
650 xrefPointer = "AAXref";
651 matrixPointer = "userMat";
652 }
653
654 _maxRes = getMatrix(_matPtr, _matXref, matrix, _negMatrix,
655 static_cast<int>(scaleParam.intScale));
656 if (_maxRes == 0)
657 {
658 cerr << "Error: matrix " << matrixName << " not found\n";
659 return ( - 1);
660 }
661 }
662
663 #if DEBUGFULL
664 if(logObject && DEBUGLOG)
665 {
666 ostringstream outs;
667 outs << " Called getMatrix with "
668 << matrixPointer << " and " << xrefPointer << ".\n"
669 << " intScale = " << scaleParam.intScale << ", scale = "
670 << scaleParam.scale << ", pcid = " << pcid << "\n\n";
671 logObject->logMsg(outs.str());
672 }
673 #endif
674
675 matAvg = matrixAvgScore;
676 return _maxRes;
677 }
678
679 /**
680 * The function getMatrix is what the other parts of the code call to get a useable
681 * substitution matrix. This is stored in matrix[NUMRES][NUMRES].
682 * @param matptr
683 * @param xref
684 * @param matrix[][]
685 * @param negFlag
686 * @param scale
687 * @return
688 */
getMatrix(Matrix * matptr,Xref * xref,int matrix[NUMRES][NUMRES],bool negFlag,int scale,bool minimise)689 int SubMatrix::getMatrix(Matrix* matptr, Xref* xref, int matrix[NUMRES][NUMRES],
690 bool negFlag, int scale, bool minimise)
691 {
692 int ggScore = 0;
693 int grScore = 0;
694 int i, j, k, ix = 0;
695 int ti, tj;
696 int maxRes;
697 int av1, av2, av3, min, max;
698
699 for (i = 0; i < NUMRES; i++)
700 {
701 for (j = 0; j < NUMRES; j++)
702 {
703 matrix[i][j] = 0;
704 }
705 }
706
707 ix = 0;
708 maxRes = 0;
709 for (i = 0; i <= userParameters->getMaxAA(); i++)
710 {
711 ti = (*xref)[i];
712 for (j = 0; j <= i; j++)
713 {
714 tj = (*xref)[j];
715 if ((ti != - 1) && (tj != - 1))
716 {
717 k = (*matptr)[ix];
718 if (ti == tj)
719 {
720 matrix[ti][ti] = k * scale;
721 maxRes++;
722 }
723 else
724 {
725 matrix[ti][tj] = k * scale;
726 matrix[tj][ti] = k * scale;
727 }
728 ix++;
729 }
730 }
731 }
732
733 --maxRes;
734
735 av1 = av2 = av3 = 0;
736 for (i = 0; i <= userParameters->getMaxAA(); i++)
737 {
738 for (j = 0; j <= i; j++)
739 {
740 av1 += matrix[i][j];
741 if (i == j)
742 {
743 av2 += matrix[i][j];
744 }
745 else
746 {
747 av3 += matrix[i][j];
748 }
749 }
750 }
751
752 av1 /= (maxRes * maxRes) / 2;
753 av2 /= maxRes;
754 av3 = static_cast<int>(av3 / (((float)(maxRes * maxRes - maxRes)) / 2));
755 matrixAvgScore = - av3;
756
757 min = max = matrix[0][0];
758 for (i = 0; i <= userParameters->getMaxAA(); i++)
759 for (j = 1; j <= i; j++)
760 {
761 if (matrix[i][j] < min)
762 {
763 min = matrix[i][j];
764 }
765 if (matrix[i][j] > max)
766 {
767 max = matrix[i][j];
768 }
769 }
770 //cout << "MAX = " << max << "\n";
771 if(!minimise)
772 {
773 /*
774 if requested, make a positive matrix - add -(lowest score) to every entry
775 */
776 if (negFlag == false)
777 {
778 if (min < 0)
779 {
780 for (i = 0; i <= userParameters->getMaxAA(); i++)
781 {
782 ti = (*xref)[i];
783 if (ti != - 1)
784 {
785 for (j = 0; j <= userParameters->getMaxAA(); j++)
786 {
787 tj = (*xref)[j];
788
789 if (tj != - 1)
790 {
791 matrix[ti][tj] -= min;
792 }
793 }
794 }
795 }
796 }
797 }
798
799 // local copies of the gap positions
800 int _gapPos1 = userParameters->getGapPos1();
801 int _gapPos2 = userParameters->getGapPos2();
802
803 for (i = 0; i < userParameters->getGapPos1(); i++)
804 {
805 matrix[i][_gapPos1] = grScore;
806 matrix[_gapPos1][i] = grScore;
807 matrix[i][_gapPos2] = grScore;
808 matrix[_gapPos2][i] = grScore;
809 }
810 matrix[_gapPos1][_gapPos1] = ggScore;
811 matrix[_gapPos2][_gapPos2] = ggScore;
812 matrix[_gapPos2][_gapPos1] = ggScore;
813 matrix[_gapPos1][_gapPos2] = ggScore;
814 }
815 else
816 {
817 // DO THE SAGA MATRIX
818 for (i = 0; i <= userParameters->getMaxAA(); i++)
819 {
820 for (j = 0; j <= userParameters->getMaxAA(); j++)
821 {
822 matrix[i][j] = max - matrix[i][j];
823 }
824 }
825 }
826 maxRes += 2;
827
828 return (maxRes);
829 }
830
831 /**
832 * The function getUserMatFromFile is used to read in a user defined matrix.
833 * @param str
834 * @param alignResidueType
835 * @param alignType
836 * @return
837 */
getUserMatFromFile(char * str,int alignResidueType,int alignType)838 bool SubMatrix::getUserMatFromFile(char *str, int alignResidueType, int alignType)
839 {
840 int maxRes;
841
842 FILE *infile;
843 // Need to check if the values are a valid combination!
844 checkResidueAndAlignType(alignResidueType, alignType);
845
846 if(userParameters->getMenuFlag())
847 {
848 utilityObject->getStr(string("Enter name of the matrix file"), line2);
849 }
850 else
851 {
852 line2 = string(str);
853 }
854
855 if(line2.size() == 0)
856 {
857 return false;
858 }
859
860 if((infile = fopen(line2.c_str(), "r")) == NULL)
861 {
862 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
863 return false;
864 }
865
866 strcpy(str, line2.c_str());
867 // Find out which part of the code we are reading the matrix in for.
868 mat = getUserMatAddress(alignResidueType, alignType);
869 xref = getUserXrefAddress(alignResidueType, alignType);
870
871 if ((alignResidueType == Protein) && (alignType == MultipleAlign))
872 {
873 // Try read in a matrix series.
874 maxRes = readMatrixSeries(str, userMat, AAXref);
875 }
876 else // Read in a single matrix!!!
877 {
878 maxRes = readUserMatrix(str, *mat, *xref);
879 }
880
881 if (maxRes <= 0) return false;
882
883 return true;
884 }
885
886 /**
887 * The function compareMatrices is used to compare 2 matrices that have been read in.
888 * It will compare them in a given region. It will not compare all of them, as some of it
889 * will be random memory.
890 * @param mat1[][]
891 * @param mat2[][]
892 */
compareMatrices(int mat1[NUMRES][NUMRES],int mat2[NUMRES][NUMRES])893 void SubMatrix::compareMatrices(int mat1[NUMRES][NUMRES], int mat2[NUMRES][NUMRES])
894 {
895 int same = 1;
896 for(int row = 0; row < NUMRES; row++)
897 {
898 for(int col = 0; col < NUMRES; col++)
899 {
900 if(mat1[row][col] != mat2[row][col])
901 {
902 same = 0;
903 cout << "The row is " << row << ". The column is " << col << endl;
904 break; // It is not the same. End the loop.
905 }
906 }
907 }
908
909 if(same == 0)
910 {
911 cout << "It was not the same\n";
912 }
913 else
914 {
915 cout << "It is the same\n";
916 }
917 }
918
919 /**
920 * This function is simply to display the results of the getMatrix function.
921 * This is so that I can compare it to the original clustal version of it.
922 * @param mat[][]
923 */
printGetMatrixResults(int mat[NUMRES][NUMRES])924 void SubMatrix::printGetMatrixResults(int mat[NUMRES][NUMRES])
925 {
926 ofstream outfile("getmatrix.out");
927
928 if(!outfile)
929 cerr<<"oops failed to open !!!\n";
930
931 for(int row = 0; row < NUMRES; row++)
932 {
933 for(int col = 0; col < NUMRES; col++)
934 {
935 if((mat[row][col] > 9) || (mat[row][col] < 0))
936 {
937 outfile <<" "<< mat[row][col]<<",";
938 }
939 else
940 {
941 outfile <<" "<< mat[row][col]<<",";
942 }
943 }
944 outfile<<"\n";
945 }
946 }
947
948 /**
949 * This function is from the old interface. It simply calls the readMatrixSeries
950 * function. It seems to only be called from the commandline part.
951 * @param str
952 * @return
953 */
getUserMatSeriesFromFile(char * str)954 bool SubMatrix::getUserMatSeriesFromFile(char *str)
955 {
956 int maxRes;
957
958 FILE *infile;
959
960 if(userParameters->getMenuFlag()) // Check if we are using menu!
961 {
962 utilityObject->getStr(string("Enter name of the matrix file"), line2);
963 }
964 else
965 {
966 //strcpy(lin2,str);
967 line2 = string(str);
968 }
969
970 if(line2.size() == 0) return false;
971
972 if((infile = fopen(line2.c_str(), "r"))==NULL)
973 {
974 utilityObject->error("Cannot find matrix file [%s]",line2.c_str());
975 return false;
976 }
977
978 strcpy(str, line2.c_str());
979
980 maxRes = readMatrixSeries(str, userMat, AAXref);
981 if (maxRes <= 0) return false;
982
983 return true;
984 }
985
986 /*
987 * The function readMatrixSeries is used to read in a series of matrices from a file.
988 * It calls readUserMatrix to read in the individual matrices. The matrices are stored
989 * in userMatSeries.
990 */
readMatrixSeries(const char * fileName,Matrix & userMat,Xref & xref)991 int SubMatrix::readMatrixSeries(const char *fileName, Matrix& userMat, Xref& xref)
992 {
993 FILE *fd = NULL;
994 char mat_fileName[FILENAMELEN];
995 char inline1[1024];
996 int maxRes = 0;
997 int nmat;
998 int n, llimit, ulimit;
999
1000 if (fileName[0] == '\0')
1001 {
1002 utilityObject->error("comparison matrix not specified");
1003 return ((int)0);
1004 }
1005 if ((fd = fopen(fileName, "r")) == NULL)
1006 {
1007 utilityObject->error("cannot open %s", fileName);
1008 return ((int)0);
1009 }
1010
1011 /* check the first line to see if it's a series or a single matrix */
1012 while (fgets(inline1, 1024, fd) != NULL)
1013 {
1014 if (commentline(inline1))
1015 {
1016 continue;
1017 }
1018 if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1019 {
1020 userSeries = true;
1021 }
1022 else
1023 {
1024 userSeries = false;
1025 }
1026 break;
1027 }
1028
1029 /* it's a single matrix */
1030 if (userSeries == false)
1031 {
1032 fclose(fd);
1033 maxRes = readUserMatrix(fileName, userMat, xref);
1034 return (maxRes);
1035 }
1036
1037 /* it's a series of matrices, find the next MATRIX line */
1038 nmat = 0;
1039 matSeries.nmat = 0;
1040 while (fgets(inline1, 1024, fd) != NULL)
1041 {
1042 if (commentline(inline1))
1043 {
1044 continue;
1045 }
1046 if (utilityObject->lineType(inline1, "MATRIX")) // Have found a matrix
1047 {
1048 if (sscanf(inline1 + 6, "%d %d %s", &llimit, &ulimit, mat_fileName)
1049 != 3)
1050 {
1051 utilityObject->error("Bad format in file %s\n", fileName);
1052 fclose(fd);
1053 return ((int)0);
1054 }
1055 if (llimit < 0 || llimit > 100 || ulimit < 0 || ulimit > 100)
1056 {
1057 utilityObject->error("Bad format in file %s\n", fileName);
1058 fclose(fd);
1059 return ((int)0);
1060 }
1061 if (ulimit <= llimit)
1062 {
1063 utilityObject->error("in file %s: lower limit is greater than upper (%d-%d)\n",
1064 fileName, llimit, ulimit);
1065 fclose(fd);
1066 return ((int)0);
1067 }
1068
1069 n = readUserMatrix(mat_fileName, userMatSeries[nmat],
1070 AAXrefseries[nmat]);
1071
1072 //cout << "Read in matrix number " << nmat << "\n"; // NOTE Testing!!!!!
1073 char nameOfFile[] = "matrix";
1074 printInFormat(userMatSeries[nmat], nameOfFile);
1075 if (n <= 0)
1076 {
1077 utilityObject->error("Bad format in matrix file %s\n", mat_fileName);
1078 fclose(fd);
1079 return ((int)0);
1080 }
1081 matSeries.mat[nmat].llimit = llimit;
1082 matSeries.mat[nmat].ulimit = ulimit;
1083 matSeries.mat[nmat].matptr = &userMatSeries[nmat];
1084 matSeries.mat[nmat].AAXref = &AAXrefseries[nmat];
1085 nmat++;
1086
1087 if(nmat >= MAXMAT)
1088 {
1089 // We have read in all the matrices that we can read into this vector
1090 // Write a message to the screen, and break out of loop.
1091 cerr << "The matrix series file has more entries than allowed in \n"
1092 << "a user defined series. The most that are allowed is "
1093 << MAXMAT << ".\n"
1094 << "The first " << MAXMAT << " have been read in and will be used.\n";
1095 break; // Get out of the loop!
1096 }
1097 }
1098 }
1099 fclose(fd);
1100 matSeries.nmat = nmat;
1101
1102 maxRes = n;
1103 return (maxRes);
1104 }
1105
1106 /*
1107 * This function is used to read a single user matrix from a file.
1108 * It can be called repeatedly if there are multiple matrices in the file.
1109 */
readUserMatrix(const char * fileName,Matrix & userMat,Xref & xref)1110 int SubMatrix::readUserMatrix(const char *fileName, Matrix& userMat, Xref& xref)
1111 {
1112 double f;
1113 FILE *fd;
1114 int numargs, farg;
1115 int i, j, k = 0;
1116 char codes[NUMRES];
1117 char inline1[1024];
1118 char *args[NUMRES + 4];
1119 char c1, c2;
1120 int ix1, ix = 0;
1121 int maxRes = 0;
1122 float scale;
1123
1124 if (fileName[0] == '\0')
1125 {
1126 utilityObject->error("comparison matrix not specified");
1127 return ((int)0);
1128 }
1129
1130 if ((fd = fopen(fileName, "r")) == NULL)
1131 {
1132 utilityObject->error("cannot open %s", fileName);
1133 return ((int)0);
1134 }
1135 maxRes = 0;
1136 while (fgets(inline1, 1024, fd) != NULL)
1137 {
1138 if (commentline(inline1))
1139 {
1140 continue;
1141 }
1142 if (utilityObject->lineType(inline1, "CLUSTAL_SERIES"))
1143 {
1144 utilityObject->error("in %s - single matrix expected.", fileName);
1145 fclose(fd);
1146 return ((int)0);
1147 }
1148 /*
1149 read residue characters.
1150 */
1151 k = 0;
1152 for (j = 0; j < (int)strlen(inline1); j++)
1153 {
1154 if (isalpha((int)inline1[j]))
1155 {
1156 codes[k++] = inline1[j];
1157 }
1158 if (k > NUMRES)
1159 {
1160 utilityObject->error("too many entries in matrix %s", fileName);
1161 fclose(fd);
1162 return ((int)0);
1163 }
1164 }
1165 codes[k] = '\0';
1166 break;
1167 }
1168
1169 if (k == 0)
1170 {
1171 utilityObject->error("wrong format in matrix %s", fileName);
1172 fclose(fd);
1173 return ((int)0);
1174 }
1175
1176 /*
1177 cross-reference the residues
1178 */
1179 for (i = 0; i < NUMRES; i++)
1180 {
1181 xref[i] = - 1;
1182 }
1183
1184 maxRes = 0;
1185 for (i = 0; (c1 = codes[i]); i++)
1186 {
1187 for (j = 0; (c2 = userParameters->getAminoAcidCode(j)); j++)
1188 if (c1 == c2)
1189 {
1190 xref[i] = j;
1191 maxRes++;
1192 break;
1193 }
1194 if ((xref[i] == - 1) && (codes[i] != '*'))
1195 {
1196 utilityObject->warning("residue %c in matrix %s not recognised", codes[i],
1197 fileName);
1198 }
1199 }
1200
1201
1202 /*
1203 get the weights
1204 */
1205
1206 ix = ix1 = 0;
1207 while (fgets(inline1, 1024, fd) != NULL)
1208 {
1209 if (inline1[0] == '\n')
1210 {
1211 continue;
1212 }
1213 if (inline1[0] == '#' || inline1[0] == '!')
1214 {
1215 break;
1216 }
1217 numargs = getArgs(inline1, args, (int)(k + 1));
1218 if (numargs < maxRes)
1219 {
1220 utilityObject->error("wrong format in matrix %s", fileName);
1221 fclose(fd);
1222 return ((int)0);
1223 }
1224 if (isalpha(args[0][0]))
1225 {
1226 farg = 1;
1227 }
1228 else
1229 {
1230 farg = 0;
1231 }
1232
1233 /* decide whether the matrix values are float or decimal */
1234 scale = 1.0;
1235 for (i = 0; i < (int)strlen(args[farg]); i++)
1236 if (args[farg][i] == '.')
1237 {
1238 /* we've found a float value */
1239 scale = 10.0;
1240 break;
1241 }
1242
1243 for (i = 0; i <= ix; i++)
1244 {
1245 if (xref[i] != - 1)
1246 {
1247 f = atof(args[i + farg]);
1248 userMat[ix1++] = (short)(f *scale);
1249 }
1250 }
1251 ix++;
1252 }
1253 if (ix != k + 1)
1254 {
1255 utilityObject->error("wrong format in matrix %s", fileName);
1256 fclose(fd);
1257 return ((int)0);
1258 }
1259
1260 userMat.resize(ix1 + 1);
1261
1262 maxRes += 2;
1263 fclose(fd);
1264
1265 return (maxRes);
1266 }
1267
1268 /**
1269 *
1270 * @param inline1
1271 * @param args[]
1272 * @param max
1273 * @return
1274 */
getArgs(char * inline1,char * args[],int max)1275 int SubMatrix::getArgs(char *inline1,char *args[],int max)
1276 {
1277 char *inptr;
1278 int i;
1279
1280 inptr = inline1;
1281 for (i = 0; i <= max; i++)
1282 {
1283 if ((args[i] = strtok(inptr, " \t\n")) == NULL)
1284 {
1285 break;
1286 }
1287 inptr = NULL;
1288 }
1289
1290 return (i);
1291 }
1292
1293
1294 /**
1295 *
1296 * @return
1297 */
getMatrixNum()1298 int SubMatrix::getMatrixNum()
1299 {
1300 return matrixNum;
1301 }
1302
1303 /**
1304 *
1305 * @return
1306 */
getDNAMatrixNum()1307 int SubMatrix::getDNAMatrixNum()
1308 {
1309 return DNAMatrixNum;
1310 }
1311
1312 /**
1313 *
1314 * @return
1315 */
getPWMatrixNum()1316 int SubMatrix::getPWMatrixNum()
1317 {
1318 return pwMatrixNum;
1319 }
1320
1321 /**
1322 *
1323 * @return
1324 */
getPWDNAMatrixNum()1325 int SubMatrix::getPWDNAMatrixNum()
1326 {
1327 return pwDNAMatrixNum;
1328 }
1329
1330 /**
1331 * The function setCurrentNameAndNum is used to select a matrix series.
1332 * This will then be used for the alignment. The matrices will change, but the
1333 * series remains the same. We can set the series for pairwise/full for both
1334 * protein and DNA. NOTE: The default can be set to user defined matrices.
1335 * @param _matrixName
1336 * @param _matrixNum
1337 * @param alignResidueType
1338 * @param alignType
1339 */
setCurrentNameAndNum(string _matrixName,int _matrixNum,int alignResidueType,int alignType)1340 void SubMatrix::setCurrentNameAndNum(string _matrixName, int _matrixNum,
1341 int alignResidueType,int alignType)
1342 {
1343 // Check if the values are valid.
1344 checkResidueAndAlignType(alignResidueType, alignType);
1345
1346 string residue;
1347 string align;
1348 if((alignResidueType == Protein) && (alignType == Pairwise))
1349 {
1350 residue = "Protein"; align = "Pairwise";
1351 pwMatrixNum = _matrixNum;
1352 pwMatrixName = new string(_matrixName);
1353 }
1354 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1355 {
1356 residue = "Protein"; align = "MultipleAlign";
1357 matrixNum = _matrixNum;
1358 matrixName = new string(_matrixName);
1359 }
1360 else if((alignResidueType == DNA) && (alignType == Pairwise))
1361 {
1362 residue = "DNA"; align = "Pairwise";
1363 pwDNAMatrixNum = _matrixNum;
1364 pwDNAMatrixName = new string(_matrixName);
1365 }
1366 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1367 {
1368 residue = "DNA"; align = "MultipleAlign";
1369 DNAMatrixNum = _matrixNum;
1370 DNAMatrixName = new string(_matrixName);
1371
1372 }
1373
1374 #if DEBUGFULL
1375 if(logObject && DEBUGLOG)
1376 {
1377 ostringstream outs;
1378 outs << "The matrix/matrix series has been changed for "
1379 << "(" << residue << " AND " << align << ")."
1380 << " New value: " << _matrixName << "\n\n";
1381 logObject->logMsg(outs.str());
1382 }
1383 #endif
1384 }
1385
1386 /**
1387 *
1388 * @param alignResidueType
1389 * @param alignType
1390 * @return
1391 */
getMatrixNumForMenu(int alignResidueType,int alignType)1392 int SubMatrix::getMatrixNumForMenu(int alignResidueType, int alignType)
1393 {
1394 checkResidueAndAlignType(alignResidueType, alignType);
1395
1396 if((alignResidueType == Protein) && (alignType == Pairwise))
1397 {
1398 return pwMatrixNum;
1399 }
1400 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1401 {
1402 return matrixNum;
1403 }
1404 else if((alignResidueType == DNA) && (alignType == Pairwise))
1405 {
1406 return pwDNAMatrixNum;
1407 }
1408 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1409 {
1410 return DNAMatrixNum;
1411 }
1412 else
1413 return -100; // NOTE NONE of these. I need to put in better error checking
1414 }
1415
1416 /**
1417 *
1418 * @param line
1419 * @return
1420 */
commentline(char * line)1421 bool SubMatrix::commentline(char* line)
1422 {
1423 int i;
1424
1425 if (line[0] == '#')
1426 {
1427 return true;
1428 }
1429 for (i = 0; line[i] != '\n' && line[i] != EOS; i++)
1430 {
1431 if (!isspace(line[i]))
1432 {
1433 return false;
1434 }
1435 }
1436 return true;
1437 }
1438
1439 /**
1440 * This function prints out the vector to the file specified by name.
1441 * The vector is printed out in a triangular format, the same as the way
1442 * the arrays are displayed in matrices.h. This function is for testing purposes.
1443 * @param temp
1444 * @param name
1445 */
printInFormat(vector<short> & temp,char * name)1446 void SubMatrix::printInFormat(vector<short>& temp, char* name)
1447 {
1448 char nameOfFile[30];
1449 strcpy(nameOfFile, name);
1450 strcat(nameOfFile, ".out");
1451
1452 ofstream outfile(nameOfFile);
1453
1454 if(!outfile)
1455 cerr<<"oops failed to open !!!\n";
1456
1457 outfile<<"short "<<name<<"[]{\n";
1458
1459 int numOnCurrentLine = 1;
1460 int soFar = 0;
1461 for(int i = 0; i < (int)temp.size(); i++)
1462 {
1463 if(soFar == numOnCurrentLine)
1464 {
1465 outfile<<"\n";
1466 soFar = 0;
1467 numOnCurrentLine++;
1468 }
1469 if((temp[i] > 9) || (temp[i] < 0))
1470 {
1471 outfile <<" "<< temp[i]<<",";
1472 }
1473 else
1474 {
1475 outfile <<" "<< temp[i]<<",";
1476 }
1477
1478 // Now increment so far
1479 soFar++;
1480 // Check to see if the next element is the last element
1481 if((i + 1) == (int)temp.size() - 1)
1482 {
1483 // Print out the last element. Then a curly brace, and break.
1484 if((temp[i+1] > 9) || (temp[i+1] < 0))
1485 {
1486 outfile <<" "<< temp[i + 1]<<"};\n";
1487 }
1488 else
1489 {
1490 outfile <<" "<< temp[i + 1]<<"};\n";
1491 }
1492 break;
1493 }
1494 }
1495
1496 ofstream outfile2("temp.out");
1497 for(int i = 0; i < (int)temp.size(); i++)
1498 {
1499 outfile2 << temp[i] << " ";
1500 }
1501 }
1502
1503
1504 /**
1505 *
1506 * @param temp
1507 * @param name
1508 */
printVectorToFile(vector<short> & temp,char * name)1509 void SubMatrix::printVectorToFile(vector<short>& temp, char* name)
1510 {
1511 char nameOfFile[30];
1512 strcpy(nameOfFile, name);
1513 strcat(nameOfFile, ".out");
1514
1515 ofstream outfile(nameOfFile);
1516
1517 if(!outfile)
1518 cerr<<"oops failed to open !!!\n";
1519
1520 for(int i = 0; i < (int)temp.size(); i++)
1521 {
1522 if((temp[i] > 9) || (temp[i] < 0))
1523 {
1524 outfile <<" "<< temp[i]<<",";
1525 }
1526 else
1527 {
1528 outfile <<" "<< temp[i]<<",";
1529 }
1530 }
1531 outfile.close();
1532 }
1533
1534
1535 /**
1536 *
1537 * @param alignResidueType
1538 * @param alignType
1539 * @return
1540 */
getUserMatAddress(int alignResidueType,int alignType)1541 Matrix* SubMatrix::getUserMatAddress(int alignResidueType, int alignType)
1542 {
1543 if((alignResidueType == Protein) && (alignType == Pairwise))
1544 {
1545 return &pwUserMat;
1546 }
1547 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1548 {
1549 return &userMat;
1550 }
1551 else if((alignResidueType == DNA) && (alignType == Pairwise))
1552 {
1553 return &pwUserDNAMat;
1554 }
1555 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1556 {
1557 return &userDNAMat;
1558 }
1559 return NULL;
1560 }
1561
1562
1563 /**
1564 *
1565 * @param alignResidueType
1566 * @param alignType
1567 * @return
1568 */
getUserXrefAddress(int alignResidueType,int alignType)1569 Xref* SubMatrix::getUserXrefAddress(int alignResidueType, int alignType)
1570 {
1571 if((alignResidueType == Protein) && (alignType == Pairwise))
1572 {
1573 return &pwAAXref;
1574 }
1575 else if((alignResidueType == Protein) && (alignType == MultipleAlign))
1576 {
1577 return &AAXref;
1578 }
1579 else if((alignResidueType == DNA) && (alignType == Pairwise))
1580 {
1581 return &pwDNAXref;
1582 }
1583 else if((alignResidueType == DNA) && (alignType == MultipleAlign))
1584 {
1585 return &DNAXref;
1586 }
1587 return NULL;
1588 }
1589
1590 /**
1591 * This is an error handling routine. If an incorrect combination of values
1592 * is given, it will terminate the program.
1593 * @param alignResidueType
1594 * @param alignType
1595 */
checkResidueAndAlignType(int alignResidueType,int alignType)1596 void SubMatrix::checkResidueAndAlignType(int alignResidueType, int alignType)
1597 {
1598 if(((alignResidueType != 0) && (alignResidueType != 1))
1599 || ((alignType != 0) && (alignType != 1)))
1600 {
1601 InvalidCombination ex(alignResidueType, alignType);
1602 ex.whatHappened();
1603 exit(1);
1604 }
1605 }
1606
1607 /**
1608 * The function tempInterface is used to call the SubMatrix in the way it is
1609 * supposed to be used. It is for testing purposes.
1610 * @param alignResidueType
1611 * @param alignType
1612 */
tempInterface(int alignResidueType,int alignType)1613 void SubMatrix::tempInterface(int alignResidueType, int alignType)
1614 {
1615 char userFile[FILENAMELEN + 1];
1616
1617 userParameters->setDNAFlag(true);
1618 strcpy(userFile, "mat1");
1619 userParameters->setMenuFlag(false);
1620 getUserMatFromFile(userFile, DNA, Pairwise);
1621 setCurrentNameAndNum(userFile, 4, 3, Pairwise);
1622
1623 setCurrentNameAndNum("gonnet", 4, Protein, Pairwise);
1624 }
1625
1626 /**
1627 * A single matrix is used in scoring the alignment. This is Blosum45. This is the
1628 * function to get it.
1629 * @param matrix[][]
1630 * @return
1631 */
getAlnScoreMatrix(int matrix[NUMRES][NUMRES])1632 int SubMatrix::getAlnScoreMatrix(int matrix[NUMRES][NUMRES])
1633 {
1634 int _maxNumRes;
1635 /*
1636 //_maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1637 _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, false, 1, true);
1638 //_maxNumRes = getMatrix(blosum62mt2Vec, &defaultAAXref, matrix, true, 100);
1639 */
1640 // 1.83 style
1641 _maxNumRes = getMatrix(blosum45mtVec, &defaultAAXref, matrix, true, 100);
1642
1643 return _maxNumRes;
1644 }
1645
1646 /**
1647 * This function is used to get the matrix that will be used for the calculation of
1648 * the histogram. The histogram values are used in ClustalQt.
1649 * @param matrix[][]
1650 * @param matNum
1651 * @param dnaMatNum
1652 */
getQTMatrixForHistogram(int matrix[NUMRES][NUMRES])1653 void SubMatrix::getQTMatrixForHistogram(int matrix[NUMRES][NUMRES])
1654 {
1655 Matrix* _matPtrLocal;
1656 Xref* _matXrefLocal;
1657 int maxRes;
1658 if(userParameters->getDNAFlag())
1659 {
1660 if (QTDNAHistMatNum == DNAUSERDEFINED)
1661 {
1662 _matPtrLocal = &QTscoreUserDNAMatrix;
1663 _matXrefLocal = &QTscoreDNAXref;
1664 }
1665 else if (QTDNAHistMatNum == DNACLUSTALW)
1666 {
1667 _matPtrLocal = clustalvdnamtVec;
1668 _matXrefLocal = &defaultDNAXref;
1669 }
1670 else
1671 {
1672 _matPtrLocal = swgapdnamtVec;
1673 _matXrefLocal = &defaultDNAXref;
1674 }
1675 }
1676 else
1677 {
1678 if (QTAAHistMatNum == AAHISTIDENTITY)
1679 {
1680 _matPtrLocal = idmatVec;
1681 _matXrefLocal = &defaultAAXref;
1682 }
1683 else if (QTAAHistMatNum == AAHISTGONNETPAM80)
1684 {
1685 _matPtrLocal = gon80mtVec;
1686 _matXrefLocal = &defaultAAXref;
1687 }
1688 else if (QTAAHistMatNum == AAHISTGONNETPAM120)
1689 {
1690 _matPtrLocal = gon120mtVec;
1691 _matXrefLocal = &defaultAAXref;
1692 }
1693 else if (QTAAHistMatNum == AAHISTUSER)
1694 {
1695 _matPtrLocal = &QTscoreUserMatrix;
1696 _matXrefLocal = &QTscoreXref;
1697 }
1698 else if (QTAAHistMatNum == AAHISTGONNETPAM350)
1699 {
1700 _matPtrLocal = gon350mtVec;
1701 _matXrefLocal = &defaultAAXref;
1702 }
1703 else // Default
1704 {
1705 _matPtrLocal = gon250mtVec;
1706 _matXrefLocal = &defaultAAXref;
1707 }
1708 }
1709 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1710
1711 }
1712
getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES])1713 void SubMatrix::getQTMatrixForLowScoreSeg(int matrix[NUMRES][NUMRES])
1714 {
1715 Matrix* _matPtrLocal;
1716 Xref* _matXrefLocal;
1717 int maxRes;
1718 int _maxAA = userParameters->getMaxAA();
1719 int max = 0;
1720 int offset;
1721
1722 if(userParameters->getDNAFlag())
1723 {
1724 if (QTsegmentDNAMatNum == DNAUSERDEFINED)
1725 {
1726 _matPtrLocal = &QTsegmentDNAMatrix;
1727 _matXrefLocal = &QTsegmentDNAXref;
1728 }
1729 else if (QTsegmentDNAMatNum == DNACLUSTALW)
1730 {
1731 _matPtrLocal = clustalvdnamtVec;
1732 _matXrefLocal = &defaultDNAXref;
1733 }
1734 else
1735 {
1736 _matPtrLocal = swgapdnamtVec;
1737 _matXrefLocal = &defaultDNAXref;
1738 }
1739 /* get a positive matrix - then adjust it according to scale */
1740 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, false, 100);
1741 /* find the maximum value */
1742 for(int i = 0; i <= _maxAA; i++)
1743 {
1744 for(int j = 0; j <= _maxAA; j++)
1745 {
1746 if(matrix[i][j] > max)
1747 {
1748 max = matrix[i][j];
1749 }
1750 }
1751 }
1752 /* subtract max * scale / 2 from each matrix value */
1753 offset = static_cast<int>(static_cast<float>(max *
1754 userParameters->getQTlowScoreDNAMarkingScale()) / 20.0);
1755
1756 for(int i = 0; i <= _maxAA; i++)
1757 {
1758 for(int j = 0; j <= _maxAA; j++)
1759 {
1760 matrix[i][j] -= offset;
1761 }
1762 }
1763 }
1764 else
1765 {
1766 if (QTsegmentAAMatNum == QTAASEGGONNETPAM80)
1767 {
1768 _matPtrLocal = gon80mtVec;
1769 _matXrefLocal = &defaultAAXref;
1770 }
1771 else if (QTsegmentAAMatNum == QTAASEGGONNETPAM120)
1772 {
1773 _matPtrLocal = gon120mtVec;
1774 _matXrefLocal = &defaultAAXref;
1775 }
1776 else if (QTsegmentAAMatNum == QTAASEGUSER)
1777 {
1778 _matPtrLocal = &QTsegmentAAMatrix;
1779 _matXrefLocal = &QTsegmentAAXref;
1780 }
1781 else if (QTsegmentAAMatNum == QTAASEGGONNETPAM350)
1782 {
1783 _matPtrLocal = gon350mtVec;
1784 _matXrefLocal = &defaultAAXref;
1785 }
1786 else
1787 {
1788 _matPtrLocal = gon250mtVec;
1789 _matXrefLocal = &defaultAAXref;
1790 }
1791 /* get a negative matrix */
1792 maxRes = getMatrix(_matPtrLocal, _matXrefLocal, matrix, true, 100);
1793 }
1794 }
1795
getQTLowScoreMatFromFile(char * fileName,bool dna)1796 bool SubMatrix::getQTLowScoreMatFromFile(char* fileName, bool dna)
1797 {
1798 int maxRes;
1799
1800 FILE *infile;
1801
1802 line2 = string(fileName);
1803
1804 if(line2.size() == 0)
1805 {
1806 return false;
1807 }
1808
1809 if((infile = fopen(line2.c_str(), "r")) == NULL)
1810 {
1811 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1812 return false;
1813 }
1814
1815 strcpy(fileName, line2.c_str());
1816
1817 if(dna)
1818 {
1819 maxRes = readUserMatrix(fileName, QTsegmentDNAMatrix, QTsegmentDNAXref);
1820 }
1821 else
1822 {
1823 maxRes = readUserMatrix(fileName, QTsegmentAAMatrix, QTsegmentAAXref);
1824 }
1825
1826 if (maxRes <= 0)
1827 {
1828 return false;
1829 }
1830
1831 return true;
1832 }
1833
getAAScoreMatFromFile(char * str)1834 bool SubMatrix::getAAScoreMatFromFile(char *str)
1835 {
1836 int maxRes;
1837
1838 FILE *infile;
1839
1840 line2 = string(str);
1841
1842 if(line2.size() == 0)
1843 {
1844 return false;
1845 }
1846
1847 if((infile = fopen(line2.c_str(), "r")) == NULL)
1848 {
1849 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1850 return false;
1851 }
1852
1853 strcpy(str, line2.c_str());
1854
1855 maxRes = readUserMatrix(str, QTscoreUserMatrix, QTscoreXref);
1856
1857 if (maxRes <= 0)
1858 {
1859 return false;
1860 }
1861
1862 return true;
1863 }
1864
getDNAScoreMatFromFile(char * str)1865 bool SubMatrix::getDNAScoreMatFromFile(char *str)
1866 {
1867 int maxRes;
1868
1869 FILE *infile;
1870
1871 line2 = string(str);
1872
1873 if(line2.size() == 0)
1874 {
1875 return false;
1876 }
1877
1878 if((infile = fopen(line2.c_str(), "r")) == NULL)
1879 {
1880 utilityObject->error("Cannot find matrix file [%s]", line2.c_str());
1881 return false;
1882 }
1883
1884 strcpy(str, line2.c_str());
1885
1886 maxRes = readUserMatrix(str, QTscoreUserDNAMatrix, QTscoreDNAXref);
1887
1888 if (maxRes <= 0)
1889 {
1890 return false;
1891 }
1892
1893 return true;
1894 }
1895
1896 }
1897